From dbb27b70c11e6cd3965f4bddb57164c4f98bfb3a Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Thu, 27 Jun 2024 11:30:23 +0200 Subject: [PATCH 01/23] Add predict_params into Mapie regression files without adding any unit test --- mapie/estimator/estimator.py | 31 ++++++++++++++++------ mapie/estimator/interface.py | 6 ++++- mapie/regression/quantile_regression.py | 6 ++--- mapie/regression/regression.py | 20 ++++++++++---- mapie/regression/time_series_regression.py | 6 +++-- mapie/tests/test_regression.py | 2 +- 6 files changed, 51 insertions(+), 20 deletions(-) diff --git a/mapie/estimator/estimator.py b/mapie/estimator/estimator.py index b8c7d4ecf..e446cae87 100644 --- a/mapie/estimator/estimator.py +++ b/mapie/estimator/estimator.py @@ -233,6 +233,7 @@ def _predict_oof_estimator( estimator: RegressorMixin, X: ArrayLike, val_index: ArrayLike, + **predict_params ) -> Tuple[NDArray, ArrayLike]: """ Perform predictions on a single out-of-fold model on a validation set. @@ -248,6 +249,9 @@ def _predict_oof_estimator( val_index: ArrayLike of shape (n_samples_val) Validation data indices. + **predict_params : dict + Additional predict parameters. + Returns ------- Tuple[NDArray, ArrayLike] @@ -255,7 +259,7 @@ def _predict_oof_estimator( """ X_val = _safe_indexing(X, val_index) if _num_samples(X_val) > 0: - y_pred = estimator.predict(X_val) + y_pred = estimator.predict(X_val, **predict_params) else: y_pred = np.array([]) return y_pred, val_index @@ -306,7 +310,7 @@ def _aggregate_with_mask( else: raise ValueError("The value of self.agg_function is not correct") - def _pred_multi(self, X: ArrayLike) -> NDArray: + def _pred_multi(self, X: ArrayLike, **predict_params) -> NDArray: """ Return a prediction per train sample for each test sample, by aggregation with matrix ``k_``. @@ -316,12 +320,15 @@ def _pred_multi(self, X: ArrayLike) -> NDArray: X: ArrayLike of shape (n_samples_test, n_features) Input data + **predict_params : dict + Additional predict parameters. + Returns ------- NDArray of shape (n_samples_test, n_samples_train) """ y_pred_multi = np.column_stack( - [e.predict(X) for e in self.estimators_] + [e.predict(X, **predict_params) for e in self.estimators_] ) # At this point, y_pred_multi is of shape # (n_samples_test, n_estimators_). The method @@ -334,7 +341,8 @@ def predict_calib( self, X: ArrayLike, y: Optional[ArrayLike] = None, - groups: Optional[ArrayLike] = None + groups: Optional[ArrayLike] = None, + **predict_params ) -> NDArray: """ Perform predictions on X : the calibration set. @@ -355,6 +363,9 @@ def predict_calib( By default ``None``. + **predict_params : dict + Additional predict parameters. + Returns ------- NDArray of shape (n_samples_test, 1) @@ -371,7 +382,7 @@ def predict_calib( cv = cast(BaseCrossValidator, self.cv) outputs = Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( delayed(self._predict_oof_estimator)( - estimator, X, calib_index, + estimator, X, calib_index, **predict_params ) for (_, calib_index), estimator in zip( cv.split(X, y, groups), @@ -497,7 +508,8 @@ def predict( self, X: ArrayLike, ensemble: bool = False, - return_multi_pred: bool = True + return_multi_pred: bool = True, + **predict_params, ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: """ Predict target from X. It also computes the prediction per train sample @@ -525,6 +537,9 @@ def predict( predictions (3 arrays). If ``False`` the method return the simple predictions only. + **predict_params : dict + Additional predict parameters. + Returns ------- Tuple[NDArray, NDArray, NDArray] @@ -534,7 +549,7 @@ def predict( """ check_is_fitted(self, self.fit_attributes) - y_pred = self.single_estimator_.predict(X) + y_pred = self.single_estimator_.predict(X, **predict_params) if not return_multi_pred and not ensemble: return y_pred @@ -542,7 +557,7 @@ def predict( y_pred_multi_low = y_pred[:, np.newaxis] y_pred_multi_up = y_pred[:, np.newaxis] else: - y_pred_multi = self._pred_multi(X) + y_pred_multi = self._pred_multi(X, **predict_params) if self.method == "minmax": y_pred_multi_low = np.min(y_pred_multi, axis=1, keepdims=True) diff --git a/mapie/estimator/interface.py b/mapie/estimator/interface.py index 3e76377f1..d6d122cc6 100644 --- a/mapie/estimator/interface.py +++ b/mapie/estimator/interface.py @@ -62,7 +62,8 @@ def predict( self, X: ArrayLike, ensemble: bool = False, - return_multi_pred: bool = True + return_multi_pred: bool = True, + **predict_params, ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: """ Predict target from X. It also computes the prediction per train sample @@ -90,6 +91,9 @@ def predict( predictions (3 arrays). If ``False`` the method return the simple predictions only. + **predict_params : dict + Additional predict parameters. + Returns ------- Tuple[NDArray, NDArray, NDArray] diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index 2635b0267..63cf3032f 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import Iterable, List, Optional, Tuple, Union, cast +from typing import Iterable, List, Optional, Tuple, Union, cast, Any import numpy as np from sklearn.base import RegressorMixin, clone @@ -547,7 +547,6 @@ def fit( The model itself. """ self.cv = self._check_cv(cast(str, self.cv)) - # Initialization self.estimators_: List[RegressorMixin] = [] if self.cv == "prefit": @@ -649,6 +648,7 @@ def predict( optimize_beta: bool = False, allow_infinite_bounds: bool = False, symmetry: Optional[bool] = True, + **predict_params: Any, ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Predict target on new samples with confidence intervals. @@ -699,7 +699,7 @@ def predict( dtype=float, ) for i, est in enumerate(self.estimators_): - y_preds[i] = est.predict(X) + y_preds[i] = est.predict(X, **predict_params) check_lower_upper_bounds(y_preds[0], y_preds[1], y_preds[2]) if symmetry: quantile = np.full( diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index d589e56f7..6bc13e226 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import Iterable, Optional, Tuple, Union, cast +from typing import Iterable, Optional, Tuple, Union, cast, Any import numpy as np from sklearn.base import BaseEstimator, RegressorMixin @@ -469,7 +469,7 @@ def fit( y: ArrayLike, sample_weight: Optional[ArrayLike] = None, groups: Optional[ArrayLike] = None, - **fit_params, + **kwargs: Any, ) -> MapieRegressor: """ Fit estimator and compute conformity scores used for @@ -502,14 +502,19 @@ def fit( train/test set. By default ``None``. - **fit_params : dict + fit_params : dict Additional fit parameters. + predict_params : dict + Additional predict parameters. + Returns ------- MapieRegressor The model itself. """ + fit_params = kwargs.pop('fit_params', {}) + predict_params = kwargs.pop('predict_params', {}) # Checks (estimator, self.conformity_score_function_, @@ -536,7 +541,8 @@ def fit( ) # Predict on calibration data - y_pred = self.estimator_.predict_calib(X, y=y, groups=groups) + y_pred = self.estimator_.predict_calib(X, y=y, groups=groups, + **predict_params) # Compute the conformity scores (manage jk-ab case) self.conformity_scores_ = \ @@ -553,6 +559,7 @@ def predict( alpha: Optional[Union[float, Iterable[float]]] = None, optimize_beta: bool = False, allow_infinite_bounds: bool = False, + **predict_params ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Predict target on new samples with confidence intervals. @@ -602,6 +609,9 @@ def predict( By default ``False``. + **predict_params : dict + Additional predict parameters. + Returns ------- Union[NDArray, Tuple[NDArray, NDArray]] @@ -619,7 +629,7 @@ def predict( # If alpha is None, predict the target without confidence intervals if alpha is None: y_pred = self.estimator_.predict( - X, ensemble, return_multi_pred=False + X, ensemble, return_multi_pred=False, **predict_params ) return np.array(y_pred) diff --git a/mapie/regression/time_series_regression.py b/mapie/regression/time_series_regression.py index b4bf0cc03..00bb09758 100644 --- a/mapie/regression/time_series_regression.py +++ b/mapie/regression/time_series_regression.py @@ -405,6 +405,7 @@ def predict( alpha: Optional[Union[float, Iterable[float]]] = None, optimize_beta: bool = False, allow_infinite_bounds: bool = False, + **predict_params, ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Predict target on new samples with confidence intervals. @@ -450,7 +451,8 @@ def predict( """ if alpha is None: super().predict( - X, ensemble=ensemble, alpha=alpha, optimize_beta=optimize_beta + X, ensemble=ensemble, alpha=alpha, optimize_beta=optimize_beta, + **predict_params ) if self.method == "aci": @@ -458,7 +460,7 @@ def predict( return super().predict( X, ensemble=ensemble, alpha=alpha, optimize_beta=optimize_beta, - allow_infinite_bounds=allow_infinite_bounds + allow_infinite_bounds=allow_infinite_bounds, **predict_params ) def _more_tags(self): diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index fb86658d0..ed7f14133 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -867,7 +867,7 @@ def early_stopping_monitor(i, est, locals): else: return False - mapie.fit(X, y, monitor=early_stopping_monitor) + mapie.fit(X, y, fit_params={'monitor': early_stopping_monitor}) assert mapie.estimator_.single_estimator_.estimators_.shape[0] == 3 From a19c1156a6698568522de80ab30c6dcd07ac1f17 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Tue, 2 Jul 2024 14:24:16 +0200 Subject: [PATCH 02/23] Adding unit tests --- mapie/regression/quantile_regression.py | 5 +- mapie/regression/regression.py | 32 +++- mapie/regression/time_series_regression.py | 3 + mapie/tests/test_regression.py | 177 ++++++++++++++++++++- 4 files changed, 209 insertions(+), 8 deletions(-) diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index 63cf3032f..74d1a11c3 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import Iterable, List, Optional, Tuple, Union, cast, Any +from typing import Any, Iterable, List, Optional, Tuple, Union, cast import numpy as np from sklearn.base import RegressorMixin, clone @@ -676,6 +676,9 @@ def predict( each residuals separatly or to use the maximum of the two combined. + **predict_params : dict + Additional predict parameters. + Returns ------- Union[NDArray, Tuple[NDArray, NDArray]] diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 6bc13e226..aae883718 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import Iterable, Optional, Tuple, Union, cast, Any +from typing import Any, Iterable, Optional, Tuple, Union, cast import numpy as np from sklearn.base import BaseEstimator, RegressorMixin @@ -228,6 +228,7 @@ def __init__( verbose: int = 0, conformity_score: Optional[ConformityScore] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, + predict_params: Optional[bool] = False ) -> None: self.estimator = estimator self.method = method @@ -238,6 +239,7 @@ def __init__( self.verbose = verbose self.conformity_score = conformity_score self.random_state = random_state + self.predict_params = predict_params def _check_parameters(self) -> None: """ @@ -502,11 +504,8 @@ def fit( train/test set. By default ``None``. - fit_params : dict - Additional fit parameters. - - predict_params : dict - Additional predict parameters. + kwargs : dict + Additional ft and parameters. Returns ------- @@ -515,6 +514,9 @@ def fit( """ fit_params = kwargs.pop('fit_params', {}) predict_params = kwargs.pop('predict_params', {}) + + if len(predict_params) > 0: + self.predict_params = True # Checks (estimator, self.conformity_score_function_, @@ -621,6 +623,24 @@ def predict( - [:, 0, :]: Lower bound of the prediction interval. - [:, 1, :]: Upper bound of the prediction interval. """ + + if self.predict_params is True: + warnings.warn( + f"Be careful that predict_params: '{predict_params}' " + "is used in fit method", + UserWarning + ) + + elif (len(predict_params) > 0 and + self.predict_params is False and + self.cv != "prefit"): + raise ValueError( + f"Using 'predict_param' '{predict_params}' " + f"without having used it in the fit method. " + f"Please ensure '{predict_params}' " + f"is used in the fit method before calling predict." + ) + # Checks check_is_fitted(self, self.fit_attributes) self._check_ensemble(ensemble) diff --git a/mapie/regression/time_series_regression.py b/mapie/regression/time_series_regression.py index 00bb09758..bf6212800 100644 --- a/mapie/regression/time_series_regression.py +++ b/mapie/regression/time_series_regression.py @@ -440,6 +440,9 @@ def predict( allow_infinite_bounds: bool Allow infinite prediction intervals to be produced. + **predict_params : dict + Additional predict parameters. + Returns ------- Union[NDArray, Tuple[NDArray, NDArray]] diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index ed7f14133..1541ba9ea 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import pytest +from scipy.stats import ttest_1samp from sklearn.compose import ColumnTransformer from sklearn.datasets import make_regression from sklearn.dummy import DummyRegressor @@ -18,7 +19,6 @@ from sklearn.pipeline import Pipeline, make_pipeline from sklearn.preprocessing import OneHotEncoder from sklearn.utils.validation import check_is_fitted -from scipy.stats import ttest_1samp from typing_extensions import TypedDict from mapie._typing import NDArray @@ -41,6 +41,64 @@ random_state = 1 + +class CustomGradientBoostingRegressor(GradientBoostingRegressor): + def __init__(self, + loss='squared_error', + learning_rate=0.1, + n_estimators=100, + subsample=1.0, + criterion='friedman_mse', + min_samples_split=2, + min_samples_leaf=1, + min_weight_fraction_leaf=0.0, + max_depth=3, + min_impurity_decrease=0.0, + init=None, + random_state=None, + max_features=None, + alpha=0.9, + verbose=0, + max_leaf_nodes=None, + warm_start=False, + validation_fraction=0.1, + n_iter_no_change=None, + tol=0.0001, + ccp_alpha=0.0): + + super().__init__( + loss=loss, + learning_rate=learning_rate, + n_estimators=n_estimators, + subsample=subsample, + criterion=criterion, + min_samples_split=min_samples_split, + min_samples_leaf=min_samples_leaf, + min_weight_fraction_leaf=min_weight_fraction_leaf, + max_depth=max_depth, + min_impurity_decrease=min_impurity_decrease, + init=init, + random_state=random_state, + max_features=max_features, + alpha=alpha, + verbose=verbose, + max_leaf_nodes=max_leaf_nodes, + warm_start=warm_start, + validation_fraction=validation_fraction, + n_iter_no_change=n_iter_no_change, + tol=tol, + ccp_alpha=ccp_alpha + ) + + def fit(self, X, y, **kwargs): + return super().fit(X, y, **kwargs) + + def predict(self, X, check_predict_params=False): + if check_predict_params: + return np.zeros(X.shape[0]) + return super().predict(X) + + Params = TypedDict( "Params", { @@ -875,6 +933,123 @@ def early_stopping_monitor(i, est, locals): assert estimator.estimators_.shape[0] == 3 +def test_predict_parameters_passing() -> None: + """ + Test passing predict parameters. + Checks that y_pred from train are 0 and y_pred from test are 0 + """ + + custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) + + X_train, X_test, y_train, y_test = ( + train_test_split(X, y, test_size=0.2, random_state=random_state)) + + mapie_1 = MapieRegressor(estimator=custom_gbr) + + mapie_2 = MapieRegressor(estimator=custom_gbr) + + predict_params = {'check_predict_params': True} + + mapie_1 = mapie_1.fit(X_train, y_train, + predict_params=predict_params) + + np.testing.assert_allclose(mapie_1.conformity_scores_, np.abs(y_train)) + + mapie_2 = mapie_2.fit(X_train, y_train) + + y_pred_1 = mapie_1.predict(X_test, **predict_params) + + np.testing.assert_allclose(y_pred_1, 0) + + y_pred_2 = mapie_2.predict(X_test) + + with np.testing.assert_raises(AssertionError): + np.testing.assert_array_equal(y_pred_1, y_pred_2) + + +def test_fit_and_predict_parameters_passing() -> None: + """ + Test passing fit parameters and predict parameters. + For fit : checks that underlying GradientBoosting + estimators have used 3 iterations only during boosting, + instead of default value for n_estimators (=100). + For predict : Checks that y_pred from train are 0 + and y_pred from test are 0. + """ + def early_stopping_monitor(i, est, locals): + """Returns True on the 3rd iteration.""" + if i == 2: + return True + else: + return False + + custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) + + X_train, X_test, y_train, y_test = ( + train_test_split(X, y, test_size=0.2, random_state=random_state)) + + score = AbsoluteConformityScore(sym=True) + + mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) + + mapie_2 = MapieRegressor(estimator=custom_gbr) + + fit_params = {'monitor': early_stopping_monitor} + + predict_params = {'check_predict_params': True} + + mapie_1 = mapie_1.fit(X_train, y_train, + fit_params=fit_params, + predict_params=predict_params) + + mapie_2 = mapie_2.fit(X_train, y_train) + + assert mapie_1.estimator_.single_estimator_.estimators_.shape[0] == 3 + + for estimator in mapie_1.estimator_.estimators_: + assert estimator.estimators_.shape[0] == 3 + + assert (mapie_2.estimator_.single_estimator_.n_estimators == + custom_gbr.n_estimators) + + for estimator in mapie_2.estimator_.estimators_: + assert estimator.n_estimators == custom_gbr.n_estimators + + np.testing.assert_array_equal(mapie_1.conformity_scores_, np.abs(y_train)) + + y_pred_1 = mapie_1.predict(X_test, **predict_params) + + np.testing.assert_allclose(y_pred_1, 0) + + y_pred_2 = mapie_2.predict(X_test) + + with np.testing.assert_raises(AssertionError): + np.testing.assert_array_equal(y_pred_1, y_pred_2) + + +def test_invalid_predict_parameters() -> None: + """Test that invalid predict_parameters raise errors.""" + + custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) + + X_train, X_test, y_train, y_test = ( + train_test_split(X, y, test_size=0.2, random_state=random_state)) + + mapie = MapieRegressor(estimator=custom_gbr) + + predict_params = {'check_predict_params': True} + + mapie_fitted = mapie.fit(X_train, y_train) + + with pytest.raises(ValueError, match=( + fr".*Using 'predict_param' '{predict_params}'" + r".*without having used it in the fit method\..*" + fr"Please ensure '{predict_params}'" + r".*is used in the fit method before calling predict\..*" + )): + mapie_fitted.predict(X_test, **predict_params) + + def test_predict_infinite_intervals() -> None: """Test that MapieRegressor produces infinite bounds with alpha=0""" mapie_reg = MapieRegressor().fit(X, y) From 306f3be19ac54d3386b374e0af1aa0d5635cc199 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Tue, 2 Jul 2024 14:48:11 +0200 Subject: [PATCH 03/23] Update History.rst --- HISTORY.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 31da81500..59135547a 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -5,6 +5,8 @@ History 0.8.x (2024-xx-xx) ------------------ +* Add `**predict_params` attributes into `MapieRegressor` and linked classes +* Change incoherent sign on C_k in the Kolmogorov-Smirnov statistical test documentation * Building a training set with a fraction between 0 and 1 with `n_samples` attribute when using `split` method from `Subsample` class. 0.8.6 (2024-06-14) From 9317271be8706f8f73749f88b85a295bfb355d29 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Tue, 2 Jul 2024 17:31:37 +0200 Subject: [PATCH 04/23] Fix type-check --- mapie/estimator/classifier.py | 2 +- mapie/estimator/interface.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/mapie/estimator/classifier.py b/mapie/estimator/classifier.py index 16df810e2..a97495319 100644 --- a/mapie/estimator/classifier.py +++ b/mapie/estimator/classifier.py @@ -448,7 +448,7 @@ def predict( self, X: ArrayLike, agg_scores: Optional[str] = None, - **predict_params + **predict_params, ) -> NDArray: """ Predict target from X. It also computes the prediction per train sample diff --git a/mapie/estimator/interface.py b/mapie/estimator/interface.py index e798273b7..fdb67d618 100644 --- a/mapie/estimator/interface.py +++ b/mapie/estimator/interface.py @@ -32,8 +32,6 @@ def fit( def predict( self, X: ArrayLike, - ensemble: bool = False, - return_multi_pred: bool = True, **kwargs, ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: """ From a28d2bfadf19d0f3898d6a12b0d9d1f903d3f25c Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Wed, 3 Jul 2024 12:02:29 +0200 Subject: [PATCH 05/23] Update : take remarks into account --- mapie/estimator/interface.py | 26 ++++------------------ mapie/estimator/regressor.py | 4 ++-- mapie/regression/quantile_regression.py | 6 ++--- mapie/regression/regression.py | 6 ++--- mapie/regression/time_series_regression.py | 4 ++-- mapie/tests/test_regression.py | 23 +++++++------------ 6 files changed, 22 insertions(+), 47 deletions(-) diff --git a/mapie/estimator/interface.py b/mapie/estimator/interface.py index fdb67d618..4b5abab8f 100644 --- a/mapie/estimator/interface.py +++ b/mapie/estimator/interface.py @@ -32,7 +32,7 @@ def fit( def predict( self, X: ArrayLike, - **kwargs, + **kwargs ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: """ Predict target from X. It also computes the prediction per train sample @@ -43,30 +43,12 @@ def predict( X: ArrayLike of shape (n_samples, n_features) Test data. - ensemble: bool - Boolean determining whether the predictions are ensembled or not. - If ``False``, predictions are those of the model trained on the - whole training set. - If ``True``, predictions from perturbed models are aggregated by - the aggregation function specified in the ``agg_function`` - attribute. - - If ``cv`` is ``"prefit"`` or ``"split"``, ``ensemble`` is ignored. - - By default ``False``. - - return_multi_pred: bool - If ``True`` the method returns the predictions and the multiple - predictions (3 arrays). If ``False`` the method return the - simple predictions only. - **kwargs : dict - Additional parameters. + Additional fit and predict parameters. Returns ------- - Tuple[NDArray, NDArray, NDArray] + Tuple[NDArray, NDArray] - Predictions - - The multiple predictions for the lower bound of the intervals. - - The multiple predictions for the upper bound of the intervals. + - Predictions sets """ diff --git a/mapie/estimator/regressor.py b/mapie/estimator/regressor.py index 91dce5011..a200586c6 100644 --- a/mapie/estimator/regressor.py +++ b/mapie/estimator/regressor.py @@ -415,7 +415,7 @@ def fit( y: ArrayLike, sample_weight: Optional[ArrayLike] = None, groups: Optional[ArrayLike] = None, - **fit_params, + **fit_params ) -> EnsembleRegressor: """ Fit the base estimator under the ``single_estimator_`` attribute. @@ -509,7 +509,7 @@ def predict( X: ArrayLike, ensemble: bool = False, return_multi_pred: bool = True, - **predict_params, + **predict_params ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: """ Predict target from X. It also computes the prediction per train sample diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index 74d1a11c3..e66f1939f 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -1,7 +1,7 @@ from __future__ import annotations import warnings -from typing import Any, Iterable, List, Optional, Tuple, Union, cast +from typing import Iterable, List, Optional, Tuple, Union, cast import numpy as np from sklearn.base import RegressorMixin, clone @@ -648,7 +648,7 @@ def predict( optimize_beta: bool = False, allow_infinite_bounds: bool = False, symmetry: Optional[bool] = True, - **predict_params: Any, + **predict_params, ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Predict target on new samples with confidence intervals. @@ -676,7 +676,7 @@ def predict( each residuals separatly or to use the maximum of the two combined. - **predict_params : dict + predict_params : dict Additional predict parameters. Returns diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 09756b9a0..190832190 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -469,7 +469,7 @@ def fit( y: ArrayLike, sample_weight: Optional[ArrayLike] = None, groups: Optional[ArrayLike] = None, - **kwargs: Any, + **kwargs: Any ) -> MapieRegressor: """ Fit estimator and compute conformity scores used for @@ -503,7 +503,7 @@ def fit( By default ``None``. kwargs : dict - Additional ft and parameters. + Additional fit and predict parameters. Returns ------- @@ -609,7 +609,7 @@ def predict( By default ``False``. - **predict_params : dict + predict_params : dict Additional predict parameters. Returns diff --git a/mapie/regression/time_series_regression.py b/mapie/regression/time_series_regression.py index bf6212800..f70e2b0e6 100644 --- a/mapie/regression/time_series_regression.py +++ b/mapie/regression/time_series_regression.py @@ -405,7 +405,7 @@ def predict( alpha: Optional[Union[float, Iterable[float]]] = None, optimize_beta: bool = False, allow_infinite_bounds: bool = False, - **predict_params, + **predict_params ) -> Union[NDArray, Tuple[NDArray, NDArray]]: """ Predict target on new samples with confidence intervals. @@ -440,7 +440,7 @@ def predict( allow_infinite_bounds: bool Allow infinite prediction intervals to be produced. - **predict_params : dict + predict_params : dict Additional predict parameters. Returns diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index c59ee3ff4..d2e6f0599 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -942,27 +942,20 @@ def test_predict_parameters_passing() -> None: custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) X_train, X_test, y_train, y_test = ( - train_test_split(X, y, test_size=0.2, random_state=random_state)) - + train_test_split(X, y, test_size=0.2, random_state=random_state) + ) + custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) mapie_1 = MapieRegressor(estimator=custom_gbr) - mapie_2 = MapieRegressor(estimator=custom_gbr) - predict_params = {'check_predict_params': True} - - mapie_1 = mapie_1.fit(X_train, y_train, - predict_params=predict_params) - - np.testing.assert_allclose(mapie_1.conformity_scores_, np.abs(y_train)) - + mapie_1 = mapie_1.fit( + X_train, y_train, predict_params=predict_params + ) mapie_2 = mapie_2.fit(X_train, y_train) - y_pred_1 = mapie_1.predict(X_test, **predict_params) - - np.testing.assert_allclose(y_pred_1, 0) - y_pred_2 = mapie_2.predict(X_test) - + np.testing.assert_allclose(y_pred_1, 0) + np.testing.assert_allclose(mapie_1.conformity_scores_, np.abs(y_train)) with np.testing.assert_raises(AssertionError): np.testing.assert_array_equal(y_pred_1, y_pred_2) From a495462bafda6f18ad2f4dc96655e0491a237206 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Wed, 3 Jul 2024 16:04:35 +0200 Subject: [PATCH 06/23] Update : take remarks into account v2 --- mapie/regression/regression.py | 18 +++++---- mapie/tests/test_regression.py | 74 +++------------------------------- 2 files changed, 15 insertions(+), 77 deletions(-) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 190832190..094c8554f 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -228,7 +228,6 @@ def __init__( verbose: int = 0, conformity_score: Optional[ConformityScore] = None, random_state: Optional[Union[int, np.random.RandomState]] = None, - predict_params: Optional[bool] = False ) -> None: self.estimator = estimator self.method = method @@ -239,7 +238,6 @@ def __init__( self.verbose = verbose self.conformity_score = conformity_score self.random_state = random_state - self.predict_params = predict_params def _check_parameters(self) -> None: """ @@ -514,7 +512,10 @@ def fit( predict_params = kwargs.pop('predict_params', {}) if len(predict_params) > 0: - self.predict_params = True + self._predict_params = predict_params + else: + self._predict_params = {} + # Checks (estimator, self.conformity_score_function_, @@ -622,15 +623,16 @@ def predict( - [:, 1, :]: Upper bound of the prediction interval. """ - if self.predict_params is True: + if hasattr(self, '_predict_params') and len(self._predict_params) > 0: + predict_params = self._predict_params warnings.warn( - f"Be careful that predict_params: '{predict_params}' " - "is used in fit method", + f"Using predict_params: '{predict_params}' " + "from the fit method in the predict method by default", UserWarning ) - elif (len(predict_params) > 0 and - self.predict_params is False and + elif (len(predict_params) > 0 and hasattr(self, '_predict_params') and + len(self._predict_params) == 0 and self.cv != "prefit"): raise ValueError( f"Using 'predict_param' '{predict_params}' " diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index d2e6f0599..6f48a6821 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -43,52 +43,8 @@ class CustomGradientBoostingRegressor(GradientBoostingRegressor): - def __init__(self, - loss='squared_error', - learning_rate=0.1, - n_estimators=100, - subsample=1.0, - criterion='friedman_mse', - min_samples_split=2, - min_samples_leaf=1, - min_weight_fraction_leaf=0.0, - max_depth=3, - min_impurity_decrease=0.0, - init=None, - random_state=None, - max_features=None, - alpha=0.9, - verbose=0, - max_leaf_nodes=None, - warm_start=False, - validation_fraction=0.1, - n_iter_no_change=None, - tol=0.0001, - ccp_alpha=0.0): - - super().__init__( - loss=loss, - learning_rate=learning_rate, - n_estimators=n_estimators, - subsample=subsample, - criterion=criterion, - min_samples_split=min_samples_split, - min_samples_leaf=min_samples_leaf, - min_weight_fraction_leaf=min_weight_fraction_leaf, - max_depth=max_depth, - min_impurity_decrease=min_impurity_decrease, - init=init, - random_state=random_state, - max_features=max_features, - alpha=alpha, - verbose=verbose, - max_leaf_nodes=max_leaf_nodes, - warm_start=warm_start, - validation_fraction=validation_fraction, - n_iter_no_change=n_iter_no_change, - tol=tol, - ccp_alpha=ccp_alpha - ) + def __init__(self, **kwargs): + super().__init__(**kwargs) def fit(self, X, y, **kwargs): return super().fit(X, y, **kwargs) @@ -976,46 +932,30 @@ def early_stopping_monitor(i, est, locals): else: return False - custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) - X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state)) - + custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) score = AbsoluteConformityScore(sym=True) - mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) - mapie_2 = MapieRegressor(estimator=custom_gbr) - fit_params = {'monitor': early_stopping_monitor} - predict_params = {'check_predict_params': True} - mapie_1 = mapie_1.fit(X_train, y_train, fit_params=fit_params, predict_params=predict_params) - mapie_2 = mapie_2.fit(X_train, y_train) + y_pred_1 = mapie_1.predict(X_test, **predict_params) + y_pred_2 = mapie_2.predict(X_test) assert mapie_1.estimator_.single_estimator_.estimators_.shape[0] == 3 - for estimator in mapie_1.estimator_.estimators_: assert estimator.estimators_.shape[0] == 3 - assert (mapie_2.estimator_.single_estimator_.n_estimators == custom_gbr.n_estimators) - for estimator in mapie_2.estimator_.estimators_: assert estimator.n_estimators == custom_gbr.n_estimators - np.testing.assert_array_equal(mapie_1.conformity_scores_, np.abs(y_train)) - - y_pred_1 = mapie_1.predict(X_test, **predict_params) - np.testing.assert_allclose(y_pred_1, 0) - - y_pred_2 = mapie_2.predict(X_test) - with np.testing.assert_raises(AssertionError): np.testing.assert_array_equal(y_pred_1, y_pred_2) @@ -1024,14 +964,10 @@ def test_invalid_predict_parameters() -> None: """Test that invalid predict_parameters raise errors.""" custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) - X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state)) - mapie = MapieRegressor(estimator=custom_gbr) - predict_params = {'check_predict_params': True} - mapie_fitted = mapie.fit(X_train, y_train) with pytest.raises(ValueError, match=( From 43ed079abb321f42edafc539f0bcbb0d1209d369 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Wed, 3 Jul 2024 16:14:53 +0200 Subject: [PATCH 07/23] run isort --- mapie/regression/regression.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 094c8554f..577122552 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -13,12 +13,12 @@ from mapie._typing import ArrayLike, NDArray from mapie.conformity_scores import ConformityScore, ResidualNormalisedScore -from mapie.estimator.regressor import EnsembleRegressor -from mapie.utils import (check_alpha, check_alpha_and_n_samples, - check_cv, check_estimator_fit_predict, - check_n_features_in, check_n_jobs, check_null_weight, - check_verbose, get_effective_calibration_samples) from mapie.conformity_scores.checks import check_conformity_score +from mapie.estimator.regressor import EnsembleRegressor +from mapie.utils import (check_alpha, check_alpha_and_n_samples, check_cv, + check_estimator_fit_predict, check_n_features_in, + check_n_jobs, check_null_weight, check_verbose, + get_effective_calibration_samples) class MapieRegressor(BaseEstimator, RegressorMixin): From 18b38665e5b7528a2a081bdaa56b36493d839541 Mon Sep 17 00:00:00 2001 From: BaptisteCalot <115455912+BaptisteCalot@users.noreply.github.com> Date: Wed, 3 Jul 2024 16:39:06 +0200 Subject: [PATCH 08/23] Update mapie/regression/quantile_regression.py Co-authored-by: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> --- mapie/regression/quantile_regression.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index e66f1939f..e30646ab3 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -547,6 +547,7 @@ def fit( The model itself. """ self.cv = self._check_cv(cast(str, self.cv)) + # Initialization self.estimators_: List[RegressorMixin] = [] if self.cv == "prefit": From dbf244f8a37e3d3fb01d0fd4c2b76691bdbccc9e Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Wed, 3 Jul 2024 17:27:47 +0200 Subject: [PATCH 09/23] Update tests --- mapie/regression/regression.py | 18 +++++------------- mapie/tests/test_regression.py | 8 ++++---- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 577122552..49c355a5e 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -623,21 +623,13 @@ def predict( - [:, 1, :]: Upper bound of the prediction interval. """ - if hasattr(self, '_predict_params') and len(self._predict_params) > 0: - predict_params = self._predict_params - warnings.warn( - f"Using predict_params: '{predict_params}' " - "from the fit method in the predict method by default", - UserWarning - ) - - elif (len(predict_params) > 0 and hasattr(self, '_predict_params') and - len(self._predict_params) == 0 and - self.cv != "prefit"): + if (len(predict_params) > 0 and hasattr(self, '_predict_params') and + len(self._predict_params) == 0 and + self.cv != "prefit"): raise ValueError( f"Using 'predict_param' '{predict_params}' " - f"without having used it in the fit method. " - f"Please ensure '{predict_params}' " + f"without using one 'predict_param' in the fit method. " + f"Please ensure one 'predict_param' " f"is used in the fit method before calling predict." ) diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 6f48a6821..b61db77dc 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -971,10 +971,10 @@ def test_invalid_predict_parameters() -> None: mapie_fitted = mapie.fit(X_train, y_train) with pytest.raises(ValueError, match=( - fr".*Using 'predict_param' '{predict_params}'" - r".*without having used it in the fit method\..*" - fr"Please ensure '{predict_params}'" - r".*is used in the fit method before calling predict\..*" + fr".*Using 'predict_param' '{predict_params}' " + r"without using one 'predict_param' in the fit method\..*" + r"Please ensure one 'predict_param' " + r"is used in the fit method before calling predict\..*" )): mapie_fitted.predict(X_test, **predict_params) From bbf21b02ebce25aaf56668e884de74b8369489ec Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Thu, 4 Jul 2024 11:45:16 +0200 Subject: [PATCH 10/23] Update : change self._predict params --- mapie/regression/regression.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 49c355a5e..986910d8a 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -512,9 +512,9 @@ def fit( predict_params = kwargs.pop('predict_params', {}) if len(predict_params) > 0: - self._predict_params = predict_params + self._predict_params = True else: - self._predict_params = {} + self._predict_params = False # Checks (estimator, @@ -624,7 +624,7 @@ def predict( """ if (len(predict_params) > 0 and hasattr(self, '_predict_params') and - len(self._predict_params) == 0 and + self._predict_params is False and self.cv != "prefit"): raise ValueError( f"Using 'predict_param' '{predict_params}' " From dd28ae86c3a71c5a4902c9836845badfa5a2277e Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Thu, 4 Jul 2024 18:20:02 +0200 Subject: [PATCH 11/23] Update : Incorporating PR comments --- mapie/regression/regression.py | 2 +- mapie/tests/test_regression.py | 40 ++++++++++++++++++++++++++++------ 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 986910d8a..22f106fe5 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -623,7 +623,7 @@ def predict( - [:, 1, :]: Upper bound of the prediction interval. """ - if (len(predict_params) > 0 and hasattr(self, '_predict_params') and + if (len(predict_params) > 0 and self._predict_params is False and self.cv != "prefit"): raise ValueError( diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index b61db77dc..1e960f6e5 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -916,14 +916,12 @@ def test_predict_parameters_passing() -> None: np.testing.assert_array_equal(y_pred_1, y_pred_2) -def test_fit_and_predict_parameters_passing() -> None: +def test_fit_parameters_passing_with_predict_parameter() -> None: """ - Test passing fit parameters and predict parameters. - For fit : checks that underlying GradientBoosting + Test passing fit parameters with predict parameters into the model. + Checks that underlying GradientBoosting estimators have used 3 iterations only during boosting, instead of default value for n_estimators (=100). - For predict : Checks that y_pred from train are 0 - and y_pred from test are 0. """ def early_stopping_monitor(i, est, locals): """Returns True on the 3rd iteration.""" @@ -944,8 +942,6 @@ def early_stopping_monitor(i, est, locals): fit_params=fit_params, predict_params=predict_params) mapie_2 = mapie_2.fit(X_train, y_train) - y_pred_1 = mapie_1.predict(X_test, **predict_params) - y_pred_2 = mapie_2.predict(X_test) assert mapie_1.estimator_.single_estimator_.estimators_.shape[0] == 3 for estimator in mapie_1.estimator_.estimators_: @@ -954,6 +950,36 @@ def early_stopping_monitor(i, est, locals): custom_gbr.n_estimators) for estimator in mapie_2.estimator_.estimators_: assert estimator.n_estimators == custom_gbr.n_estimators + + +def test_predict_parameters_passing_with_fit_parameter() -> None: + """ + Test passing predict parameters with fit parameters into the model. + Checks that y_pred from train are 0 + and y_pred from test are 0. + """ + def early_stopping_monitor(i, est, locals): + """Returns True on the 3rd iteration.""" + if i == 2: + return True + else: + return False + + X_train, X_test, y_train, y_test = ( + train_test_split(X, y, test_size=0.2, random_state=random_state)) + custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) + score = AbsoluteConformityScore(sym=True) + mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) + mapie_2 = MapieRegressor(estimator=custom_gbr) + fit_params = {'monitor': early_stopping_monitor} + predict_params = {'check_predict_params': True} + mapie_1 = mapie_1.fit(X_train, y_train, + fit_params=fit_params, + predict_params=predict_params) + mapie_2 = mapie_2.fit(X_train, y_train) + y_pred_1 = mapie_1.predict(X_test, **predict_params) + y_pred_2 = mapie_2.predict(X_test) + np.testing.assert_array_equal(mapie_1.conformity_scores_, np.abs(y_train)) np.testing.assert_allclose(y_pred_1, 0) with np.testing.assert_raises(AssertionError): From 964fd5e2a0fb9138e0caed1b84547900ee357039 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 5 Jul 2024 18:06:05 +0200 Subject: [PATCH 12/23] Update : tests --- mapie/tests/test_regression.py | 76 ++++++++++++++-------------------- 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 1e960f6e5..f9248893c 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -55,6 +55,14 @@ def predict(self, X, check_predict_params=False): return super().predict(X) +def early_stopping_monitor(i, est, locals): + """Returns True on the 3rd iteration.""" + if i == 2: + return True + else: + return False + + Params = TypedDict( "Params", { @@ -871,20 +879,10 @@ def test_fit_parameters_passing() -> None: only during boosting, instead of default value for n_estimators (=100). """ gb = GradientBoostingRegressor(random_state=random_state) - mapie = MapieRegressor(estimator=gb, random_state=random_state) - - def early_stopping_monitor(i, est, locals): - """Returns True on the 3rd iteration.""" - if i == 2: - return True - else: - return False - mapie.fit(X, y, fit_params={'monitor': early_stopping_monitor}) assert mapie.estimator_.single_estimator_.estimators_.shape[0] == 3 - for estimator in mapie.estimator_.estimators_: assert estimator.estimators_.shape[0] == 3 @@ -892,17 +890,17 @@ def early_stopping_monitor(i, est, locals): def test_predict_parameters_passing() -> None: """ Test passing predict parameters. - Checks that y_pred from train are 0 and y_pred from test are 0 + Checks that y_pred from train are 0, y_pred from test are 0 and + we check that y_pred constructed with or without predict_params + are different """ - - custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) - X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state) ) custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) - mapie_1 = MapieRegressor(estimator=custom_gbr) - mapie_2 = MapieRegressor(estimator=custom_gbr) + score = AbsoluteConformityScore(sym=True) + mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) + mapie_2 = MapieRegressor(estimator=custom_gbr, conformity_score=score) predict_params = {'check_predict_params': True} mapie_1 = mapie_1.fit( X_train, y_train, predict_params=predict_params @@ -910,38 +908,31 @@ def test_predict_parameters_passing() -> None: mapie_2 = mapie_2.fit(X_train, y_train) y_pred_1 = mapie_1.predict(X_test, **predict_params) y_pred_2 = mapie_2.predict(X_test) - np.testing.assert_allclose(y_pred_1, 0) np.testing.assert_allclose(mapie_1.conformity_scores_, np.abs(y_train)) + np.testing.assert_allclose(y_pred_1, 0) with np.testing.assert_raises(AssertionError): np.testing.assert_array_equal(y_pred_1, y_pred_2) -def test_fit_parameters_passing_with_predict_parameter() -> None: +def test_fit_params_expected_behavior_unaffected_by_predict_params() -> None: """ - Test passing fit parameters with predict parameters into the model. + We want to verify that there are no interferences + with predict_params on the expected behavior of fit_params Checks that underlying GradientBoosting estimators have used 3 iterations only during boosting, instead of default value for n_estimators (=100). """ - def early_stopping_monitor(i, est, locals): - """Returns True on the 3rd iteration.""" - if i == 2: - return True - else: - return False - X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state)) custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) - score = AbsoluteConformityScore(sym=True) - mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) + mapie_1 = MapieRegressor(estimator=custom_gbr) mapie_2 = MapieRegressor(estimator=custom_gbr) fit_params = {'monitor': early_stopping_monitor} predict_params = {'check_predict_params': True} mapie_1 = mapie_1.fit(X_train, y_train, fit_params=fit_params, predict_params=predict_params) - mapie_2 = mapie_2.fit(X_train, y_train) + mapie_2 = mapie_2.fit(X_train, y_train, predict_params=predict_params) assert mapie_1.estimator_.single_estimator_.estimators_.shape[0] == 3 for estimator in mapie_1.estimator_.estimators_: @@ -952,43 +943,40 @@ def early_stopping_monitor(i, est, locals): assert estimator.n_estimators == custom_gbr.n_estimators -def test_predict_parameters_passing_with_fit_parameter() -> None: +def test_predict_params_expected_behavior_unaffected_by_fit_params() -> None: """ - Test passing predict parameters with fit parameters into the model. - Checks that y_pred from train are 0 - and y_pred from test are 0. + We want to verify that there are no interferences + with fit_params on the expected behavior of predict_params + Checks that the predictions on the training and test sets + are 0 for the model with predict_params and that this is not + the case for the model without predict_params """ - def early_stopping_monitor(i, est, locals): - """Returns True on the 3rd iteration.""" - if i == 2: - return True - else: - return False - X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state)) custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) score = AbsoluteConformityScore(sym=True) mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) - mapie_2 = MapieRegressor(estimator=custom_gbr) + mapie_2 = MapieRegressor(estimator=custom_gbr, conformity_score=score) fit_params = {'monitor': early_stopping_monitor} predict_params = {'check_predict_params': True} mapie_1 = mapie_1.fit(X_train, y_train, fit_params=fit_params, predict_params=predict_params) - mapie_2 = mapie_2.fit(X_train, y_train) + mapie_2 = mapie_2.fit(X_train, y_train, fit_params=fit_params,) y_pred_1 = mapie_1.predict(X_test, **predict_params) y_pred_2 = mapie_2.predict(X_test) - np.testing.assert_array_equal(mapie_1.conformity_scores_, np.abs(y_train)) + np.testing.assert_array_equal(mapie_1.conformity_scores_, + np.abs(y_train)) np.testing.assert_allclose(y_pred_1, 0) with np.testing.assert_raises(AssertionError): + np.testing.assert_array_equal(mapie_2.conformity_scores_, + np.abs(y_train)) np.testing.assert_array_equal(y_pred_1, y_pred_2) def test_invalid_predict_parameters() -> None: """Test that invalid predict_parameters raise errors.""" - custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state)) From 3cdf6fe0a1679d1e182702ebe687a658e2003af5 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 5 Jul 2024 18:21:27 +0200 Subject: [PATCH 13/23] Fix : coverage --- mapie/tests/test_regression.py | 1 + 1 file changed, 1 insertion(+) diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index f9248893c..930823ec8 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -972,6 +972,7 @@ def test_predict_params_expected_behavior_unaffected_by_fit_params() -> None: with np.testing.assert_raises(AssertionError): np.testing.assert_array_equal(mapie_2.conformity_scores_, np.abs(y_train)) + with np.testing.assert_raises(AssertionError): np.testing.assert_array_equal(y_pred_1, y_pred_2) From ab5c6e8110386010f9dc2713fee8d34231452ec4 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Wed, 10 Jul 2024 16:29:18 +0200 Subject: [PATCH 14/23] Update : add function in utils --- mapie/regression/regression.py | 15 ++++----------- mapie/utils.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 22f106fe5..3baf04b8f 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -18,7 +18,8 @@ from mapie.utils import (check_alpha, check_alpha_and_n_samples, check_cv, check_estimator_fit_predict, check_n_features_in, check_n_jobs, check_null_weight, check_verbose, - get_effective_calibration_samples) + get_effective_calibration_samples, + check_predict_params) class MapieRegressor(BaseEstimator, RegressorMixin): @@ -623,17 +624,9 @@ def predict( - [:, 1, :]: Upper bound of the prediction interval. """ - if (len(predict_params) > 0 and - self._predict_params is False and - self.cv != "prefit"): - raise ValueError( - f"Using 'predict_param' '{predict_params}' " - f"without using one 'predict_param' in the fit method. " - f"Please ensure one 'predict_param' " - f"is used in the fit method before calling predict." - ) - # Checks + if hasattr(self, '_predict_params'): + check_predict_params(self._predict_params, predict_params, self.cv) check_is_fitted(self, self.fit_attributes) self._check_ensemble(ensemble) alpha = cast(Optional[NDArray], check_alpha(alpha)) diff --git a/mapie/utils.py b/mapie/utils.py index 13641b154..86cd51a82 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -1373,3 +1373,19 @@ def check_n_samples( " int in the range [1, inf)" ) return int(n_samples) + + +def check_predict_params( + predict_params_used_in_fit: bool, + predict_params: dict, + cv: Optional[Union[int, str, BaseCrossValidator]] = None +) -> None: + if (len(predict_params) > 0 and + predict_params_used_in_fit is False and + cv != "prefit"): + raise ValueError( + f"Using 'predict_param' '{predict_params}' " + f"without using one 'predict_param' in the fit method. " + f"Please ensure one 'predict_param' " + f"is used in the fit method before calling predict." + ) From 7ce4c85975011d410021330d6c55fde0d4cf025e Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:16:24 +0200 Subject: [PATCH 15/23] UPD: Apply suggestions from code review --- mapie/regression/regression.py | 6 +++--- mapie/tests/test_regression.py | 24 +++++++++++++++--------- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 3baf04b8f..8ff861211 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -543,8 +543,9 @@ def fit( ) # Predict on calibration data - y_pred = self.estimator_.predict_calib(X, y=y, groups=groups, - **predict_params) + y_pred = self.estimator_.predict_calib( + X, y=y, groups=groups, **predict_params + ) # Compute the conformity scores (manage jk-ab case) self.conformity_scores_ = \ @@ -623,7 +624,6 @@ def predict( - [:, 0, :]: Lower bound of the prediction interval. - [:, 1, :]: Upper bound of the prediction interval. """ - # Checks if hasattr(self, '_predict_params'): check_predict_params(self._predict_params, predict_params, self.cv) diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 930823ec8..3462f5a58 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -923,15 +923,17 @@ def test_fit_params_expected_behavior_unaffected_by_predict_params() -> None: instead of default value for n_estimators (=100). """ X_train, X_test, y_train, y_test = ( - train_test_split(X, y, test_size=0.2, random_state=random_state)) + train_test_split(X, y, test_size=0.2, random_state=random_state) + ) custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) mapie_1 = MapieRegressor(estimator=custom_gbr) mapie_2 = MapieRegressor(estimator=custom_gbr) fit_params = {'monitor': early_stopping_monitor} predict_params = {'check_predict_params': True} - mapie_1 = mapie_1.fit(X_train, y_train, - fit_params=fit_params, - predict_params=predict_params) + mapie_1 = mapie_1.fit( + X_train, y_train, + fit_params=fit_params, predict_params=predict_params + ) mapie_2 = mapie_2.fit(X_train, y_train, predict_params=predict_params) assert mapie_1.estimator_.single_estimator_.estimators_.shape[0] == 3 @@ -952,16 +954,19 @@ def test_predict_params_expected_behavior_unaffected_by_fit_params() -> None: the case for the model without predict_params """ X_train, X_test, y_train, y_test = ( - train_test_split(X, y, test_size=0.2, random_state=random_state)) + train_test_split(X, y, test_size=0.2, random_state=random_state) + ) custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) score = AbsoluteConformityScore(sym=True) mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) mapie_2 = MapieRegressor(estimator=custom_gbr, conformity_score=score) fit_params = {'monitor': early_stopping_monitor} predict_params = {'check_predict_params': True} - mapie_1 = mapie_1.fit(X_train, y_train, - fit_params=fit_params, - predict_params=predict_params) + mapie_1 = mapie_1.fit( + X_train, y_train, + fit_params=fit_params, + predict_params=predict_params + ) mapie_2 = mapie_2.fit(X_train, y_train, fit_params=fit_params,) y_pred_1 = mapie_1.predict(X_test, **predict_params) y_pred_2 = mapie_2.predict(X_test) @@ -980,7 +985,8 @@ def test_invalid_predict_parameters() -> None: """Test that invalid predict_parameters raise errors.""" custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) X_train, X_test, y_train, y_test = ( - train_test_split(X, y, test_size=0.2, random_state=random_state)) + train_test_split(X, y, test_size=0.2, random_state=random_state) + ) mapie = MapieRegressor(estimator=custom_gbr) predict_params = {'check_predict_params': True} mapie_fitted = mapie.fit(X_train, y_train) From c4af59f4d4e95279029eb1a3cd14184f4944fc96 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Mon, 15 Jul 2024 14:17:42 +0200 Subject: [PATCH 16/23] UPD: remove doctring --- mapie/estimator/interface.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/mapie/estimator/interface.py b/mapie/estimator/interface.py index 4b5abab8f..e015d4d7c 100644 --- a/mapie/estimator/interface.py +++ b/mapie/estimator/interface.py @@ -37,18 +37,4 @@ def predict( """ Predict target from X. It also computes the prediction per train sample for each test sample according to ``self.method``. - - Parameters - ---------- - X: ArrayLike of shape (n_samples, n_features) - Test data. - - **kwargs : dict - Additional fit and predict parameters. - - Returns - ------- - Tuple[NDArray, NDArray] - - Predictions - - Predictions sets """ From 8f058a3bd1db7855ba659d4da3bff18917380b47 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Mon, 15 Jul 2024 16:04:38 +0200 Subject: [PATCH 17/23] Add check_predict_params() docstring --- mapie/utils.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/mapie/utils.py b/mapie/utils.py index 86cd51a82..806927dc2 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -1380,6 +1380,24 @@ def check_predict_params( predict_params: dict, cv: Optional[Union[int, str, BaseCrossValidator]] = None ) -> None: + """ + Check that if predict_params is used in the predict method, + it is also used in the fit method. Otherwise, raise an error." + + Parameters + ---------- + predict_params_used_in_fit: bool + True or False. It is True if one or more predict_params + are used in the fit method + + predict_param: dict. Contains all predict params used in predict method + + Raises + ------ + ValueError + "If any predict_params are used in the predict method but none + are used in the fit method." + """ if (len(predict_params) > 0 and predict_params_used_in_fit is False and cv != "prefit"): From 76018ada27d260fc8bc747153104816998932023 Mon Sep 17 00:00:00 2001 From: Thibault Cordier <124613154+thibaultcordier@users.noreply.github.com> Date: Mon, 15 Jul 2024 17:22:00 +0200 Subject: [PATCH 18/23] UPD: Apply suggestions from code review --- mapie/utils.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mapie/utils.py b/mapie/utils.py index 806927dc2..34d077695 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -1382,21 +1382,21 @@ def check_predict_params( ) -> None: """ Check that if predict_params is used in the predict method, - it is also used in the fit method. Otherwise, raise an error." + it is also used in the fit method. Otherwise, raise an error. Parameters ---------- predict_params_used_in_fit: bool - True or False. It is True if one or more predict_params - are used in the fit method + True if one or more predict_params are used in the fit method - predict_param: dict. Contains all predict params used in predict method + predict_param: dict + Contains all predict params used in predict method Raises ------ ValueError - "If any predict_params are used in the predict method but none - are used in the fit method." + If any predict_params are used in the predict method but none + are used in the fit method. """ if (len(predict_params) > 0 and predict_params_used_in_fit is False and From 41efb83bb8334d77c3b2e1de0a125553b168e377 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Mon, 15 Jul 2024 17:37:35 +0200 Subject: [PATCH 19/23] Update : History --- HISTORY.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/HISTORY.rst b/HISTORY.rst index b88fc99dc..26ad2df7f 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -5,6 +5,7 @@ History 0.8.x (2024-xx-xx) ------------------ +* Add `** predict_params` in fit and predict method for Mapie Regression * Building unit tests for different `Subsample` and `BlockBooststrap` instances * Change the sign of C_k in the `Kolmogorov-Smirnov` test documentation * Building a training set with a fraction between 0 and 1 with `n_samples` attribute when using `split` method from `Subsample` class. From 14267a2a35b8fe815b973781b356fcfb4f9e2a3b Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Wed, 24 Jul 2024 16:23:59 +0200 Subject: [PATCH 20/23] Add : Taking comments into account --- mapie/tests/test_regression.py | 51 ++++++++++------------------------ 1 file changed, 15 insertions(+), 36 deletions(-) diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 3462f5a58..0f9382130 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -890,28 +890,21 @@ def test_fit_parameters_passing() -> None: def test_predict_parameters_passing() -> None: """ Test passing predict parameters. - Checks that y_pred from train are 0, y_pred from test are 0 and - we check that y_pred constructed with or without predict_params - are different + Checks that y_pred from train are 0, y_pred from test are 0. """ X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state) ) custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) score = AbsoluteConformityScore(sym=True) - mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) - mapie_2 = MapieRegressor(estimator=custom_gbr, conformity_score=score) + mapie_model = MapieRegressor(estimator=custom_gbr, conformity_score=score) predict_params = {'check_predict_params': True} - mapie_1 = mapie_1.fit( + mapie_model = mapie_model.fit( X_train, y_train, predict_params=predict_params ) - mapie_2 = mapie_2.fit(X_train, y_train) - y_pred_1 = mapie_1.predict(X_test, **predict_params) - y_pred_2 = mapie_2.predict(X_test) - np.testing.assert_allclose(mapie_1.conformity_scores_, np.abs(y_train)) - np.testing.assert_allclose(y_pred_1, 0) - with np.testing.assert_raises(AssertionError): - np.testing.assert_array_equal(y_pred_1, y_pred_2) + y_pred = mapie_model.predict(X_test, **predict_params) + np.testing.assert_allclose(mapie_model.conformity_scores_, np.abs(y_train)) + np.testing.assert_allclose(y_pred, 0) def test_fit_params_expected_behavior_unaffected_by_predict_params() -> None: @@ -926,23 +919,17 @@ def test_fit_params_expected_behavior_unaffected_by_predict_params() -> None: train_test_split(X, y, test_size=0.2, random_state=random_state) ) custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) - mapie_1 = MapieRegressor(estimator=custom_gbr) - mapie_2 = MapieRegressor(estimator=custom_gbr) + mapie_model = MapieRegressor(estimator=custom_gbr) fit_params = {'monitor': early_stopping_monitor} predict_params = {'check_predict_params': True} - mapie_1 = mapie_1.fit( + mapie_model = mapie_model.fit( X_train, y_train, fit_params=fit_params, predict_params=predict_params ) - mapie_2 = mapie_2.fit(X_train, y_train, predict_params=predict_params) - assert mapie_1.estimator_.single_estimator_.estimators_.shape[0] == 3 - for estimator in mapie_1.estimator_.estimators_: + assert mapie_model.estimator_.single_estimator_.estimators_.shape[0] == 3 + for estimator in mapie_model.estimator_.estimators_: assert estimator.estimators_.shape[0] == 3 - assert (mapie_2.estimator_.single_estimator_.n_estimators == - custom_gbr.n_estimators) - for estimator in mapie_2.estimator_.estimators_: - assert estimator.n_estimators == custom_gbr.n_estimators def test_predict_params_expected_behavior_unaffected_by_fit_params() -> None: @@ -958,27 +945,19 @@ def test_predict_params_expected_behavior_unaffected_by_fit_params() -> None: ) custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) score = AbsoluteConformityScore(sym=True) - mapie_1 = MapieRegressor(estimator=custom_gbr, conformity_score=score) - mapie_2 = MapieRegressor(estimator=custom_gbr, conformity_score=score) + mapie_model = MapieRegressor(estimator=custom_gbr, conformity_score=score) fit_params = {'monitor': early_stopping_monitor} predict_params = {'check_predict_params': True} - mapie_1 = mapie_1.fit( + mapie_model = mapie_model.fit( X_train, y_train, fit_params=fit_params, predict_params=predict_params ) - mapie_2 = mapie_2.fit(X_train, y_train, fit_params=fit_params,) - y_pred_1 = mapie_1.predict(X_test, **predict_params) - y_pred_2 = mapie_2.predict(X_test) + y_pred = mapie_model.predict(X_test, **predict_params) - np.testing.assert_array_equal(mapie_1.conformity_scores_, + np.testing.assert_array_equal(mapie_model.conformity_scores_, np.abs(y_train)) - np.testing.assert_allclose(y_pred_1, 0) - with np.testing.assert_raises(AssertionError): - np.testing.assert_array_equal(mapie_2.conformity_scores_, - np.abs(y_train)) - with np.testing.assert_raises(AssertionError): - np.testing.assert_array_equal(y_pred_1, y_pred_2) + np.testing.assert_allclose(y_pred, 0) def test_invalid_predict_parameters() -> None: From 20a881ebd36a93b9354c49c2b7595c53a73860e2 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Thu, 25 Jul 2024 10:26:36 +0200 Subject: [PATCH 21/23] Change : name of unit test and its documentation --- mapie/regression/regression.py | 1 - mapie/tests/test_regression.py | 5 +++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 6d97481e8..aa6656e81 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -515,7 +515,6 @@ def fit( """ fit_params = kwargs.pop('fit_params', {}) predict_params = kwargs.pop('predict_params', {}) - if len(predict_params) > 0: self._predict_params = True else: diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index f6b02013c..9bc5bfa36 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -965,8 +965,9 @@ def test_predict_params_expected_behavior_unaffected_by_fit_params() -> None: np.testing.assert_allclose(y_pred, 0) -def test_invalid_predict_parameters() -> None: - """Test that invalid predict_parameters raise errors.""" +def test_using_one_predict_parameter_into_predict_but_not_in_fit() -> None: + """Test that using predict parameters in the predict method + without using one predict_parameter in the fit method raises an error""" custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state) From b2d03b10f64a21638d21c9dd3104f067d0c9961b Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Fri, 26 Jul 2024 14:57:35 +0200 Subject: [PATCH 22/23] Add : new raise value error and linked unit test --- mapie/tests/test_regression.py | 21 +++++++++++++++++++++ mapie/utils.py | 24 +++++++++++++++--------- 2 files changed, 36 insertions(+), 9 deletions(-) diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 9bc5bfa36..9aae449f2 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -985,6 +985,27 @@ def test_using_one_predict_parameter_into_predict_but_not_in_fit() -> None: mapie_fitted.predict(X_test, **predict_params) +def test_using_one_predict_parameter_into_fit_but_not_in_predict() -> None: + """Test that using predict parameters in the fit method + without using one predict_parameter in + the predict method raises an error""" + custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) + X_train, X_test, y_train, y_test = ( + train_test_split(X, y, test_size=0.2, random_state=random_state) + ) + mapie = MapieRegressor(estimator=custom_gbr) + predict_params = {'check_predict_params': True} + mapie_fitted = mapie.fit(X_train, y_train, predict_params=predict_params) + + with pytest.raises(ValueError, match=( + r"Using one 'predict_param' in the fit method " + r"without using one 'predict_param' in the predict method. " + r"Please ensure one 'predict_param' " + r"is used in the predict method before calling it." + )): + mapie_fitted.predict(X_test) + + def test_predict_infinite_intervals() -> None: """Test that MapieRegressor produces infinite bounds with alpha=0""" mapie_reg = MapieRegressor().fit(X, y) diff --git a/mapie/utils.py b/mapie/utils.py index 34d077695..224e5b05d 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -1398,12 +1398,18 @@ def check_predict_params( If any predict_params are used in the predict method but none are used in the fit method. """ - if (len(predict_params) > 0 and - predict_params_used_in_fit is False and - cv != "prefit"): - raise ValueError( - f"Using 'predict_param' '{predict_params}' " - f"without using one 'predict_param' in the fit method. " - f"Please ensure one 'predict_param' " - f"is used in the fit method before calling predict." - ) + if cv != "prefit": + if len(predict_params) > 0 and predict_params_used_in_fit is False: + raise ValueError( + f"Using 'predict_param' '{predict_params}' " + f"without using one 'predict_param' in the fit method. " + f"Please ensure one 'predict_param' " + f"is used in the fit method before calling predict." + ) + if len(predict_params) == 0 and predict_params_used_in_fit is True: + raise ValueError( + "Using one 'predict_param' in the fit method " + "without using one 'predict_param' in the predict method. " + "Please ensure one 'predict_param' " + "is used in the predict method before calling it." + ) From d2bc12ff2b75983a5fabfdd1cad591904b9cc3f6 Mon Sep 17 00:00:00 2001 From: Baptiste Calot Date: Thu, 1 Aug 2024 18:17:17 +0200 Subject: [PATCH 23/23] Update : Taking into account PR comments --- mapie/tests/test_regression.py | 30 +++++++++++++++++++----------- mapie/utils.py | 8 ++++---- 2 files changed, 23 insertions(+), 15 deletions(-) diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 9aae449f2..80e578556 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -966,8 +966,10 @@ def test_predict_params_expected_behavior_unaffected_by_fit_params() -> None: def test_using_one_predict_parameter_into_predict_but_not_in_fit() -> None: - """Test that using predict parameters in the predict method - without using one predict_parameter in the fit method raises an error""" + """ + Test that using predict parameters in the predict method + without using predict_parameter in the fit method raises an error. + """ custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state) @@ -979,16 +981,18 @@ def test_using_one_predict_parameter_into_predict_but_not_in_fit() -> None: with pytest.raises(ValueError, match=( fr".*Using 'predict_param' '{predict_params}' " r"without using one 'predict_param' in the fit method\..*" - r"Please ensure one 'predict_param' " - r"is used in the fit method before calling predict\..*" + r"Please ensure a similar configuration of 'predict_param' " + r"is used in the fit method before calling it in predict\..*" )): mapie_fitted.predict(X_test, **predict_params) def test_using_one_predict_parameter_into_fit_but_not_in_predict() -> None: - """Test that using predict parameters in the fit method - without using one predict_parameter in - the predict method raises an error""" + """ + Test that using predict parameters in the fit method + without using predict_parameter in + the predict method raises an error. + """ custom_gbr = CustomGradientBoostingRegressor(random_state=random_state) X_train, X_test, y_train, y_test = ( train_test_split(X, y, test_size=0.2, random_state=random_state) @@ -1000,14 +1004,16 @@ def test_using_one_predict_parameter_into_fit_but_not_in_predict() -> None: with pytest.raises(ValueError, match=( r"Using one 'predict_param' in the fit method " r"without using one 'predict_param' in the predict method. " - r"Please ensure one 'predict_param' " - r"is used in the predict method before calling it." + r"Please ensure a similar configuration of 'predict_param' " + r"is used in the predict method as called in the fit." )): mapie_fitted.predict(X_test) def test_predict_infinite_intervals() -> None: - """Test that MapieRegressor produces infinite bounds with alpha=0""" + """ + Test that MapieRegressor produces infinite bounds with alpha=0 + """ mapie_reg = MapieRegressor().fit(X, y) _, y_pis = mapie_reg.predict(X, alpha=0., allow_infinite_bounds=True) np.testing.assert_allclose(y_pis[:, 0, 0], -np.inf) @@ -1017,7 +1023,9 @@ def test_predict_infinite_intervals() -> None: @pytest.mark.parametrize("method", ["minmax", "naive", "plus", "base"]) @pytest.mark.parametrize("cv", ["split", "prefit"]) def test_check_change_method_to_base(method: str, cv: str) -> None: - """Test of the overloading of method attribute to `base` method in fit""" + """ + Test of the overloading of method attribute to `base` method in fit + """ X_train, X_val, y_train, y_val = train_test_split( X, y, test_size=0.5, random_state=random_state diff --git a/mapie/utils.py b/mapie/utils.py index 224e5b05d..fa781edb5 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -1403,13 +1403,13 @@ def check_predict_params( raise ValueError( f"Using 'predict_param' '{predict_params}' " f"without using one 'predict_param' in the fit method. " - f"Please ensure one 'predict_param' " - f"is used in the fit method before calling predict." + f"Please ensure a similar configuration of 'predict_param' " + f"is used in the fit method before calling it in predict." ) if len(predict_params) == 0 and predict_params_used_in_fit is True: raise ValueError( "Using one 'predict_param' in the fit method " "without using one 'predict_param' in the predict method. " - "Please ensure one 'predict_param' " - "is used in the predict method before calling it." + "Please ensure a similar configuration of 'predict_param' " + "is used in the predict method as called in the fit." )