Skip to content

Commit

Permalink
GH-6779: Add custom metric to leaderboard [nocheck] (#15568)
Browse files Browse the repository at this point in the history
* Add custom metric to leaderboard

* Verify sorting by custom metric

* Make sure the custom metrics share the same names

* Fix custom metric calculation in CV (#15576)

* GH-15565: Add custom metric to automl [nocheck] (#15577)

* Fix custom metric calculation in CV (prototype)

* Allow custom_metric_func in automl

* Return NaN for models that don't support custom metric

* Revert change used to ensure existing metric

* GH-15559: Add custom metric to SE [nocheck] (#15579)

* Fix custom metric calculation in CV (prototype)

* Allow custom_metric_func in automl

* Return NaN for models that don't support custom metric

* Revert change used to ensure existing metric

* Add custom metric to SE

* Mention SE support of the custom metric func in docs

* Use directly model metrics in leaderboard

This change is motivated by an issue with the SE level-one validation frame,
which is ephemeral and cannot be checksummed.
  • Loading branch information
tomasfryda authored and maurever committed Sep 6, 2023
1 parent a6b6c88 commit 5288084
Show file tree
Hide file tree
Showing 18 changed files with 216 additions and 22 deletions.
1 change: 1 addition & 0 deletions h2o-algos/src/main/java/hex/ensemble/Metalearner.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ protected void setCommonParams(P parms) {
parms._weights_column = _model._parms._weights_column;
parms._offset_column = _model._parms._offset_column;
parms._main_model_time_budget_factor = _model._parms._main_model_time_budget_factor;
parms._custom_metric_func = _model._parms._custom_metric_func;
}

protected void setCrossValidationParams(P parms) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public static final class StackedEnsembleParametersV99 extends ModelParametersSc
"max_runtime_secs",
"weights_column",
"offset_column",
"custom_metric_func",
"seed",
"score_training_samples",
"keep_levelone_frame",
Expand Down
4 changes: 3 additions & 1 deletion h2o-algos/src/main/java/hex/tree/Score.java
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,13 @@ protected boolean modifiesVolatileVecs() {
_mb.reduce(t._mb);
}

// We need to satsify MB invariant
// We need to satisfy MB invariant
/**
 * Finalizes scoring once the distributed pass is complete.
 * After the inherited post-processing, the metric builder (if there is one) is handed the
 * computed custom metric; when a custom function reference is present, the builder is also
 * given a reference to this task through its {@code _CMetricScoringTask} field.
 */
@Override protected void postGlobal() {
  super.postGlobal();
  if (_mb == null) {
    return; // no metric builder, nothing to finalize
  }
  _mb.postGlobal(getComputedCustomMetric());
  if (cFuncRef != null) {
    _mb._CMetricScoringTask = (CMetricScoringTask) this;
  }
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ public static final class AutoMLBuildControl extends Iced {
public double tweedie_power = 1.5;
public double quantile_alpha = 0.5;
public double huber_alpha = 0.9;
public String custom_metric_func;

public boolean keep_cross_validation_predictions = false;
public boolean keep_cross_validation_models = false;
Expand Down
3 changes: 2 additions & 1 deletion h2o-automl/src/main/java/ai/h2o/automl/ModelingStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -361,11 +361,12 @@ protected void setCommonModelBuilderParams(Model.Parameters params) {
setCrossValidationParams(params);
setWeightingParams(params);
setClassBalancingParams(params);
params._custom_metric_func = buildSpec.build_control.custom_metric_func;

params._keep_cross_validation_models = buildSpec.build_control.keep_cross_validation_models;
params._keep_cross_validation_fold_assignment = buildSpec.build_control.nfolds != 0 && buildSpec.build_control.keep_cross_validation_fold_assignment;
params._export_checkpoints_dir = buildSpec.build_control.export_checkpoints_dir;

/** Using _main_model_time_budget_factor to determine if and how we should restrict the time for the main model.
* Value 0 means do not use time constraint for the main model.
* More details in {@link ModelBuilder#setMaxRuntimeSecsForMainModel()}.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ public static final class AutoMLBuildControlV99 extends SchemaV3<AutoMLBuildSpec
@API(direction = API.Direction.INPUT,
help = "Desired quantile for Huber/M-regression (threshold between quadratic and linear loss, must be between 0 and 1).")
public double huber_alpha;


@API(help = "Reference to custom evaluation function, format: `language:keyName=funcName`", level = API.Level.secondary, direction=API.Direction.INOUT, gridable = false)
public String custom_metric_func;

@API(help = "Reference to custom distribution, format: `language:keyName=funcName`", direction=API.Direction.INOUT)
public String custom_distribution_func;
} // class AutoMLBuildControlV99
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public AutoMLKeyV3(Key<AutoML> key) {

@API(help="Metric used to sort leaderboard", direction=API.Direction.INPUT)
public String sort_metric;

@API(help="The list of modeling steps effectively used during the AutoML run", direction=API.Direction.OUTPUT)
public StepDefinitionV99[] modeling_steps;

Expand Down
15 changes: 11 additions & 4 deletions h2o-core/src/main/java/hex/CMetricScoringTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@ protected final void customMetricPerRow(double preds[], float yact[],double weig
/**
 * Merges another task into this one: first the inherited state via {@code super.reduce},
 * then the custom-metric state via {@link #reduceCustomMetric}.
 */
@Override
public void reduce(T t) {
super.reduce(t);
reduceCustomMetric(t);
}

public void reduceCustomMetric(T t) {
if (func != null) {
if (customMetricWs == null) {
customMetricWs = t.customMetricWs;
Expand All @@ -56,15 +60,18 @@ public void reduce(T t) {
/**
 * Runs the inherited post-processing and then stores the value returned by
 * {@link #computeCustomMetric()} in {@code result}.
 */
@Override
protected void postGlobal() {
super.postGlobal();
result = computeCustomMetric();
}

public CustomMetric computeCustomMetric() {
if (func != null) {
result = CustomMetric.from(cFuncRef.getName(),
return CustomMetric.from(cFuncRef.getName(),
customMetricWs != null ? func.metric(customMetricWs)
: Double.NaN);
} else {
result = null;
}
return null;
}

/** Returns the {@code result} field as-is; nothing is computed here. */
public CustomMetric getComputedCustomMetric() {
return result;
}
Expand Down
2 changes: 2 additions & 0 deletions h2o-core/src/main/java/hex/Model.java
Original file line number Diff line number Diff line change
Expand Up @@ -2288,6 +2288,8 @@ public void close() {
super.postGlobal();
if(_mb != null) {
_mb.postGlobal(getComputedCustomMetric());
if (null != cFuncRef)
_mb._CMetricScoringTask = (CMetricScoringTask) this;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion h2o-core/src/main/java/hex/ModelBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -854,7 +854,7 @@ public ModelMetrics.MetricBuilder[] cv_scoreCVModels(int N, Vec[] weights, Model
|| _parms._keep_cross_validation_predictions
|| (cvModel.isDistributionHuber() /*need to compute quantiles on abs error of holdout predictions*/)) {
String predName = cvModelBuilders[i].getPredictionKey();
Model.PredictScoreResult result = cvModel.predictScoreImpl(cvValid, adaptFr, predName, _job, true, CFuncRef.NOP);
Model.PredictScoreResult result = cvModel.predictScoreImpl(cvValid, adaptFr, predName, _job, true, CFuncRef.from(_parms._custom_metric_func));
result.makeModelMetrics(cvValid, adaptFr);
mbs[i] = result.getMetricBuilder();
DKV.put(cvModel);
Expand Down
13 changes: 12 additions & 1 deletion h2o-core/src/main/java/hex/ModelMetrics.java
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,12 @@ public static double getMetricFromModelMetric(ModelMetrics mm, String criterion)
Object obj = null;
criterion = criterion.toLowerCase();

if ("custom".equals(criterion)){
if (null == mm._custom_metric)
return Double.NaN;
return mm._custom_metric.value;
}

// Constructing confusion matrix based on criterion
ConfusionMatrix cm;
if(mm instanceof ModelMetricsBinomial) {
Expand Down Expand Up @@ -173,7 +179,7 @@ public static double getMetricFromModelMetric(ModelMetrics mm, String criterion)
}
}
if (null == method)
throw new H2OIllegalArgumentException("Failed to find ModelMetrics for criterion: " + criterion);
throw new H2OIllegalArgumentException("Failed to find ModelMetrics for criterion: " + criterion + " for model_id: " + mm._modelKey);

try {
return (double) method.invoke(obj);
Expand Down Expand Up @@ -417,6 +423,7 @@ public static abstract class MetricBuilder<T extends MetricBuilder<T>> extends I

// Custom metric holder
public CustomMetric _customMetric = null;
public CMetricScoringTask _CMetricScoringTask = null;

public double weightedSigma() {
// double sampleCorrection = _count/(_count-1); //sample variance -> depends on the number of ACTUAL ROWS (not the weighted count)
Expand All @@ -442,6 +449,10 @@ public void reduce(Object mb) {
}

/**
 * Cross-validation flavour of {@code reduce}.
 * When this builder holds a custom-metric scoring task, the other builder's task is folded
 * into it and {@code _customMetric} is recomputed from the combined state; the regular
 * {@code reduce} is then applied in every case.
 *
 * @param mb the metric builder to merge into this one
 */
public void reduceForCV(T mb){
  final CMetricScoringTask ownTask = _CMetricScoringTask;
  if (ownTask != null) {
    ownTask.reduceCustomMetric(mb._CMetricScoringTask);
    _customMetric = ownTask.computeCustomMetric();
  }
  this.reduce(mb);
}

Expand Down
30 changes: 21 additions & 9 deletions h2o-core/src/main/java/hex/leaderboard/Leaderboard.java
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,19 @@ public void addModels(final Key<Model>[] modelKeys) {

if (_metrics == null) {
// lazily set to default for this model category
setDefaultMetrics(modelKeys[0].get());
Model model = null;
String cm = modelKeys[0].get()._parms._custom_metric_func;
String[] metricsFirst = defaultMetricsForModel(modelKeys[0].get());
for (Key<Model> k : modelKeys) {
final String[] metrics = defaultMetricsForModel(model = k.get());
if (metrics.length != metricsFirst.length || !Arrays.equals(metricsFirst, metrics))
throw new H2OIllegalArgumentException("Models don't have the same metrics (e.g. model \"" +
modelKeys[0].toString()+"\" and model \""+k+"\").");
if (!Objects.equals(cm, k.get()._parms._custom_metric_func))
throw new H2OIllegalArgumentException("Models don't have the same custom metrics (e.g. model \"" +
modelKeys[0].toString()+"\" and model \""+k+"\").");
}
setDefaultMetrics(model);
}

for (Key<Model> key : badKeys) {
Expand Down Expand Up @@ -634,11 +646,8 @@ private double getMetric(String metric, Model model) {
);
} else {
// otherwise use default model metrics
Key model_key = model._key;
long model_checksum = model.checksum();
ModelMetrics mm = getModelMetrics(model);
return ModelMetrics.getMetricFromModelMetric(
_leaderboard_model_metrics.get(ModelMetrics.buildKey(model_key, model_checksum, mm.frame()._key, mm.frame().checksum())),
getModelMetrics(model),
metric
);
}
Expand Down Expand Up @@ -670,14 +679,17 @@ protected Futures remove_impl(Futures fs, boolean cascade) {
}

/**
 * Lists the metric names used by default for the given model.
 * The base list depends on the model category (binomial, multinomial, other supervised);
 * models in none of these categories get no base metrics. {@code "custom"} is appended
 * whenever the model's parameters carry a {@code _custom_metric_func}.
 *
 * @param m model whose output category and parameters are inspected
 * @return the metric names in order; possibly empty, never {@code null}
 */
private static String[] defaultMetricsForModel(Model m) {
  ArrayList<String> result = new ArrayList<>();
  if (m._output.isBinomialClassifier()) { // binomial
    Collections.addAll(result, "auc", "logloss", "aucpr", "mean_per_class_error", "rmse", "mse");
  } else if (m._output.isMultinomialClassifier()) { // multinomial
    Collections.addAll(result, "mean_per_class_error", "logloss", "rmse", "mse");
  } else if (m._output.isSupervised()) { // regression
    Collections.addAll(result, "rmse", "mse", "mae", "rmsle", "mean_residual_deviance");
  }
  // Accumulate instead of returning early, so the custom metric can be added for any category.
  if (m._parms._custom_metric_func != null)
    result.add("custom");
  return result.toArray(new String[0]);
}

private double[] getModelMetricValues(int rank) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
``custom_metric_func``
----------------------

- Available in: GBM, DRF, GLM, Deeplearning
- Available in: GBM, DRF, GLM, Deeplearning, Stacked Ensembles
- Hyperparameter: no

Description
Expand Down
8 changes: 7 additions & 1 deletion h2o-py/h2o/automl/_estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ def __init__(self,
keep_cross_validation_models=False,
keep_cross_validation_fold_assignment=False,
sort_metric="AUTO",
custom_metric_func=None,
export_checkpoints_dir=None,
verbosity="warn",
**kwargs):
Expand Down Expand Up @@ -288,6 +289,9 @@ def __init__(self,
- ``"rmsle"``
Defaults to ``"AUTO"`` (This translates to ``"auc"`` for binomial classification, ``"mean_per_class_error"`` for multinomial classification, ``"deviance"`` for regression).
:param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName`
Defaults to ``None``.
:type custom_metric_func: str, optional
:param export_checkpoints_dir: Path to a directory where every model will be stored in binary form.
:param verbosity: Verbosity of the backend messages printed during training.
Available options are ``None`` (live log disabled), ``"debug"``, ``"info"``, ``"warn"`` or ``"error"``.
Expand Down Expand Up @@ -332,6 +336,7 @@ def __init__(self,
self.project_name = project_name
self.nfolds = nfolds
self.distribution = distribution
self.custom_metric_func = custom_metric_func
self.balance_classes = balance_classes
self.class_sampling_factors = class_sampling_factors
self.max_after_balance_size = max_after_balance_size
Expand Down Expand Up @@ -488,6 +493,7 @@ def __validate_distribution(self, distribution):
_huber_alpha = _aml_property('build_control.huber_alpha', types=(numeric,), freezable=True)
_tweedie_power = _aml_property('build_control.tweedie_power', types=(numeric,), freezable=True)
_quantile_alpha = _aml_property('build_control.quantile_alpha', types=(numeric,), freezable=True)
custom_metric_func = _aml_property('build_control.custom_metric_func', types=(str, None))
balance_classes = _aml_property('build_control.balance_classes', types=(bool,), freezable=True)
class_sampling_factors = _aml_property('build_control.class_sampling_factors', types=(None, [numeric]), freezable=True)
max_after_balance_size = _aml_property('build_control.max_after_balance_size', types=(None, numeric), freezable=True)
Expand Down Expand Up @@ -531,7 +537,7 @@ def __validate_distribution(self, distribution):
blending_frame = _aml_property('input_spec.blending_frame', set_input=False,
validate_fn=ft.partial(__validate_frame, name='blending_frame'))
response_column = _aml_property('input_spec.response_column', types=(str,))

#---------------------------------------------------------------------------
# Basic properties
#---------------------------------------------------------------------------
Expand Down
19 changes: 19 additions & 0 deletions h2o-py/h2o/estimators/stackedensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def __init__(self,
max_runtime_secs=0.0, # type: float
weights_column=None, # type: Optional[str]
offset_column=None, # type: Optional[str]
custom_metric_func=None, # type: Optional[str]
seed=-1, # type: int
score_training_samples=10000, # type: int
keep_levelone_frame=False, # type: bool
Expand Down Expand Up @@ -151,6 +152,9 @@ def __init__(self,
function.
Defaults to ``None``.
:type offset_column: str, optional
:param custom_metric_func: Reference to custom evaluation function, format: `language:keyName=funcName`
Defaults to ``None``.
:type custom_metric_func: str, optional
:param seed: Seed for random numbers; passed through to the metalearner algorithm. Defaults to -1 (time-based
random number)
Defaults to ``-1``.
Expand Down Expand Up @@ -186,6 +190,7 @@ def __init__(self,
self.max_runtime_secs = max_runtime_secs
self.weights_column = weights_column
self.offset_column = offset_column
self.custom_metric_func = custom_metric_func
self.seed = seed
self.score_training_samples = score_training_samples
self.keep_levelone_frame = keep_levelone_frame
Expand Down Expand Up @@ -714,6 +719,20 @@ def offset_column(self, offset_column):
assert_is_type(offset_column, None, str)
self._parms["offset_column"] = offset_column

@property
def custom_metric_func(self):
"""
Reference to custom evaluation function, format: `language:keyName=funcName`
Type: ``str``.
"""
# Read with ``dict.get``, so the result is ``None`` when the key is absent from ``_parms``.
return self._parms.get("custom_metric_func")

@custom_metric_func.setter
def custom_metric_func(self, custom_metric_func):
# Accepts ``None`` or a ``str``; the value is stored in ``_parms`` unchanged.
assert_is_type(custom_metric_func, None, str)
self._parms["custom_metric_func"] = custom_metric_func

@property
def seed(self):
"""
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import os
import sys

sys.path.insert(1, os.path.join("..", "..", ".."))
import h2o
from h2o.automl import H2OAutoML
from tests import pyunit_utils as pu, dataset_prostate, CustomMaeFunc


# Trains AutoML with an uploaded custom metric (named "mae") and sort_metric="custom", then
# checks that the leaderboard's ``custom`` column equals its ``mae`` column wherever ``custom``
# is not NaN, and that at least one model reports a non-NaN custom value.
def test_automl_custom_metric():
def custom_mae_mm():
# Uploads CustomMaeFunc; the value returned by upload_custom_metric is passed to AutoML below.
return h2o.upload_custom_metric(CustomMaeFunc, func_name="mae", func_file="mm_mae.py")

ftrain, fvalid, _ = dataset_prostate()
# The validation split is appended to the training data; no validation frame is passed to train().
ftrain = ftrain.rbind(fvalid)
ftrain = h2o.H2OFrame(ftrain.as_data_frame(), "my_training_frame")
aml = H2OAutoML(max_models=20, custom_metric_func=custom_mae_mm(), sort_metric="custom")
aml.train(y="AGE", training_frame=ftrain)

for sd in ["train", "valid", "xval", "AUTO"]:
print(sd + "\n" + ("=" * len(sd)))
# NOTE(review): ``sd`` only drives the printed header; the leaderboard below is always built
# with scoring_data="xval", so all four iterations check the same data. If per-scoring-data
# coverage was intended this should presumably be ``scoring_data=sd`` — first confirm that
# "valid" is usable here, since no validation frame is supplied.
ldb = h2o.make_leaderboard(aml, scoring_data="xval").as_data_frame()
print(f"MAE==Custom: {((ldb.mae == ldb.custom) | ldb.custom.isna()).all()}")
print(ldb)
# NaN custom values are tolerated per row, but not for every row.
assert ((ldb.mae == ldb.custom) | ldb.custom.isna()).all() and (~ldb.custom.isna()).any()


pu.run_tests([
test_automl_custom_metric,
])
Loading

0 comments on commit 5288084

Please sign in to comment.