Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[GH-5720] AIC/Loglikelihood test fixes #5723

Merged
merged 7 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ object MetricFieldExceptions {
/** Returns the names of schema fields that are excluded when metrics are resolved. */
def ignored(): Set[String] = {
  val excludedFieldNames: Set[String] = Set(
    "__meta",
    "domain",
    "model",
    "model_checksum",
    "frame",
    "frame_checksum",
    "model_category",
    "predictions")
  excludedFieldNames
}

/**
  * Returns the names of metric fields that this object classifies as optional.
  *
  * The set includes "AIC" and "loglikelihood" in addition to the custom-metric and mean-score fields.
  * NOTE(review): how callers treat an "optional" field is not visible from here - confirm against the usages.
  */
def optional(): Set[String] =
  Set("custom_metric_name", "custom_metric_value", "mean_score", "mean_normalized_score", "AIC", "loglikelihood")
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ import water.api.API

trait MetricResolver {
def resolveMetrics(substitutionContext: ModelMetricsSubstitutionContext): Seq[Metric] = {
val h2oSchemaClass = substitutionContext.h2oSchemaClass
val h2oSchemaClass: Class[_] = substitutionContext.h2oSchemaClass

val parameters =
for (field <- h2oSchemaClass.getDeclaredFields
if field.getAnnotation(classOf[API]) != null && !MetricFieldExceptions.ignored().contains(field.getName))
if field.getAnnotation(classOf[API]) != null
if !MetricFieldExceptions.ignored().contains(field.getName)
if !substitutionContext.skipFields.contains(field.getName))
yield {
val (swFieldName, swMetricName) = MetricNameConverter.convertFromH2OToSW(field.getName)
Metric(swFieldName, swMetricName, field.getName, field.getType, field.getAnnotation(classOf[API]).help())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,82 +22,92 @@ import water.api.schemas3._

/** Supplies the list of metric entities, one [[ModelMetricsSubstitutionContext]] per H2O metrics schema class. */
trait MetricsConfigurations {

  /**
    * Builds the substitution contexts for all metric entities.
    *
    * Every context is created with named arguments only, so each value is passed exactly once.
    *
    * @return one context per metric entity, in declaration order
    */
  def metricsConfiguration: Seq[ModelMetricsSubstitutionContext] = {
    // Field names passed as `skipFields` to the GLM contexts below.
    // NOTE(review): presumably these fields are already provided by a parent entity - confirm.
    val duplicatedGLMMetrics = Seq("loglikelihood", "AIC")
    Seq(
      ModelMetricsSubstitutionContext(
        entityName = "H2OCommonMetrics",
        h2oSchemaClass = classOf[ModelMetricsBaseV3[_, _]],
        parentEntities = Seq("H2OMetrics"),
        classDescription = "The class makes available all metrics that shared across all algorithms, and ML problems." +
          " (classification, regression, dimension reduction)."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OBinomialMetrics",
        h2oSchemaClass = classOf[ModelMetricsBinomialV3[_, _]],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription =
          "The class makes available all metrics that shared across all algorithms supporting binomial classification."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OBinomialGLMMetrics",
        h2oSchemaClass = classOf[ModelMetricsBinomialGLMV3],
        parentEntities = Seq("H2OBinomialMetrics", "H2OGLMMetrics"),
        classDescription = "The class makes available all binomial metrics supported by GLM algorithm.",
        skipFields = duplicatedGLMMetrics),
      ModelMetricsSubstitutionContext(
        entityName = "H2ORegressionMetrics",
        h2oSchemaClass = classOf[ModelMetricsRegressionV3[_, _]],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription =
          "The class makes available all metrics that shared across all algorithms supporting regression."),
      ModelMetricsSubstitutionContext(
        entityName = "H2ORegressionGLMMetrics",
        h2oSchemaClass = classOf[ModelMetricsRegressionGLMV3],
        parentEntities = Seq("H2ORegressionMetrics", "H2OGLMMetrics"),
        classDescription = "The class makes available all regression metrics supported by GLM algorithm.",
        skipFields = duplicatedGLMMetrics),
      ModelMetricsSubstitutionContext(
        entityName = "H2ORegressionCoxPHMetrics",
        h2oSchemaClass = classOf[ModelMetricsRegressionCoxPHV3],
        parentEntities = Seq("H2ORegressionMetrics"),
        classDescription = "The class makes available all regression metrics supported by CoxPH algorithm."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OMultinomialMetrics",
        h2oSchemaClass = classOf[ModelMetricsMultinomialV3[_, _]],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription =
          "The class makes available all metrics that shared across all algorithms supporting multinomial classification."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OMultinomialGLMMetrics",
        h2oSchemaClass = classOf[ModelMetricsMultinomialGLMV3],
        parentEntities = Seq("H2OMultinomialMetrics", "H2OGLMMetrics"),
        classDescription = "The class makes available all multinomial metrics supported by GLM algorithm.",
        skipFields = duplicatedGLMMetrics),
      ModelMetricsSubstitutionContext(
        entityName = "H2OOrdinalMetrics",
        h2oSchemaClass = classOf[ModelMetricsOrdinalV3[_, _]],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription =
          "The class makes available all metrics that shared across all algorithms supporting ordinal regression."),
      // NOTE(review): unlike the other GLM contexts, this one passes no `skipFields` - confirm this is intended.
      ModelMetricsSubstitutionContext(
        entityName = "H2OOrdinalGLMMetrics",
        h2oSchemaClass = classOf[ModelMetricsOrdinalGLMV3],
        parentEntities = Seq("H2OOrdinalMetrics", "H2OGLMMetrics"),
        classDescription = "The class makes available all ordinal metrics supported by GLM algorithm."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OAnomalyMetrics",
        h2oSchemaClass = classOf[ModelMetricsAnomalyV3],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription =
          "The class makes available all metrics that shared across all algorithms supporting anomaly detection."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OClusteringMetrics",
        h2oSchemaClass = classOf[ModelMetricsClusteringV3],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription =
          "The class makes available all metrics that shared across all algorithms supporting clustering."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OAutoEncoderMetrics",
        h2oSchemaClass = classOf[ModelMetricsAutoEncoderV3],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription = "The class provides all metrics available for ``H2OAutoEncoder``."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OGLRMMetrics",
        h2oSchemaClass = classOf[ModelMetricsGLRMV99],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription = "The class provides all metrics available for ``H2OGLRM``."),
      ModelMetricsSubstitutionContext(
        entityName = "H2OPCAMetrics",
        h2oSchemaClass = classOf[ModelMetricsPCAV3],
        parentEntities = Seq("H2OCommonMetrics"),
        classDescription = "The class provides all metrics available for ``H2OPCA``."))
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ case class ModelMetricsSubstitutionContext(
entityName: String,
h2oSchemaClass: Class[_],
parentEntities: Seq[String],
classDescription: String)
classDescription: String,
skipFields: Seq[String] = Seq.empty)
extends SubstitutionContextBase {

val namespace = "ai.h2o.sparkling.ml.metrics"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,9 @@ import ai.h2o.sparkling.{SharedH2OTestContext, TestUtils}
import hex.Model
import hex.tree.gbm.GBMModel.GBMParameters
import org.apache.spark.sql.functions._
import org.apache.spark.sql.DataFrame
import org.apache.spark.sql.{DataFrame, Row, SparkSession}
import org.apache.spark.ml.param.{ParamMap, Params}
import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.sql.SparkSession
import org.junit.runner.RunWith
import org.scalatest.junit.JUnitRunner
import org.scalatest.{FunSuite, Matchers}
Expand Down Expand Up @@ -268,6 +267,8 @@ class H2OGridSearchTestSuite extends FunSuite with Matchers with SharedH2OTestCo
}

test("The first row returned by getGridModelsMetrics() method is the same as current metrics of the best model") {
import spark.implicits._

val drf = new H2ODRF()
.setFeaturesCols(Array("AGE", "RACE", "DPROS", "DCAPS", "PSA", "VOL", "GLEASON"))
.setLabelCol("CAPSULE")
Expand All @@ -288,7 +289,8 @@ class H2OGridSearchTestSuite extends FunSuite with Matchers with SharedH2OTestCo
val gridModelMetrics = search.getGridModelsMetrics().drop("MOJO Model ID")
val modelMetricsFromGrid = gridModelMetrics.columns.zip(gridModelMetrics.head().toSeq).toMap

modelMetricsFromGrid shouldEqual expectedMetrics
modelMetricsFromGrid.filter(!_._2.asInstanceOf[Double].isNaN) should contain theSameElementsAs expectedMetrics
.filter(!_._2.isNaN)
}

test("The first row returned by getGridModelsParams() method is the same as training params of the best model") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class MetricsTestSuite extends FunSuite with Matchers with SparkTestContext {
|
| "r2" : 1.0,
| "logloss": 2.0,
| "loglikelihood": 2.0,
| "AIC": 4.0,
| "AUC": 3.0,
| "pr_auc": 4.0,
| "Gini": 5.0,
Expand Down
2 changes: 1 addition & 1 deletion py/tests/unit/with_runtime_sparkling/test_gridsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def testGetGridModelsMetrics(prostateDataset):
grid.fit(prostateDataset)
metrics = grid.getGridModelsMetrics()
assert metrics.count() == 3
expectedCols = ['MOJO Model ID', 'RMSLE', 'Nobs', 'RMSE', 'MAE', 'MeanResidualDeviance', 'ScoringTime', 'MSE', 'R2']
expectedCols = ['MOJO Model ID', 'RMSLE', 'Nobs', 'RMSE', 'MAE', 'MeanResidualDeviance', 'ScoringTime', "Loglikelihood", 'MSE', 'R2', 'AIC']
assert metrics.columns == expectedCols
metrics.collect() # try materializing

Expand Down
6 changes: 4 additions & 2 deletions py/tests/unit/with_runtime_sparkling/test_mojo.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import shutil
import unit_test_utils
import os
import math

from pyspark.mllib.linalg import *
from pyspark.sql.types import *
Expand Down Expand Up @@ -63,7 +64,7 @@ def testModelCategory(gbmModel):
def testTrainingMetrics(gbmModel):
metrics = gbmModel.getTrainingMetrics()
assert metrics is not None
assert len(metrics) is 10
assert len(metrics) is 12


def testFeatureTypes(gbmModel):
Expand Down Expand Up @@ -245,7 +246,8 @@ def compareMetricValues(metricsObject, metricsMap):
for metric in metricsMap:
metricValue = metricsMap[metric]
objectValue = getattr(metricsObject, "get" + metric)()
assert(metricValue == objectValue)
if not math.isnan(metricValue) and not math.isnan(objectValue):
assert(metricValue == objectValue)
assert metricsObject.getConfusionMatrix().count() > 0
assert len(metricsObject.getConfusionMatrix().columns) > 0
assert metricsObject.getGainsLiftTable().count() > 0
Expand Down
Loading