Skip to content

Commit

Permalink
ML.NET 2.0 Samples (#967)
Browse files Browse the repository at this point in the history
* Initial commit AutoML samples

* Add text classification

* Add sentence similarity

* Add placeholder directory for data files

* Update TrialRunner to use Text Classification

* All samples verified to work. Data paths updated.

* Minor updates

* Add Text classification razor MB tutorial sample

* Updated READMEs
  • Loading branch information
luisquintanilla authored Nov 10, 2022
1 parent 87aeeab commit 46df062
Show file tree
Hide file tree
Showing 105 changed files with 75,732 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -331,3 +331,7 @@ ASALocalRun/
samples/modelbuilder/ObjectDetection_StopSigns/StopSignDetectionML.Model/MLModel.zip
samples/modelbuilder/ObjectDetection_StopSigns/StopSignDetectionML.Model/bestModel.onnx
samples/modelbuilder/ObjectDetection_StopSigns/assets.zip
samples/csharp/getting-started/MLNET2/Data/*.csv
samples/csharp/getting-started/MLNET2/Data/*.txt
samples/csharp/getting-started/MLNET2/**/*.mbconfig
samples/modelbuilder/TextClassification_Sentiment_Razor/SentimentRazor/**/*.mbconfig
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,8 @@ The ML.NET CLI (command-line interface) is a tool you can run on any command-pro

## AutoML API samples: (Preview state)

**THESE SAMPLES USE THE 0.1.x VERSION OF THE AUTOML API. WHILE THESE APIS STILL WORK IN VERSION 0.2.x WE RECOMMEND USING THE NEW APIS INTRODUCED IN 0.2.x AND LATER. FOR 0.2.x SAMPLES, SEE [ML.NET 2.0 Samples](samples/csharp/getting-started/MLNET2/README.md)**.

The ML.NET AutoML API is a set of libraries packaged as a NuGet package that you can use from your .NET code. AutoML eliminates the task of manually selecting algorithms and hyperparameters: it intelligently generates many combinations of algorithms and hyperparameters and finds high-quality models for you.

| AutoML API samples |
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings: console executable targeting .NET 6, with implicit global usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- NuGet dependencies: the AutoML package used by this sample. -->
<ItemGroup>
<PackageReference Include="Microsoft.ML.AutoML" Version="0.20.0" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using Microsoft.ML.AutoML;

namespace AutoMLAdvanced
{
    /// <summary>
    /// <see cref="IMonitor"/> implementation that writes trial progress to the console
    /// and keeps every completed trial so it can be inspected after the experiment.
    /// </summary>
    public class AutoMLMonitor : IMonitor
    {
        private readonly SweepablePipeline _pipeline;
        private readonly List<TrialResult> _completedTrials;

        /// <summary>Creates a monitor for the given pipeline.</summary>
        /// <param name="pipeline">Pipeline used to render each trial's parameters as text.</param>
        public AutoMLMonitor(SweepablePipeline pipeline)
        {
            _pipeline = pipeline;
            _completedTrials = new List<TrialResult>();
        }

        /// <summary>Returns the trials recorded so far by <see cref="ReportCompletedTrial"/>.</summary>
        public IEnumerable<TrialResult> GetCompletedTrials() => _completedTrials;

        /// <summary>Intentionally a no-op: this monitor does not report best trials.</summary>
        public void ReportBestTrial(TrialResult result)
        {
            return;
        }

        /// <summary>Logs the finished trial (id, duration, pipeline) and records it.</summary>
        /// <param name="result">Result of the trial that just completed.</param>
        public void ReportCompletedTrial(TrialResult result)
        {
            var trialId = result.TrialSettings.TrialId;
            var timeToTrain = result.DurationInMilliseconds;
            var pipeline = _pipeline.ToString(result.TrialSettings.Parameter);
            Console.WriteLine($"Trial {trialId} finished training in {timeToTrain}ms with pipeline {pipeline}");
            _completedTrials.Add(result);
        }

        /// <summary>
        /// Logs a failed trial. A cancellation is reported once as "cancelled"
        /// rather than additionally as a failure.
        /// </summary>
        /// <param name="settings">Settings of the trial that failed.</param>
        /// <param name="exception">The failure cause; may be <c>null</c> (it is the declared default).</param>
        public void ReportFailTrial(TrialSettings settings, Exception? exception = null)
        {
            // The parameter defaults to null, so it must not be dereferenced unconditionally.
            if (exception is null)
            {
                Console.WriteLine($"{settings.TrialId} failed. No exception details were provided.");
                return;
            }

            var wasCancelled =
                exception is OperationCanceledException ||
                exception.Message.Contains("Operation was canceled.", StringComparison.Ordinal);

            if (wasCancelled)
            {
                Console.WriteLine($"{settings.TrialId} cancelled. Time budget exceeded.");
                // Return here so a cancelled trial is not also logged as a failure.
                return;
            }

            Console.WriteLine($"{settings.TrialId} failed with exception {exception.Message}");
        }

        /// <summary>Intentionally a no-op: this monitor does not report running trials.</summary>
        public void ReportRunningTrial(TrialSettings setting)
        {
            return;
        }
    }
}
62 changes: 62 additions & 0 deletions samples/csharp/getting-started/MLNET2/AutoMLAdvanced/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Import namespaces
using AutoMLAdvanced;
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;

// Initialize MLContext
MLContext ctx = new MLContext();

// Locate the training data relative to the build output directory (bin/<config>/<tfm>).
// Path.Combine keeps the path portable (no hard-coded '\' separators), and anchoring on
// AppContext.BaseDirectory makes it independent of the current working directory.
var dataPath = Path.GetFullPath(
    Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "Data", "taxi-fare-train.csv"));

// Infer column information
ColumnInferenceResults columnInference =
    ctx.Auto().InferColumns(dataPath, labelColumnName: "fare_amount", groupColumns: false);

// Modify column inference results: treat rate_code as categorical instead of numeric
columnInference.ColumnInformation.NumericColumnNames.Remove("rate_code");
columnInference.ColumnInformation.CategoricalColumnNames.Add("rate_code");

// Create text loader
TextLoader loader = ctx.Data.CreateTextLoader(columnInference.TextLoaderOptions);

// Load data into IDataView
IDataView data = loader.Load(dataPath);

// Split into train (80%), validation (20%) sets
TrainTestData trainValidationData = ctx.Data.TrainTestSplit(data, testFraction: 0.2);

// Define pipeline: featurizer followed by a sweepable set of regression trainers (LightGBM excluded)
SweepablePipeline pipeline =
    ctx.Auto().Featurizer(data, columnInformation: columnInference.ColumnInformation)
        .Append(ctx.Auto().Regression(labelColumnName: columnInference.ColumnInformation.LabelColumnName, useLgbm: false));

// Create AutoML experiment
AutoMLExperiment experiment = ctx.Auto().CreateExperiment();

// Configure experiment
experiment
    .SetPipeline(pipeline)
    .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.ColumnInformation.LabelColumnName)
    .SetTrainingTimeInSeconds(60)
    .SetGridSearchTuner()
    .SetDataset(trainValidationData);

// Log experiment trials
var monitor = new AutoMLMonitor(pipeline);
experiment.SetMonitor(monitor);

// Set checkpoints
var checkpointPath = Path.Join(Directory.GetCurrentDirectory(), "automl");
experiment.SetCheckpoint(checkpointPath);

// Run experiment. CancellationTokenSource is IDisposable, so release it when the program ends.
using var cts = new CancellationTokenSource();
TrialResult experimentResults = await experiment.RunAsync(cts.Token);

// Get best model
var model = experimentResults.Model;

// Get all completed trials
var completedTrials = monitor.GetCompletedTrials();
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings: console executable targeting .NET 6, with implicit global usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- NuGet dependencies: the AutoML package used by this sample. -->
<ItemGroup>
<PackageReference Include="Microsoft.ML.AutoML" Version="0.20.0" />
</ItemGroup>

</Project>
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
using Microsoft.ML.AutoML;

namespace AutoMLAdvanced
{
    /// <summary>
    /// <see cref="IMonitor"/> implementation that writes trial progress to the console
    /// and keeps every completed trial so it can be inspected after the experiment.
    /// </summary>
    public class AutoMLMonitor : IMonitor
    {
        private readonly SweepablePipeline _pipeline;
        private readonly List<TrialResult> _completedTrials;

        /// <summary>Creates a monitor for the given pipeline.</summary>
        /// <param name="pipeline">Pipeline used to render each trial's parameters as text.</param>
        public AutoMLMonitor(SweepablePipeline pipeline)
        {
            _pipeline = pipeline;
            _completedTrials = new List<TrialResult>();
        }

        /// <summary>Returns the trials recorded so far by <see cref="ReportCompletedTrial"/>.</summary>
        public IEnumerable<TrialResult> GetCompletedTrials() => _completedTrials;

        /// <summary>Intentionally a no-op: this monitor does not report best trials.</summary>
        public void ReportBestTrial(TrialResult result)
        {
            return;
        }

        /// <summary>Logs the finished trial (id, duration, pipeline) and records it.</summary>
        /// <param name="result">Result of the trial that just completed.</param>
        public void ReportCompletedTrial(TrialResult result)
        {
            var trialId = result.TrialSettings.TrialId;
            var timeToTrain = result.DurationInMilliseconds;
            var pipeline = _pipeline.ToString(result.TrialSettings.Parameter);
            Console.WriteLine($"Trial {trialId} finished training in {timeToTrain}ms with pipeline {pipeline}");
            _completedTrials.Add(result);
        }

        /// <summary>
        /// Logs a failed trial. A cancellation is reported once as "cancelled"
        /// rather than additionally as a failure.
        /// </summary>
        /// <param name="settings">Settings of the trial that failed.</param>
        /// <param name="exception">The failure cause; may be <c>null</c> (it is the declared default).</param>
        public void ReportFailTrial(TrialSettings settings, Exception? exception = null)
        {
            // The parameter defaults to null, so it must not be dereferenced unconditionally.
            if (exception is null)
            {
                Console.WriteLine($"{settings.TrialId} failed. No exception details were provided.");
                return;
            }

            var wasCancelled =
                exception is OperationCanceledException ||
                exception.Message.Contains("Operation was canceled.", StringComparison.Ordinal);

            if (wasCancelled)
            {
                Console.WriteLine($"{settings.TrialId} cancelled. Time budget exceeded.");
                // Return here so a cancelled trial is not also logged as a failure.
                return;
            }

            Console.WriteLine($"{settings.TrialId} failed with exception {exception.Message}");
        }

        /// <summary>Intentionally a no-op: this monitor does not report running trials.</summary>
        public void ReportRunningTrial(TrialSettings setting)
        {
            return;
        }
    }
}
75 changes: 75 additions & 0 deletions samples/csharp/getting-started/MLNET2/AutoMLEstimators/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Import namespaces
using AutoMLAdvanced;
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.AutoML.CodeGen;
using Microsoft.ML.Data;
using Microsoft.ML.SearchSpace;
using Microsoft.ML.SearchSpace.Option;
using Microsoft.ML.Trainers;
using static Microsoft.ML.DataOperationsCatalog;

// Initialize MLContext
MLContext ctx = new MLContext();

// Locate the training data relative to the build output directory (bin/<config>/<tfm>).
// Path.Combine keeps the path portable (no hard-coded '\' separators), and anchoring on
// AppContext.BaseDirectory makes it independent of the current working directory.
var dataPath = Path.GetFullPath(
    Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "Data", "taxi-fare-train.csv"));

// Infer column information
ColumnInferenceResults columnInference =
    ctx.Auto().InferColumns(dataPath, labelColumnName: "fare_amount", groupColumns: false);

// Create text loader
TextLoader loader = ctx.Data.CreateTextLoader(columnInference.TextLoaderOptions);

// Load data into IDataView
IDataView data = loader.Load(dataPath);

// Split into train (80%), validation (20%) sets
TrainTestData trainValidationData = ctx.Data.TrainTestSplit(data, testFraction: 0.2);

// Initialize default SDCA search space
var sdcaSearchSpace = new SearchSpace<SdcaOption>();

// Modify L1 search space range
sdcaSearchSpace["L1Regularization"] = new UniformSingleOption(min: 0.01f, max: 2.0f, logBase: false, defaultValue: 0.01f);

// Use the search space to define a custom factory to create an SdcaRegressionTrainer.
// The lambda parameter is named mlContext so it does not shadow the outer ctx variable.
var sdcaFactory = (MLContext mlContext, SdcaOption param) =>
{
    var sdcaOption = new SdcaRegressionTrainer.Options
    {
        // Taken from the swept parameters of the current trial
        L1Regularization = param.L1Regularization,
        // Held constant across trials
        L2Regularization = 0.02f,
        LabelColumnName = columnInference.ColumnInformation.LabelColumnName,
    };

    return mlContext.Regression.Trainers.Sdca(sdcaOption);
};

// Define SDCA sweepable estimator (SdcaRegressionTrainer + SdcaOption search space)
var sdcaSweepableEstimator = ctx.Auto().CreateSweepableEstimator(sdcaFactory, sdcaSearchSpace);

// Add sweepable estimator to sweepable pipeline
SweepablePipeline pipeline =
    ctx.Auto().Featurizer(data, columnInformation: columnInference.ColumnInformation)
        .Append(sdcaSweepableEstimator);

// Create AutoML experiment
AutoMLExperiment experiment = ctx.Auto().CreateExperiment();

// Configure experiment
experiment
    .SetPipeline(pipeline)
    .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.ColumnInformation.LabelColumnName)
    .SetTrainingTimeInSeconds(60)
    .SetDataset(trainValidationData);

// Log experiment trials
var monitor = new AutoMLMonitor(pipeline);
experiment.SetMonitor(monitor);

// Run experiment. CancellationTokenSource is IDisposable, so release it when the program ends.
using var cts = new CancellationTokenSource();
TrialResult experimentResults = await experiment.RunAsync(cts.Token);

// Get best model
var model = experimentResults.Model;
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
<Project Sdk="Microsoft.NET.Sdk">

<!-- Build settings: console executable targeting .NET 6, with implicit global usings and nullable reference types enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<!-- NuGet dependencies: the AutoML package used by this sample. -->
<ItemGroup>
<PackageReference Include="Microsoft.ML.AutoML" Version="0.20.0" />
</ItemGroup>

</Project>
53 changes: 53 additions & 0 deletions samples/csharp/getting-started/MLNET2/AutoMLQuickStart/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Import namespaces
using Microsoft.ML;
using Microsoft.ML.AutoML;
using Microsoft.ML.Data;
using static Microsoft.ML.DataOperationsCatalog;

// Initialize MLContext
MLContext ctx = new MLContext();

// Define data path relative to the build output directory (bin/<config>/<tfm>).
// Path.Combine keeps the path portable (no hard-coded '\' separators), and anchoring on
// AppContext.BaseDirectory makes it independent of the current working directory.
var dataPath = Path.GetFullPath(
    Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "..", "Data", "taxi-fare-train.csv"));

// Infer column information
ColumnInferenceResults columnInference =
    ctx.Auto().InferColumns(dataPath, labelColumnName: "fare_amount", groupColumns: false);

// Create text loader
TextLoader loader = ctx.Data.CreateTextLoader(columnInference.TextLoaderOptions);

// Load data into IDataView
IDataView data = loader.Load(dataPath);

// Split into train (80%), validation (20%) sets
TrainTestData trainValidationData = ctx.Data.TrainTestSplit(data, testFraction: 0.2);

// Define pipeline: featurizer followed by a sweepable set of regression trainers
SweepablePipeline pipeline =
    ctx.Auto().Featurizer(data, columnInformation: columnInference.ColumnInformation)
        .Append(ctx.Auto().Regression(labelColumnName: columnInference.ColumnInformation.LabelColumnName));

// Create AutoML experiment
AutoMLExperiment experiment = ctx.Auto().CreateExperiment();

// Configure experiment
experiment
    .SetPipeline(pipeline)
    .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.ColumnInformation.LabelColumnName)
    .SetTrainingTimeInSeconds(60)
    .SetDataset(trainValidationData);

// Log experiment trials: only forward messages emitted by the AutoML experiment itself.
// The static string.Equals is null-safe and makes the ordinal comparison explicit.
ctx.Log += (_, e) =>
{
    if (string.Equals(e.Source, "AutoMLExperiment", StringComparison.Ordinal))
    {
        Console.WriteLine(e.RawMessage);
    }
};

// Run experiment
TrialResult experimentResults = await experiment.RunAsync();

// Get best model
var model = experimentResults.Model;
Loading

0 comments on commit 46df062

Please sign in to comment.