-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Initial commit AutoML samples * Add text classification * Add sentence similarity * Add placeholder directory for data files * Update TrialRunner to use Text Classification * All samples verified to work. Data paths updated. * Minor updates * Add Text classification razor MB tutorial sample * Updated READMEs
- Loading branch information
1 parent
87aeeab
commit 46df062
Showing
105 changed files
with
75,732 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
14 changes: 14 additions & 0 deletions
14
samples/csharp/getting-started/MLNET2/AutoMLAdvanced/AutoMLAdvanced.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net6.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="Microsoft.ML.AutoML" Version="0.20.0" /> | ||
</ItemGroup> | ||
|
||
</Project> |
46 changes: 46 additions & 0 deletions
46
samples/csharp/getting-started/MLNET2/AutoMLAdvanced/AutoMLMonitor.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
using Microsoft.ML.AutoML; | ||
|
||
namespace AutoMLAdvanced
{
    /// <summary>
    /// <see cref="IMonitor"/> implementation that writes trial progress to the console
    /// and keeps every completed <see cref="TrialResult"/> for later inspection.
    /// </summary>
    public class AutoMLMonitor : IMonitor
    {
        private readonly SweepablePipeline _pipeline;
        private readonly List<TrialResult> _completedTrials;

        /// <summary>
        /// Creates a monitor for the given pipeline.
        /// </summary>
        /// <param name="pipeline">Pipeline used to render each trial's parameters as text.</param>
        public AutoMLMonitor(SweepablePipeline pipeline)
        {
            _pipeline = pipeline;
            _completedTrials = new List<TrialResult>();
        }

        /// <summary>
        /// Returns the trials reported through <see cref="ReportCompletedTrial"/> so far,
        /// in the order they were reported.
        /// </summary>
        public IEnumerable<TrialResult> GetCompletedTrials() => _completedTrials;

        /// <summary>
        /// Intentionally does nothing; best-trial notifications are not logged by this monitor.
        /// </summary>
        public void ReportBestTrial(TrialResult result)
        {
        }

        /// <summary>
        /// Logs the trial id, training duration and rendered pipeline, then records the result.
        /// </summary>
        /// <param name="result">The completed trial.</param>
        public void ReportCompletedTrial(TrialResult result)
        {
            var trialId = result.TrialSettings.TrialId;
            var timeToTrain = result.DurationInMilliseconds;
            var pipeline = _pipeline.ToString(result.TrialSettings.Parameter);
            Console.WriteLine($"Trial {trialId} finished training in {timeToTrain}ms with pipeline {pipeline}");
            _completedTrials.Add(result);
        }

        /// <summary>
        /// Logs a failed trial. A cancellation is reported once as a cancellation
        /// rather than additionally as a failure.
        /// </summary>
        /// <param name="settings">Settings of the trial that failed.</param>
        /// <param name="exception">The failure cause; may be <c>null</c> (it is the declared default).</param>
        public void ReportFailTrial(TrialSettings settings, Exception? exception = null)
        {
            // The parameter defaults to null, so it must not be dereferenced blindly.
            if (exception is null)
            {
                Console.WriteLine($"{settings.TrialId} failed without exception details.");
                return;
            }

            if (exception.Message.Contains("Operation was canceled."))
            {
                Console.WriteLine($"{settings.TrialId} cancelled. Time budget exceeded.");
                // Return here so a cancelled trial is not also logged as a generic failure.
                return;
            }

            Console.WriteLine($"{settings.TrialId} failed with exception {exception.Message}");
        }

        /// <summary>
        /// Intentionally does nothing; trial-start notifications are not logged by this monitor.
        /// </summary>
        public void ReportRunningTrial(TrialSettings setting)
        {
        }
    }
}
62 changes: 62 additions & 0 deletions
62
samples/csharp/getting-started/MLNET2/AutoMLAdvanced/Program.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
// Initialize MLContext | ||
using AutoMLAdvanced; | ||
using Microsoft.ML; | ||
using Microsoft.ML.AutoML; | ||
using Microsoft.ML.Data; | ||
using static Microsoft.ML.DataOperationsCatalog; | ||
|
||
// Initialize MLContext
MLContext ctx = new MLContext();

// Define data path. Built from segments so the separator matches the current OS;
// a literal '\' is only treated as a directory separator on Windows.
var dataPath = Path.GetFullPath(Path.Combine("..", "..", "..", "..", "Data", "taxi-fare-train.csv"));

// Infer column information
ColumnInferenceResults columnInference =
    ctx.Auto().InferColumns(dataPath, labelColumnName: "fare_amount", groupColumns: false);

// Modify column inference results: move rate_code from the numeric to the categorical columns
columnInference.ColumnInformation.NumericColumnNames.Remove("rate_code");
columnInference.ColumnInformation.CategoricalColumnNames.Add("rate_code");

// Create text loader
TextLoader loader = ctx.Data.CreateTextLoader(columnInference.TextLoaderOptions);

// Load data into IDataView
IDataView data = loader.Load(dataPath);

// Split into train (80%), validation (20%) sets
TrainTestData trainValidationData = ctx.Data.TrainTestSplit(data, testFraction: 0.2);

// Define pipeline (LightGBM trainers excluded via useLgbm: false)
SweepablePipeline pipeline =
    ctx.Auto().Featurizer(data, columnInformation: columnInference.ColumnInformation)
        .Append(ctx.Auto().Regression(labelColumnName: columnInference.ColumnInformation.LabelColumnName, useLgbm: false));

// Create AutoML experiment
AutoMLExperiment experiment = ctx.Auto().CreateExperiment();

// Configure experiment
experiment
    .SetPipeline(pipeline)
    .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.ColumnInformation.LabelColumnName)
    .SetTrainingTimeInSeconds(60)
    .SetGridSearchTuner()
    .SetDataset(trainValidationData);

// Log experiment trials
var monitor = new AutoMLMonitor(pipeline);
experiment.SetMonitor(monitor);

// Set checkpoints
var checkpointPath = Path.Join(Directory.GetCurrentDirectory(), "automl");
experiment.SetCheckpoint(checkpointPath);

// Run experiment. CancellationTokenSource is IDisposable, so release it when the program ends.
using var cts = new CancellationTokenSource();
TrialResult experimentResults = await experiment.RunAsync(cts.Token);

// Get best model
var model = experimentResults.Model;

// Get all completed trials
var completedTrials = monitor.GetCompletedTrials();
14 changes: 14 additions & 0 deletions
14
samples/csharp/getting-started/MLNET2/AutoMLEstimators/AutoMLEstimators.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net6.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="Microsoft.ML.AutoML" Version="0.20.0" /> | ||
</ItemGroup> | ||
|
||
</Project> |
46 changes: 46 additions & 0 deletions
46
samples/csharp/getting-started/MLNET2/AutoMLEstimators/AutoMLMonitor.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
using Microsoft.ML.AutoML; | ||
|
||
namespace AutoMLAdvanced
{
    /// <summary>
    /// <see cref="IMonitor"/> implementation that writes trial progress to the console
    /// and keeps every completed <see cref="TrialResult"/> for later inspection.
    /// </summary>
    public class AutoMLMonitor : IMonitor
    {
        private readonly SweepablePipeline _pipeline;
        private readonly List<TrialResult> _completedTrials;

        /// <summary>
        /// Creates a monitor for the given pipeline.
        /// </summary>
        /// <param name="pipeline">Pipeline used to render each trial's parameters as text.</param>
        public AutoMLMonitor(SweepablePipeline pipeline)
        {
            _pipeline = pipeline;
            _completedTrials = new List<TrialResult>();
        }

        /// <summary>
        /// Returns the trials reported through <see cref="ReportCompletedTrial"/> so far,
        /// in the order they were reported.
        /// </summary>
        public IEnumerable<TrialResult> GetCompletedTrials() => _completedTrials;

        /// <summary>
        /// Intentionally does nothing; best-trial notifications are not logged by this monitor.
        /// </summary>
        public void ReportBestTrial(TrialResult result)
        {
        }

        /// <summary>
        /// Logs the trial id, training duration and rendered pipeline, then records the result.
        /// </summary>
        /// <param name="result">The completed trial.</param>
        public void ReportCompletedTrial(TrialResult result)
        {
            var trialId = result.TrialSettings.TrialId;
            var timeToTrain = result.DurationInMilliseconds;
            var pipeline = _pipeline.ToString(result.TrialSettings.Parameter);
            Console.WriteLine($"Trial {trialId} finished training in {timeToTrain}ms with pipeline {pipeline}");
            _completedTrials.Add(result);
        }

        /// <summary>
        /// Logs a failed trial. A cancellation is reported once as a cancellation
        /// rather than additionally as a failure.
        /// </summary>
        /// <param name="settings">Settings of the trial that failed.</param>
        /// <param name="exception">The failure cause; may be <c>null</c> (it is the declared default).</param>
        public void ReportFailTrial(TrialSettings settings, Exception? exception = null)
        {
            // The parameter defaults to null, so it must not be dereferenced blindly.
            if (exception is null)
            {
                Console.WriteLine($"{settings.TrialId} failed without exception details.");
                return;
            }

            if (exception.Message.Contains("Operation was canceled."))
            {
                Console.WriteLine($"{settings.TrialId} cancelled. Time budget exceeded.");
                // Return here so a cancelled trial is not also logged as a generic failure.
                return;
            }

            Console.WriteLine($"{settings.TrialId} failed with exception {exception.Message}");
        }

        /// <summary>
        /// Intentionally does nothing; trial-start notifications are not logged by this monitor.
        /// </summary>
        public void ReportRunningTrial(TrialSettings setting)
        {
        }
    }
}
75 changes: 75 additions & 0 deletions
75
samples/csharp/getting-started/MLNET2/AutoMLEstimators/Program.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
// Initialize MLContext | ||
using AutoMLAdvanced; | ||
using Microsoft.ML; | ||
using Microsoft.ML.AutoML; | ||
using Microsoft.ML.AutoML.CodeGen; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.SearchSpace; | ||
using Microsoft.ML.SearchSpace.Option; | ||
using Microsoft.ML.Trainers; | ||
using static Microsoft.ML.DataOperationsCatalog; | ||
|
||
// Initialize MLContext
MLContext ctx = new MLContext();

// Define data path. Built from segments so the separator matches the current OS;
// a literal '\' is only treated as a directory separator on Windows.
var dataPath = Path.GetFullPath(Path.Combine("..", "..", "..", "..", "Data", "taxi-fare-train.csv"));

// Infer column information
ColumnInferenceResults columnInference =
    ctx.Auto().InferColumns(dataPath, labelColumnName: "fare_amount", groupColumns: false);

// Create text loader
TextLoader loader = ctx.Data.CreateTextLoader(columnInference.TextLoaderOptions);

// Load data into IDataView
IDataView data = loader.Load(dataPath);

// Split into train (80%), validation (20%) sets
TrainTestData trainValidationData = ctx.Data.TrainTestSplit(data, testFraction: 0.2);

// Initialize default Sdca search space
var sdcaSearchSpace = new SearchSpace<SdcaOption>();

// Modify L1 search space range
sdcaSearchSpace["L1Regularization"] = new UniformSingleOption(min: 0.01f, max: 2.0f, logBase: false, defaultValue: 0.01f);

// Use the search space to define a custom factory to create an SdcaRegressionTrainer.
// The lambda parameter is named "context" so it does not shadow the outer "ctx" local.
var sdcaFactory = (MLContext context, SdcaOption param) =>
{
    var sdcaOption = new SdcaRegressionTrainer.Options();

    // Only L1 is swept; L2 is held at a fixed value.
    sdcaOption.L1Regularization = param.L1Regularization;
    sdcaOption.L2Regularization = 0.02f;

    sdcaOption.LabelColumnName = columnInference.ColumnInformation.LabelColumnName;

    return context.Regression.Trainers.Sdca(sdcaOption);
};

// Define Sdca sweepable estimator (SdcaRegressionTrainer + SdcaOption search space)
var sdcaSweepableEstimator = ctx.Auto().CreateSweepableEstimator(sdcaFactory, sdcaSearchSpace);

// Add sweepable estimator to sweepable pipeline
SweepablePipeline pipeline =
    ctx.Auto().Featurizer(data, columnInformation: columnInference.ColumnInformation)
        .Append(sdcaSweepableEstimator);

// Create AutoML experiment
AutoMLExperiment experiment = ctx.Auto().CreateExperiment();

// Configure experiment
experiment
    .SetPipeline(pipeline)
    .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.ColumnInformation.LabelColumnName)
    .SetTrainingTimeInSeconds(60)
    .SetDataset(trainValidationData);

// Log experiment trials
var monitor = new AutoMLMonitor(pipeline);
experiment.SetMonitor(monitor);

// Run experiment. CancellationTokenSource is IDisposable, so release it when the program ends.
using var cts = new CancellationTokenSource();
TrialResult experimentResults = await experiment.RunAsync(cts.Token);

// Get best model
var model = experimentResults.Model;
14 changes: 14 additions & 0 deletions
14
samples/csharp/getting-started/MLNET2/AutoMLQuickStart/AutoMLQuickStart.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>net6.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="Microsoft.ML.AutoML" Version="0.20.0" /> | ||
</ItemGroup> | ||
|
||
</Project> |
53 changes: 53 additions & 0 deletions
53
samples/csharp/getting-started/MLNET2/AutoMLQuickStart/Program.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
// Initialize MLContext | ||
using Microsoft.ML; | ||
using Microsoft.ML.AutoML; | ||
using Microsoft.ML.Data; | ||
using static Microsoft.ML.DataOperationsCatalog; | ||
|
||
// Initialize MLContext
MLContext ctx = new MLContext();

// Define data path. Built from segments so the separator matches the current OS;
// a literal '\' is only treated as a directory separator on Windows.
var dataPath = Path.GetFullPath(Path.Combine("..", "..", "..", "..", "Data", "taxi-fare-train.csv"));

// Infer column information
ColumnInferenceResults columnInference =
    ctx.Auto().InferColumns(dataPath, labelColumnName: "fare_amount", groupColumns: false);

// Create text loader
TextLoader loader = ctx.Data.CreateTextLoader(columnInference.TextLoaderOptions);

// Load data into IDataView
IDataView data = loader.Load(dataPath);

// Split into train (80%), validation (20%) sets
TrainTestData trainValidationData = ctx.Data.TrainTestSplit(data, testFraction: 0.2);

// Define pipeline
SweepablePipeline pipeline =
    ctx.Auto().Featurizer(data, columnInformation: columnInference.ColumnInformation)
        .Append(ctx.Auto().Regression(labelColumnName: columnInference.ColumnInformation.LabelColumnName));

// Create AutoML experiment
AutoMLExperiment experiment = ctx.Auto().CreateExperiment();

// Configure experiment
experiment
    .SetPipeline(pipeline)
    .SetRegressionMetric(RegressionMetric.RSquared, labelColumn: columnInference.ColumnInformation.LabelColumnName)
    .SetTrainingTimeInSeconds(60)
    .SetDataset(trainValidationData);

// Log experiment trials: only messages whose source is "AutoMLExperiment" are printed.
// The == comparison is ordinal and does not throw if Source is null.
ctx.Log += (_, e) =>
{
    if (e.Source == "AutoMLExperiment")
    {
        Console.WriteLine(e.RawMessage);
    }
};

// Run experiment
TrialResult experimentResults = await experiment.RunAsync();

// Get best model
var model = experimentResults.Model;
Oops, something went wrong.