-
Notifications
You must be signed in to change notification settings - Fork 2.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
621 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
############################################################################### | ||
# Set default behavior to automatically normalize line endings. | ||
############################################################################### | ||
* text=auto | ||
|
||
############################################################################### | ||
# Set default behavior for command prompt diff. | ||
# | ||
# This is needed for earlier builds of msysgit that do not have it on by | ||
# default for csharp files. | ||
# Note: This is only used by command line | ||
############################################################################### | ||
#*.cs diff=csharp | ||
|
||
############################################################################### | ||
# Set the merge driver for project and solution files | ||
# | ||
# Merging from the command prompt will add diff markers to the files if there | ||
# are conflicts (Merging from VS is not affected by the settings below, in VS | ||
# the diff markers are never inserted). Diff markers may cause the following | ||
# file extensions to fail to load in VS. An alternative would be to treat | ||
# these files as binary and thus will always conflict and require user | ||
# intervention with every merge. To do so, just uncomment the entries below | ||
############################################################################### | ||
#*.sln merge=binary | ||
#*.csproj merge=binary | ||
#*.vbproj merge=binary | ||
#*.vcxproj merge=binary | ||
#*.vcproj merge=binary | ||
#*.dbproj merge=binary | ||
#*.fsproj merge=binary | ||
#*.lsproj merge=binary | ||
#*.wixproj merge=binary | ||
#*.modelproj merge=binary | ||
#*.sqlproj merge=binary | ||
#*.wwaproj merge=binary | ||
|
||
############################################################################### | ||
# behavior for image files | ||
# | ||
# image files are treated as binary by default. | ||
############################################################################### | ||
#*.jpg binary | ||
#*.png binary | ||
#*.gif binary | ||
|
||
############################################################################### | ||
# diff behavior for common document formats | ||
# | ||
# Convert binary document formats to text before diffing them. This feature | ||
# is only available from the command line. Turn it on by uncommenting the | ||
# entries below. | ||
############################################################################### | ||
#*.doc diff=astextplain | ||
#*.DOC diff=astextplain | ||
#*.docx diff=astextplain | ||
#*.DOCX diff=astextplain | ||
#*.dot diff=astextplain | ||
#*.DOT diff=astextplain | ||
#*.pdf diff=astextplain | ||
#*.PDF diff=astextplain | ||
#*.rtf diff=astextplain | ||
#*.RTF diff=astextplain |
25 changes: 25 additions & 0 deletions
25
samples/csharp/getting-started/Clustering_Iris/Clustering_Iris.sln
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
|
||
Microsoft Visual Studio Solution File, Format Version 12.00 | ||
# Visual Studio 15 | ||
VisualStudioVersion = 15.0.28010.2046 | ||
MinimumVisualStudioVersion = 10.0.40219.1 | ||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Clustering_Iris", "Clustering_Iris\Clustering_Iris.csproj", "{E730C84B-0F03-4C0C-9B22-68130091C900}" | ||
EndProject | ||
Global | ||
GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
Debug|Any CPU = Debug|Any CPU | ||
Release|Any CPU = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
{E730C84B-0F03-4C0C-9B22-68130091C900}.Debug|Any CPU.ActiveCfg = Debug|Any CPU | ||
{E730C84B-0F03-4C0C-9B22-68130091C900}.Debug|Any CPU.Build.0 = Debug|Any CPU | ||
{E730C84B-0F03-4C0C-9B22-68130091C900}.Release|Any CPU.ActiveCfg = Release|Any CPU | ||
{E730C84B-0F03-4C0C-9B22-68130091C900}.Release|Any CPU.Build.0 = Release|Any CPU | ||
EndGlobalSection | ||
GlobalSection(SolutionProperties) = preSolution | ||
HideSolutionNode = FALSE | ||
EndGlobalSection | ||
GlobalSection(ExtensibilityGlobals) = postSolution | ||
SolutionGuid = {740BE814-1300-4D85-B982-D0384949FC5D} | ||
EndGlobalSection | ||
EndGlobal |
18 changes: 18 additions & 0 deletions
18
samples/csharp/getting-started/Clustering_Iris/Clustering_Iris/Clustering_Iris.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<OutputType>Exe</OutputType> | ||
<TargetFramework>netcoreapp2.1</TargetFramework> | ||
</PropertyGroup> | ||
|
||
<ItemGroup> | ||
<PackageReference Include="Microsoft.ML" Version="0.6.0" /> | ||
</ItemGroup> | ||
|
||
<ItemGroup> | ||
<None Update="datasets\iris-full.txt"> | ||
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory> | ||
</None> | ||
</ItemGroup> | ||
|
||
</Project> |
151 changes: 151 additions & 0 deletions
151
samples/csharp/getting-started/Clustering_Iris/Clustering_Iris/Program.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
using Microsoft.ML; | ||
using Microsoft.ML.Core.Data; | ||
using Microsoft.ML.Runtime.Api; | ||
using Microsoft.ML.Runtime.Data; | ||
using Microsoft.ML.Runtime.KMeans; | ||
using Microsoft.ML.Runtime.Learners; | ||
using System; | ||
using System.IO; | ||
|
||
namespace Clustering_Iris | ||
{ | ||
/// <summary>
/// Console sample that trains a K-Means clustering model on the iris dataset,
/// predicts the cluster of one sample flower, saves the trained model to a .zip
/// file, and then repeats the prediction with the model reloaded from that file.
/// </summary>
internal static class Program
{
    // AppContext.BaseDirectory is used instead of Environment.GetCommandLineArgs()[0]:
    // the first command-line argument may be empty or a bare file name, which makes
    // Path.GetDirectoryName return null/empty and breaks the paths derived below.
    private static string AppPath => AppContext.BaseDirectory;

    // The dataset is copied next to the binaries by the project file (PreserveNewest).
    private static string DataPath => Path.Combine(AppPath, "datasets", "iris-full.txt");

    private static string ModelPath => Path.Combine(AppPath, "IrisModel.zip");

    /// <summary>
    /// Entry point: loads the data, trains the model, predicts one sample,
    /// saves the model, and predicts again from the saved file.
    /// </summary>
    /// <param name="args">Command-line arguments (not used).</param>
    private static void Main(string[] args)
    {
        // Create ML.NET context/environment
        using (var env = new LocalEnvironment())
        {
            // Create DataReader with data schema mapped to file's columns.
            // Each column is declared exactly once (name, type, source column index).
            var reader = new TextLoader(env,
                new TextLoader.Arguments()
                {
                    Separator = "\t",
                    HasHeader = true,
                    Column = new[]
                    {
                        new TextLoader.Column("Label", DataKind.R4, 0),
                        new TextLoader.Column("SepalLength", DataKind.R4, 1),
                        new TextLoader.Column("SepalWidth", DataKind.R4, 2),
                        new TextLoader.Column("PetalLength", DataKind.R4, 3),
                        new TextLoader.Column("PetalWidth", DataKind.R4, 4),
                    }
                });

            // Load training data
            IDataView trainingDataView = reader.Read(new MultiFileSource(DataPath));

            // Concatenate the four measurements into a single "Features" column and
            // append the K-Means++ trainer. The Label column is not part of the
            // features. Three clusters are requested.
            var pipeline = new ConcatEstimator(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth")
                .Append(new KMeansPlusPlusTrainer(env, "Features", clustersCount: 3));

            // Create and train the model
            Console.WriteLine("=============== Create and Train the Model ===============");

            var model = pipeline.Fit(trainingDataView);

            Console.WriteLine("=============== End of training ===============");
            Console.WriteLine();

            // Test with one sample flower.
            // NOTE(review): these values look rotated relative to the field names
            // (e.g. a petal width of 5.1 for a setosa) - confirm against the dataset.
            var sampleIrisData = new IrisData()
            {
                SepalLength = 3.3f,
                SepalWidth = 1.6f,
                PetalLength = 0.2f,
                PetalWidth = 5.1f,
            };

            var prediction = model.MakePredictionFunction<IrisData, IrisPrediction>(env).Predict(sampleIrisData);

            Console.WriteLine($"Clusters assigned for setosa flowers:{prediction.SelectedClusterId}");

            // Save model to .ZIP file
            SaveModelAsFile(env, model);

            // Predict again but now testing the model loading from the .ZIP file
            PredictWithModelLoadedFromFile(sampleIrisData);

            Console.WriteLine("=============== End of process, hit any key to finish ===============");
            Console.ReadKey();
        }
    }

    /// <summary>
    /// Writes the trained model to <see cref="ModelPath"/>, replacing any existing
    /// file, and prints the destination path.
    /// </summary>
    /// <param name="env">ML.NET environment passed to the model's SaveTo call.</param>
    /// <param name="model">Trained transformer chain to persist.</param>
    private static void SaveModelAsFile(LocalEnvironment env, TransformerChain<ClusteringPredictionTransformer<KMeansPredictor>> model)
    {
        using (var fs = new FileStream(ModelPath, FileMode.Create, FileAccess.Write, FileShare.Write))
        {
            model.SaveTo(env, fs);
        }

        Console.WriteLine("The model is saved to {0}", ModelPath);
    }

    /// <summary>
    /// Loads the model stored at <see cref="ModelPath"/> in a fresh environment and
    /// prints the cluster it assigns to <paramref name="sampleData"/>.
    /// </summary>
    /// <param name="sampleData">Flower measurements to run through the loaded model.</param>
    private static void PredictWithModelLoadedFromFile(IrisData sampleData)
    {
        // Test with Loaded Model from .zip file
        using (var env = new LocalEnvironment())
        {
            ITransformer loadedModel;
            using (var stream = new FileStream(ModelPath, FileMode.Open, FileAccess.Read, FileShare.Read))
            {
                loadedModel = TransformerChain.LoadFrom(env, stream);
            }

            // Create prediction engine and make prediction on the caller's sample
            // (previously the parameter was ignored and the values were duplicated here).
            var prediction = loadedModel.MakePredictionFunction<IrisData, IrisPrediction>(env).Predict(sampleData);

            Console.WriteLine();
            Console.WriteLine($"Clusters assigned for setosa flowers:{prediction.SelectedClusterId}");
        }
    }
}
|
||
|
||
|
||
/// <summary>
/// One iris flower as consumed by the model: a numeric label plus the four
/// measurements. The attribute on each field carries the column identifier
/// ("0" through "4"), in the same order the TextLoader schema in Program
/// declares the file's columns.
/// </summary>
public class IrisData
{
    /// <summary>Label value; declared as column "0".</summary>
    [Column("0")] public float Label;

    /// <summary>Sepal length; declared as column "1".</summary>
    [Column("1")] public float SepalLength;

    /// <summary>Sepal width; declared as column "2".</summary>
    [Column("2")] public float SepalWidth;

    /// <summary>Petal length; declared as column "3".</summary>
    [Column("3")] public float PetalLength;

    /// <summary>Petal width; declared as column "4".</summary>
    [Column("4")] public float PetalWidth;
}
|
||
/// <summary>
/// Result object filled in by prediction operations on the clustering model.
/// </summary>
public class IrisPrediction
{
    /// <summary>Cluster identifier, read from the "PredictedLabel" output column.</summary>
    [ColumnName("PredictedLabel")] public uint SelectedClusterId;

    /// <summary>
    /// Values of the "Score" output column. Per the sample's README these are the
    /// distances from the flower to each cluster centroid.
    /// </summary>
    [ColumnName("Score")] public float[] Distance;
}
} |
100 changes: 100 additions & 0 deletions
100
samples/csharp/getting-started/Clustering_Iris/Clustering_Iris/READMe.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
# Clustering Iris Data | ||
In this introductory sample, you'll see how to use [ML.NET](https://www.microsoft.com/net/learn/apps/machine-learning-and-ai/ml-dotnet) to divide iris flowers into different groups that correspond to different types of iris. In the world of machine learning, this task is known as **clustering**. | ||
|
||
## Problem | ||
To demonstrate the clustering API in action, we will use three types of iris flowers: setosa, versicolor, and virginica. All of them are stored in the same dataset. Even though the type of these flowers is known, we will not use it and will run the clustering algorithm only on flower parameters such as petal length, petal width, etc. The task is to group all flowers into three different clusters. We would expect the flowers of different types to belong to different clusters. | ||
|
||
The inputs of the model are the following iris parameters: | ||
* petal length | ||
* petal width | ||
* sepal length | ||
* sepal width | ||
|
||
## ML task - Clustering | ||
The generalized problem of **clustering** is to group a set of objects in such a way that objects in the same group are more similar to each other than to those in other groups. | ||
|
||
Some other examples of clustering: | ||
* group news articles into topics: sports, politics, tech, etc. | ||
* group customers by purchase preferences. | ||
* divide a digital image into distinct regions for border detection or object recognition. | ||
|
||
Clustering can look similar to multiclass classification, but the difference is that for clustering tasks we don't know the answers for the past data. So there is no "tutor"/"supervisor" that can tell if our algorithm's prediction was right or wrong. This type of ML task is called **unsupervised learning**. | ||
|
||
## Solution | ||
To solve this problem, first we will build and train an ML model. Then we will use the trained model to predict a cluster for iris flowers. | ||
|
||
### 1. Build model | ||
|
||
Building a model includes: loading data (`iris-full.txt` with `TextLoader`), transforming the data so it can be used effectively by an ML algorithm (with `ConcatEstimator`), and choosing a learning algorithm (`KMeansPlusPlusTrainer`). The first of those steps, creating the environment and loading the data, looks like this: | ||
```CSharp | ||
// Create ML.NET context/environment | ||
using (var env = new LocalEnvironment()) | ||
{ | ||
// Create DataReader with data schema mapped to file's columns | ||
var reader = new TextLoader(env, | ||
new TextLoader.Arguments() | ||
{ | ||
Separator = "\t", | ||
HasHeader = true, | ||
Column = new[] | ||
{ | ||
new TextLoader.Column("Label", DataKind.R4, 0), | ||
new TextLoader.Column("SepalLength", DataKind.R4, 1), | ||
new TextLoader.Column("SepalWidth", DataKind.R4, 2), | ||
new TextLoader.Column("PetalLength", DataKind.R4, 3), | ||
new TextLoader.Column("PetalWidth", DataKind.R4, 4), | ||
|
||
} | ||
}); | ||
//Load training data | ||
IDataView trainingDataView = reader.Read(new MultiFileSource(DataPath)); | ||
|
||
} | ||
``` | ||
### 2. Train model | ||
Training the model is a process of running the chosen algorithm on the given data. It is implemented in the `Fit()` API. To perform training we just call the method on the pipeline and provide the loaded training data (`trainingDataView`). | ||
```CSharp | ||
// Transform your data and add a learner | ||
// Add a learning algorithm to the pipeline. e.g.(What are characteristics of iris is this?) | ||
// Convert the Label back into original text (after converting to number in step 3) | ||
var pipeline = new ConcatEstimator(env, "Features", "SepalLength", "SepalWidth", "PetalLength", "PetalWidth") | ||
.Append(new KMeansPlusPlusTrainer(env, "Features",clustersCount:3)); | ||
|
||
// Create and train the model | ||
Console.WriteLine("=============== Create and Train the Model ==============="); | ||
|
||
var model = pipeline.Fit(trainingDataView); | ||
|
||
``` | ||
### 3. Consume model | ||
After the model is built and trained, we can use the `Predict()` API to predict the cluster for an iris flower and calculate the distance from the given flower parameters to each cluster (each centroid of a cluster). | ||
|
||
```CSharp | ||
// Test with one sample text | ||
var sampleIrisData = new IrisData() | ||
{ | ||
SepalLength = 3.3f, | ||
SepalWidth = 1.6f, | ||
PetalLength = 0.2f, | ||
PetalWidth = 5.1f, | ||
}; | ||
|
||
var prediction = model.MakePredictionFunction<IrisData, IrisPrediction>(env).Predict( | ||
sampleIrisData); | ||
|
||
Console.WriteLine($"Clusters assigned for setosa flowers:"+prediction.SelectedClusterId); | ||
``` | ||
The sample values can also be kept in a helper class such as `TestIrisData`, where `TestIrisData.Setosa1` stores the information about a setosa iris flower. | ||
```CSharp | ||
internal class TestIrisData | ||
{ | ||
internal static readonly IrisData Setosa1 = new IrisData() | ||
{ | ||
SepalLength = 3.3f, | ||
SepalWidth = 1.6f, | ||
PetalLength = 0.2f, | ||
PetalWidth = 5.1f, | ||
}; | ||
(...) | ||
} | ||
``` |
Oops, something went wrong.