-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy path03_Advanced_FullPipeline.ps1
More file actions
113 lines (90 loc) · 4.58 KB
/
03_Advanced_FullPipeline.ps1
File metadata and controls
113 lines (90 loc) · 4.58 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
#Requires -Version 5.1

<#
.SYNOPSIS
    VBAF Tutorial 03 - Advanced: Full ML Pipeline
    Advanced Series | Estimated time: 30 minutes
.DESCRIPTION
    Learn how to build a production-quality ML pipeline in six stages:
    - Data loading (the "MessyHousePrice" tutorial dataset)
    - Preprocessing: imputation, outlier clipping, scaling
    - Feature engineering: polynomial features (degree 2)
    - Model selection with cross-validation (-Folds 5)
    - Hyperparameter tuning with random search, then training the final model
    - Model saving to registry
    This is the complete end-to-end workflow!
.NOTES
    Requires PowerShell 5.1 or later. The #Requires statement is kept outside
    this help block so that it is a real directive rather than help text.

    The VBAF classes and functions used by this script (for example
    Get-VBAFPipelineDataset, [MissingValueImputer], Save-VBAFModel) are not
    imported here; they must already be loaded in the session.
#>
# Tutorial banner. The title row is padded to the same 44-character width as
# the border rows so the right-hand edge of the box lines up.
$bannerBorder = "+------------------------------------------+"
$bannerTitle  = "VBAF Tutorial 03 - Full ML Pipeline"
Write-Host ""
Write-Host $bannerBorder -ForegroundColor Cyan
Write-Host ("¦ {0,-40} ¦" -f $bannerTitle) -ForegroundColor Cyan
Write-Host $bannerBorder -ForegroundColor Cyan
Write-Host ""
# ------------------------------------------------------------
# STAGE 1 - Load the tutorial dataset
# Output: $data, whose .X, .y and .Features members are read by later stages.
# ------------------------------------------------------------
Write-Host -ForegroundColor Yellow -Object "[Stage 1/6] Data Loading & Validation"

$datasetArgs = @{ Name = "MessyHousePrice" }
$data = Get-VBAFPipelineDataset @datasetArgs

# Report how many samples came back.
$sampleCount = $data.X.Length
Write-Host -ForegroundColor White -Object " Loaded $sampleCount samples with missing values and outliers"
# ------------------------------------------------------------
# STAGE 2 - Preprocessing: impute -> clip outliers -> scale
# Input : $data.X
# Output: $Xs
# ------------------------------------------------------------
Write-Host -ForegroundColor Yellow -Object "[Stage 2/6] Preprocessing"

# 2a. Fill in missing values using the "median" strategy.
$imputer = [MissingValueImputer]::new("median")
$Ximp    = $imputer.FitTransform($data.X)
Write-Host -ForegroundColor White -Object " Missing values imputed (median)"

# 2b. Fit the outlier detector on the imputed data, then transform that same data.
#     NOTE(review): 1.5 is presumably the IQR multiplier - confirm against OutlierDetector.
$outlier = [OutlierDetector]::new("iqr", "clip", 1.5)
$outlier.Fit($Ximp)
$Xclip   = $outlier.Transform($Ximp)
Write-Host -ForegroundColor White -Object " Outliers clipped (IQR method)"

# 2c. Scale the features. The scaler is given the .Data member of the
#     Transform() result, not the result object itself.
$scaler = [RobustScaler]::new()
$Xs     = $scaler.FitTransform($Xclip.Data)
Write-Host -ForegroundColor White -Object " Features scaled (RobustScaler)"
# ------------------------------------------------------------
# STAGE 3 - Feature engineering
# Input : $Xs, $data.Features
# Output: $Xp
# ------------------------------------------------------------
Write-Host -ForegroundColor Yellow -Object "[Stage 3/6] Feature Engineering"

# Expand the scaled matrix with a PolynomialFeatures transformer built with argument 2.
$poly = [PolynomialFeatures]::new(2)
$Xp   = $poly.FitTransform($Xs, $data.Features)

# Report the width of the first row before and after the expansion.
$columnsBefore = $Xs[0].Length
$columnsAfter  = $Xp[0].Length
$featureReport = " Polynomial features: {0} -> {1} columns" -f $columnsBefore, $columnsAfter
Write-Host -ForegroundColor White -Object $featureReport
# ------------------------------------------------------------
# STAGE 4 - Model selection + cross-validation
# Input : $Xp, $data.y
# Output: $algoResult (not read again anywhere in this script)
# ------------------------------------------------------------
Write-Host -ForegroundColor Yellow -Object "[Stage 4/6] Model Selection"

# Arguments are splatted instead of using backtick line continuations.
$selectionArgs = @{
    X      = $Xp
    y      = $data.y
    Task   = "regression"
    Folds  = 5
    Metric = "R2"
}
$algoResult = Invoke-VBAFAlgorithmSelection @selectionArgs
# ------------------------------------------------------------
# STAGE 5 - Hyperparameter tuning, then final training
# Input : $Xp, $data.y
# Output: $hpoResult (.BestParams, .BestScore are read later), $bestModel
# ------------------------------------------------------------
Write-Host -ForegroundColor Yellow -Object "[Stage 5/6] Hyperparameter Tuning"

# Random search: 12 trials drawn from six candidate Lambda values.
# The factory builds a fresh RidgeRegression from the parameter set it is given.
$searchArgs = @{
    ModelFactory = { param($p) [RidgeRegression]::new($p.Lambda) }
    ParamSpace   = @{ Lambda=@(0.001, 0.01, 0.1, 0.5, 1.0, 5.0) }
    X            = $Xp
    y            = $data.y
    NTrials      = 12
    Folds        = 5
    Metric       = "R2"
}
$hpoResult = Invoke-VBAFRandomSearch @searchArgs

# Fit the final model on the full data with the best Lambda found by the search.
$bestLambda = $hpoResult.BestParams.Lambda
$bestModel  = [RidgeRegression]::new($bestLambda)
$bestModel.Fit($Xp, $data.y)
# ============================================================
# STAGE 6: Save to registry
# Input : $bestModel, $hpoResult
# ============================================================
Write-Host "[Stage 6/6] Saving to Registry" -ForegroundColor Yellow

# The registry folder is placed under the current user's home directory
# instead of a path hard-coded to one specific user profile, so the tutorial
# runs unchanged on any account.
$registryPath = Join-Path -Path $HOME -ChildPath "VBAFRegistry"
Initialize-VBAFRegistry -Path $registryPath | Out-Null

# Record the tuned model together with its rounded score and the parameters
# it was trained with. Output of both calls is discarded.
$saveArgs = @{
    ModelName   = "Tutorial_HousePrice"
    Model       = $bestModel
    ModelType   = "RidgeRegression"
    Metrics     = @{ R2=[Math]::Round($hpoResult.BestScore, 4) }
    Params      = @{ Lambda=$hpoResult.BestParams.Lambda; Features="Polynomial(2)" }
    DatasetName = "MessyHousePrice"
    Description = "Tutorial 03 - full pipeline"
}
Save-VBAFModel @saveArgs | Out-Null
# Final summary box. Every row is padded to the same 44-character width as
# the border rows so the right-hand edge of the box lines up:
#   title row : "¦ " + 40 + " ¦"
#   value rows: "¦ " + 11-char label + " : " + 27-char value + "¦"
$summaryBorder = "+------------------------------------------+"
$summaryRow    = "¦ {0,-11} : {1,-27}¦"

Write-Host ""
Write-Host $summaryBorder -ForegroundColor Green
Write-Host ("¦ {0,-40} ¦" -f "Pipeline Complete!") -ForegroundColor Green
Write-Host ($summaryRow -f "Best R2", ([Math]::Round($hpoResult.BestScore, 4))) -ForegroundColor White
Write-Host ($summaryRow -f "Best Lambda", $hpoResult.BestParams.Lambda) -ForegroundColor White
Write-Host ($summaryRow -f "Features", $Xp[0].Length) -ForegroundColor White
Write-Host $summaryBorder -ForegroundColor Green
Write-Host ""
Write-Host "Try Tutorial 04 next: Real-world House Price MLOps project!" -ForegroundColor Cyan