Enter training time for BF16 using AVX512 #2620

Open: wants to merge 5 commits into `development`
@@ -118,7 +118,7 @@
"## Training the Model\n",
"The function trainModel() will train the Resnet50 model based on the whether Intel® AMX should be enabled, and whether to use FP32 or BF16 data type. The environment variable `ONEDNN_MAX_CPU_ISA` is used to enable or disable Intel® AMX. **Note that this environment variable is only initialized once.** This means to run with Intel® AMX and VNNI, there will need to be separate processes. The best practice is to set this environment variable before running your script. For more information, refer to the [oneDNN documentation on CPU Dispatcher Control](https://www.intel.com/content/www/us/en/develop/documentation/onednn-developer-guide-and-reference/top/performance-profiling-and-inspection/cpu-dispatcher-control.html). \n",
"\n",
"To use BF16 in operations, use the `torch.cpu.amp.autocast()` function to perform forward and backward propagation."
"To use BF16 in operations, use the `torch.amp.autocast('cpu')` function to perform forward and backward propagation."
]
},
{
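Because `ONEDNN_MAX_CPU_ISA` is only read when oneDNN first initializes, each ISA setting needs its own Python process. A minimal sketch of how the two configurations could be launched from a driver script (the `run_with_isa` helper is an illustration, not part of this PR):

```python
import os
import subprocess
import sys

def run_with_isa(script, isa):
    """Run a training script in a child process with ONEDNN_MAX_CPU_ISA set.

    oneDNN reads the variable only once per process, so each ISA setting
    (e.g. AVX512_CORE_BF16 vs. AVX512_CORE_AMX) needs a fresh process.
    """
    env = os.environ.copy()
    env["ONEDNN_MAX_CPU_ISA"] = isa
    subprocess.run([sys.executable, script], env=env, check=True)

# Example usage, mirroring the split introduced in this PR:
# run_with_isa("pytorch_training_avx512_bf16.py", "AVX512_CORE_BF16")
```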
@@ -128,7 +128,7 @@
"metadata": {},
"outputs": [],
"source": [
"os.environ[\"ONEDNN_MAX_CPU_ISA\"] = \"AVX512_CORE_BF16\""
"os.environ[\"ONEDNN_MAX_CPU_ISA\"] = \"AVX512_CORE_AMX\""
]
},
{
@@ -171,7 +171,7 @@
" for batch_idx, (data, target) in enumerate(train_loader):\n",
" optimizer.zero_grad()\n",
" if \"bf16\" == dataType:\n",
" with torch.cpu.amp.autocast(): # Auto Mixed Precision\n",
" with torch.amp.autocast('cpu'): # Auto Mixed Precision\n",
" # Setting memory_format to torch.channels_last could improve performance with 4D input data. This is optional.\n",
" data = data.to(memory_format=torch.channels_last)\n",
" output = model(data)\n",
@@ -240,8 +240,8 @@
"## Training with FP32 and BF16, including Intel® AMX\n",
"Train the Resnet50 model in three different cases:\n",
"1. FP32 (baseline) \n",
"2. BF16 without Intel® AMX \n",
"3. BF16 with Intel® AMX \n",
"2. BF16 with Intel® AMX\n",
"x. BF16 without Intel® AMX\n",
"\n",
"The training time is recorded."
]
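Once the three timings are available, the comparison the notebook builds toward is a simple ratio; a small sketch, assuming the FP32 and BF16 timing variables produced by the training cells (the `report_speedups` helper name is illustrative):

```python
def report_speedups(fp32_time, bf16_avx512_time, bf16_amx_time):
    """Print relative speedups against the FP32 baseline (larger is faster)."""
    print(f"BF16 with AVX512 speedup over FP32:    {fp32_time / bf16_avx512_time:.2f}x")
    print(f"BF16 with Intel AMX speedup over FP32: {fp32_time / bf16_amx_time:.2f}x")
```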
@@ -260,12 +260,12 @@
{
"cell_type": "code",
"execution_count": null,
"id": "75aafe25-4f7d-42ad-92ed-3438bd78c00b",
"id": "3faaf5de",
"metadata": {},
"outputs": [],
"source": [
"print(\"Training model with BF16 with AVX512\")\n",
"!python pytorch_training_avx512_bf16.py"
"print(\"Training model with BF16 with Intel® AMX\")\n",
"bf16_amx_training_time = trainModel(train_loader, modelName=\"bf16_withAmx\", dataType=\"bf16\")"
]
},
{
@@ -275,19 +275,12 @@
"metadata": {},
"outputs": [],
"source": [
"# Record the training time for BF16 using AVX512\n",
"bf16_avx512_training_time = None #TODO: enter in training time"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2fdc8a70-509a-4714-8524-084f34e287c3",
"metadata": {},
"outputs": [],
"source": [
"print(\"Training model with BF16 with Intel® AMX\")\n",
"bf16_amx_training_time = trainModel(train_loader, modelName=\"bf16_withAmx\", dataType=\"bf16\")"
"print(\"Training model with BF16 with AVX512\")\n",
"!python pytorch_training_avx512_bf16.py\n",
"\n",
"# Read the variable\n",
"with open('bf16_avx512_training_time.txt', 'r') as f:\n",
" bf16_avx512_training_time = float(f.read().strip())"
]
},
{
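The AVX512 run now happens in a separate process, and its training time is handed back to the notebook through a plain text file. A slightly more defensive version of that read, with a missing-file check added as a suggestion (the file and script names come from this PR):

```python
import os

def read_training_time(path="bf16_avx512_training_time.txt"):
    """Read the training time recorded by pytorch_training_avx512_bf16.py, or None if missing."""
    if not os.path.exists(path):
        print(f"Warning: {path} not found; did pytorch_training_avx512_bf16.py run successfully?")
        return None
    with open(path, "r") as f:
        return float(f.read().strip())

bf16_avx512_training_time = read_training_time()
```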
@@ -391,9 +384,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "Python 3.8.10 64-bit (microsoft store)",
"display_name": "pytorch_test",
"language": "python",
"name": "python3"
"name": "pytorch_test"
},
"language_info": {
"codemirror_mode": {
@@ -405,12 +398,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
},
"vscode": {
"interpreter": {
"hash": "ed6ae0d06e7bec0fef5f1fb38f177ceea45508ce95c68ed2f49461dd6a888a39"
}
"version": "3.11.0"
}
},
"nbformat": 4,

This file was deleted.

@@ -56,7 +56,7 @@ def trainModel(train_loader, modelName="myModel", dataType="fp32"):
for batch_idx, (data, target) in enumerate(train_loader):
optimizer.zero_grad()
if "bf16" == dataType:
with torch.cpu.amp.autocast(): # Auto Mixed Precision
with torch.amp.autocast('cpu'): # Auto Mixed Precision
# Setting memory_format to torch.channels_last could improve performance with 4D input data. This is optional.
data = data.to(memory_format=torch.channels_last)
output = model(data)
@@ -106,7 +106,11 @@ def main():

# Train models and acquire training times
print("Training model with BF16 with AVX512")
bf16_noAmx_training_time = trainModel(train_loader, modelName="bf16_noAmx", dataType="bf16")
bf16_avx512_training_time = trainModel(train_loader, modelName="bf16_noAmx", dataType="bf16")

# Save variable
with open('bf16_avx512_training_time.txt', 'w') as f:
f.write(str(bf16_avx512_training_time))

if __name__ == '__main__':
main()
@@ -17,8 +17,6 @@
"conda activate pytorch",
"python -m pip install -r requirements.txt",
"python -m ipykernel install --user --name=pytorch",
"python pytorch_training_avx512_bf16.py",
"python pytorch_training_amx_bf16.py",
"jupyter nbconvert --ExecutePreprocessor.enabled=True --ExecutePreprocessor.kernel_name=pytorch --to notebook IntelPyTorch_TrainingOptimizations_AMX_BF16.ipynb"
]
}