81 changes: 81 additions & 0 deletions deepmd/pd/utils/finetune.py
@@ -13,6 +13,78 @@
log = logging.getLogger(__name__)


def _warn_descriptor_config_differences(
input_descriptor: dict,
pretrained_descriptor: dict,
model_branch: str = "Default",
) -> None:
"""
Warn about differences between input descriptor config and pretrained model's descriptor config.

Parameters
----------
input_descriptor : dict
Descriptor configuration from input.json
pretrained_descriptor : dict
Descriptor configuration from pretrained model
model_branch : str
Model branch name for logging context
"""
if input_descriptor == pretrained_descriptor:
return

# Collect differences
differences = []

# Check for keys that differ in values
for key in input_descriptor:
if key in pretrained_descriptor:
if input_descriptor[key] != pretrained_descriptor[key]:
differences.append(
f" {key}: {input_descriptor[key]} -> {pretrained_descriptor[key]}"
)
else:
differences.append(f" {key}: {input_descriptor[key]} -> (removed)")

# Check for keys only in pretrained model
for key in pretrained_descriptor:
if key not in input_descriptor:
differences.append(f" {key}: (added) -> {pretrained_descriptor[key]}")

if differences:
log.warning(
f"Descriptor configuration in input.json differs from pretrained model "
f"(branch '{model_branch}'). The input configuration will be overwritten "
f"with the pretrained model's configuration:\n" + "\n".join(differences)
)

# Special warning for nlayer changes (check both top-level and nested)
if (
"nlayer" in input_descriptor
and "nlayer" in pretrained_descriptor
and input_descriptor["nlayer"] != pretrained_descriptor["nlayer"]
):
log.warning(
f"IMPORTANT: nlayer changed from {input_descriptor['nlayer']} to "
f"{pretrained_descriptor['nlayer']}. This may significantly affect "
f"model architecture and performance."
)

# Check for nested nlayers in repformer (DPA2/DPA3 models)
input_repformer = input_descriptor.get("repformer", {})
pretrained_repformer = pretrained_descriptor.get("repformer", {})
if (
"nlayers" in input_repformer
and "nlayers" in pretrained_repformer
and input_repformer["nlayers"] != pretrained_repformer["nlayers"]
):
log.warning(
f"IMPORTANT: repformer.nlayers changed from {input_repformer['nlayers']} to "
f"{pretrained_repformer['nlayers']}. This may significantly affect "
f"model architecture and performance."
)


def get_finetune_rule_single(
_single_param_target,
_model_param_pretrained,
@@ -61,6 +133,15 @@ def get_finetune_rule_single(
"descriptor": single_config.get("descriptor", {}).get("trainable", True),
"fitting_net": single_config.get("fitting_net", {}).get("trainable", True),
}

# Warn about descriptor configuration differences before overwriting
if "descriptor" in single_config and "descriptor" in single_config_chosen:
_warn_descriptor_config_differences(
single_config["descriptor"],
single_config_chosen["descriptor"],
model_branch_chosen,
)

single_config["descriptor"] = single_config_chosen["descriptor"]
if not new_fitting:
single_config["fitting_net"] = single_config_chosen["fitting_net"]
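For context, a hedged sketch of what the new helper reports: the configs below are hypothetical, and the expected log text is reconstructed from the warning strings in the function above.

# Assumes _warn_descriptor_config_differences from the diff above is in scope.
input_desc = {"type": "dpa2", "nlayer": 6, "rcut": 6.0}
pretrained_desc = {"type": "dpa2", "nlayer": 12, "rcut": 9.0, "sel": 120}

_warn_descriptor_config_differences(input_desc, pretrained_desc, "Default")
# First warning:
#   Descriptor configuration in input.json differs from pretrained model
#   (branch 'Default'). The input configuration will be overwritten
#   with the pretrained model's configuration:
#     nlayer: 6 -> 12
#     rcut: 6.0 -> 9.0
#     sel: (added) -> 120
# Second warning, from the special-case nlayer check:
#   IMPORTANT: nlayer changed from 6 to 12. This may significantly affect
#   model architecture and performance.
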
81 changes: 81 additions & 0 deletions deepmd/pt/utils/finetune.py
@@ -16,6 +16,78 @@
log = logging.getLogger(__name__)


def _warn_descriptor_config_differences(
input_descriptor: dict,
pretrained_descriptor: dict,
model_branch: str = "Default",
) -> None:
"""
Warn about differences between input descriptor config and pretrained model's descriptor config.

Parameters
----------
input_descriptor : dict
Descriptor configuration from input.json
pretrained_descriptor : dict
Descriptor configuration from pretrained model
model_branch : str
Model branch name for logging context
"""
if input_descriptor == pretrained_descriptor:
return

# Collect differences
differences = []

# Check for keys that differ in values
for key in input_descriptor:
if key in pretrained_descriptor:
if input_descriptor[key] != pretrained_descriptor[key]:
differences.append(
f" {key}: {input_descriptor[key]} -> {pretrained_descriptor[key]}"
)
Copilot AI commented on Aug 29, 2025:

For complex nested dictionaries or large configuration objects, string representation in f-strings could be expensive and potentially produce very long log messages. Consider truncating or using a more efficient representation for complex objects.
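
One way to follow this suggestion, sketched against the code above rather than taken from the PR, is to clip each value's repr with the standard-library reprlib before formatting:

import reprlib

# Sketch only: cap how much of each config value reaches the log.
_abbrev = reprlib.Repr()
_abbrev.maxlevel = 2    # descend at most two levels into nested dicts
_abbrev.maxdict = 6     # show at most six entries per dict
_abbrev.maxstring = 60  # truncate long strings

differences.append(
    f"  {key}: {_abbrev.repr(input_descriptor[key])} -> "
    f"{_abbrev.repr(pretrained_descriptor[key])}"
)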
else:
differences.append(f" {key}: {input_descriptor[key]} -> (removed)")

# Check for keys only in pretrained model
for key in pretrained_descriptor:
if key not in input_descriptor:
differences.append(f" {key}: (added) -> {pretrained_descriptor[key]}")

if differences:
log.warning(
f"Descriptor configuration in input.json differs from pretrained model "
f"(branch '{model_branch}'). The input configuration will be overwritten "
f"with the pretrained model's configuration:\n" + "\n".join(differences)
)

# Special warning for nlayer changes (check both top-level and nested)
if (
"nlayer" in input_descriptor
and "nlayer" in pretrained_descriptor
and input_descriptor["nlayer"] != pretrained_descriptor["nlayer"]
):
log.warning(
f"IMPORTANT: nlayer changed from {input_descriptor['nlayer']} to "
f"{pretrained_descriptor['nlayer']}. This may significantly affect "
f"model architecture and performance."
)

# Check for nested nlayers in repformer (DPA2/DPA3 models)
input_repformer = input_descriptor.get("repformer", {})
pretrained_repformer = pretrained_descriptor.get("repformer", {})
if (
"nlayers" in input_repformer
and "nlayers" in pretrained_repformer
and input_repformer["nlayers"] != pretrained_repformer["nlayers"]
):
log.warning(
f"IMPORTANT: repformer.nlayers changed from {input_repformer['nlayers']} to "
f"{pretrained_repformer['nlayers']}. This may significantly affect "
f"model architecture and performance."
)


def get_finetune_rule_single(
_single_param_target,
_model_param_pretrained,
@@ -64,6 +136,15 @@ def get_finetune_rule_single(
"descriptor": single_config.get("descriptor", {}).get("trainable", True),
"fitting_net": single_config.get("fitting_net", {}).get("trainable", True),
}

# Warn about descriptor configuration differences before overwriting
if "descriptor" in single_config and "descriptor" in single_config_chosen:
_warn_descriptor_config_differences(
single_config["descriptor"],
single_config_chosen["descriptor"],
model_branch_chosen,
)

single_config["descriptor"] = single_config_chosen["descriptor"]
if not new_fitting:
single_config["fitting_net"] = single_config_chosen["fitting_net"]
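To exercise the new path end to end, a hypothetical pytest sketch (not part of this PR) can assert that the warning is emitted for a mismatched key; it assumes the pt module layout shown above.

import logging

from deepmd.pt.utils.finetune import _warn_descriptor_config_differences


def test_warns_on_descriptor_mismatch(caplog):
    # Hypothetical minimal configs: one differing key is enough to trigger it.
    with caplog.at_level(logging.WARNING):
        _warn_descriptor_config_differences({"rcut": 6.0}, {"rcut": 9.0})
    assert "differs from pretrained model" in caplog.text
    assert "rcut: 6.0 -> 9.0" in caplog.text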