diff --git a/paddle/phi/kernels/xpu/amp_kernel.cc b/paddle/phi/kernels/xpu/amp_kernel.cc
index b95774da1a23db..cc507374075cae 100644
--- a/paddle/phi/kernels/xpu/amp_kernel.cc
+++ b/paddle/phi/kernels/xpu/amp_kernel.cc
@@ -54,7 +54,7 @@ void UpdateLossScalingKernel(const Context& dev_ctx,
   const bool* found_inf_data = found_infinite.data<bool>();
   bool cpu_found_inf_data = false;
   if (found_infinite.place().GetType() == AllocationType::XPU) {
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        static_cast<void*>(&cpu_found_inf_data),
                        found_infinite.place(),
                        static_cast<const void*>(found_inf_data),
@@ -94,7 +94,7 @@ void UpdateLossScalingKernel(const Context& dev_ctx,
   int cpu_good_in_data;
   MPDType cpu_pre_loss_scaling_data;
   if (in_bad_steps.place().GetType() == AllocationType::XPU) {
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        static_cast<void*>(&cpu_bad_in_data),
                        in_bad_steps.place(),
                        static_cast<const void*>(bad_in_data),
@@ -104,7 +104,7 @@ void UpdateLossScalingKernel(const Context& dev_ctx,
   }
 
   if (in_good_steps.place().GetType() == AllocationType::XPU) {
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        static_cast<void*>(&cpu_good_in_data),
                        in_good_steps.place(),
                        static_cast<const void*>(good_in_data),
@@ -114,7 +114,7 @@ void UpdateLossScalingKernel(const Context& dev_ctx,
   }
 
   if (prev_loss_scaling.place().GetType() == AllocationType::XPU) {
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        static_cast<void*>(&cpu_pre_loss_scaling_data),
                        prev_loss_scaling.place(),
                        static_cast<const void*>(pre_loss_scaling_data),
@@ -151,17 +151,17 @@ void UpdateLossScalingKernel(const Context& dev_ctx,
   // copy to device
   memory_utils::Copy(dev_ctx.GetPlace(),
                      bad_out_data,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      &cpu_bad_out_data,
                      sizeof(int));
   memory_utils::Copy(dev_ctx.GetPlace(),
                      good_out_data,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      &cpu_good_out_data,
                      sizeof(int));
   memory_utils::Copy(dev_ctx.GetPlace(),
                      updated_loss_scaling_data,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      &cpu_updated_loss_scaling_data,
                      sizeof(MPDType));
 }
@@ -186,7 +186,7 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
   bool has_inf_nans = false;
   MPDType cpu_scale_data;
   if (scale.place().GetType() == AllocationType::XPU) {
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        static_cast<void*>(&cpu_scale_data),
                        scale.place(),
                        static_cast<const void*>(scale_data),
@@ -221,7 +221,7 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
                                      inf_nan_check_ptr + i);
       PADDLE_ENFORCE_XDNN_SUCCESS(r, "check_finite_unscale");
     }
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        cpu_found_tensor.data<bool>(),
                        dev_ctx.GetPlace(),
                        inf_nan_check.data<bool>(),
@@ -249,7 +249,7 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
                                inf_nan_check.data<bool>(),
                                x->numel());
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "check_nan_or_inf");
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        &has_inf_nans,
                        dev_ctx.GetPlace(),
                        inf_nan_check.data<bool>(),
@@ -303,7 +303,7 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
   }
   memory_utils::Copy(dev_ctx.GetPlace(),
                      found_inf_data,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      &cpu_found_inf_data,
                      sizeof(bool));
 }
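Reviewer note: every hunk in amp_kernel.cc keeps the same destination-first argument order, Copy(dst_place, dst, src_place, src, nbytes). Below is a minimal standalone sketch of the device-to-host pull-back pattern these hunks touch; the Place struct and Copy function are stubs standing in for the real phi types, not the actual API.

#include <cstring>
#include <iostream>

struct Place { const char* name; };  // stand-in for phi::Place

// Stub standing in for memory_utils::Copy: destination place and pointer
// come first, then source place and pointer, then the byte count.
void Copy(Place dst_place, void* dst, Place src_place, const void* src,
          std::size_t nbytes) {
  std::memcpy(dst, src, nbytes);  // host-only stand-in for the real DMA
  std::cout << src_place.name << " -> " << dst_place.name << ": "
            << nbytes << " bytes\n";
}

int main() {
  Place cpu{"CPU"}, xpu{"XPU"};
  bool found_inf_device = true;   // pretend this byte lives on the XPU
  bool cpu_found_inf_data = false;
  // Mirrors the found_infinite pull-back above: CPU destination first.
  Copy(cpu, &cpu_found_inf_data, xpu, &found_inf_device, sizeof(bool));
  std::cout << std::boolalpha << cpu_found_inf_data << "\n";  // prints: true
}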
diff --git a/paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc b/paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc
index d97fb285fbe0b4..d6faab84616689 100644
--- a/paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc
+++ b/paddle/phi/kernels/xpu/distribute_fpn_proposals_kernel.cc
@@ -26,7 +26,7 @@ static void Sort(const XPUContext& dev_ctx,
                  DenseTensor* index_out) {
   auto* value_data = value.data<T>();
   auto place = dev_ctx.GetPlace();
-  auto cpu_place = phi::CPUPlace();
+  auto cpu_place = CPUPlace();
 
   DenseTensor scores_slice_cpu;
   scores_slice_cpu.Resize({value.numel()});
diff --git a/paddle/phi/kernels/xpu/dropout_kernel.cc b/paddle/phi/kernels/xpu/dropout_kernel.cc
index 1dd5603a76b3ed..eaeda67b4aa4f2 100644
--- a/paddle/phi/kernels/xpu/dropout_kernel.cc
+++ b/paddle/phi/kernels/xpu/dropout_kernel.cc
@@ -49,7 +49,7 @@ void DropoutRawKernel(const Context& dev_ctx,
   int seed_data = 0;
   if (seed_tensor.get_ptr() != nullptr) {
     if ((seed_tensor->place()).GetType() == AllocationType::XPU) {
-      memory_utils::Copy(phi::CPUPlace(),
+      memory_utils::Copy(CPUPlace(),
                          &seed_data,
                          seed_tensor->place(),
                          seed_tensor->data<int>(),
diff --git a/paddle/phi/kernels/xpu/eig_kernel.cc b/paddle/phi/kernels/xpu/eig_kernel.cc
index 212d3d7ad5b8df..988ddba293ed63 100644
--- a/paddle/phi/kernels/xpu/eig_kernel.cc
+++ b/paddle/phi/kernels/xpu/eig_kernel.cc
@@ -32,7 +32,7 @@ void EigKernel(const Context& dev_ctx,
     return;
   }
 
-  auto cpu_place = phi::CPUPlace();
+  auto cpu_place = CPUPlace();
   phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
   auto* cpu_ctx = static_cast<phi::CPUContext*>(pool.Get(cpu_place));
diff --git a/paddle/phi/kernels/xpu/flash_attn_utils.h b/paddle/phi/kernels/xpu/flash_attn_utils.h
index 048c9d9c7b217d..06a65be07781cf 100644
--- a/paddle/phi/kernels/xpu/flash_attn_utils.h
+++ b/paddle/phi/kernels/xpu/flash_attn_utils.h
@@ -80,7 +80,7 @@ static void GenerateRNGState(
     const int64_t num_heads) {
   if (fixed_seed_offset.get_ptr()) {
     if ((fixed_seed_offset->place()).GetType() == AllocationType::XPU) {
-      memory_utils::Copy(phi::CPUPlace(),
+      memory_utils::Copy(CPUPlace(),
                          seed_offset_data,
                          fixed_seed_offset->place(),
                          fixed_seed_offset->data<int64_t>(),
diff --git a/paddle/phi/kernels/xpu/generate_proposals_kernel.cc b/paddle/phi/kernels/xpu/generate_proposals_kernel.cc
index 0d6eaa18f865c2..af489043213214 100644
--- a/paddle/phi/kernels/xpu/generate_proposals_kernel.cc
+++ b/paddle/phi/kernels/xpu/generate_proposals_kernel.cc
@@ -31,7 +31,7 @@ static void SortDescending(const XPUContext& dev_ctx,
                            int pre_nms_top_n) {
   auto* value_data = value.data<T>();
   auto place = dev_ctx.GetPlace();
-  auto cpu_place = phi::CPUPlace();
+  auto cpu_place = CPUPlace();
 
   DenseTensor scores_slice_cpu;
   scores_slice_cpu.Resize({value.numel()});
@@ -180,11 +180,8 @@ std::pair<DenseTensor, DenseTensor> ProposalForOneImage(
   int keep_num;
   const auto xpu_place = dev_ctx.GetPlace();
 
-  memory_utils::Copy(phi::CPUPlace(),
-                     &keep_num,
-                     xpu_place,
-                     keep_num_t.data<int>(),
-                     sizeof(int));
+  memory_utils::Copy(
+      CPUPlace(), &keep_num, xpu_place, keep_num_t.data<int>(), sizeof(int));
   keep_index.Resize({keep_num});
 
   DenseTensor scores_filter, proposals_filter;
@@ -354,7 +351,7 @@ void GenerateProposalsKernel(const Context& dev_ctx,
   tmp_variances.Resize(common::make_ddim({tmp_variances.numel() / 4, 4}));
 
   auto place = dev_ctx.GetPlace();
-  auto cpu_place = phi::CPUPlace();
+  auto cpu_place = CPUPlace();
 
   int num_proposals = 0;
   std::vector<int> offset(1, 0);
diff --git a/paddle/phi/kernels/xpu/increment_kernel.cc b/paddle/phi/kernels/xpu/increment_kernel.cc
index bcee129b626c79..4076a60e5d95d4 100644
--- a/paddle/phi/kernels/xpu/increment_kernel.cc
+++ b/paddle/phi/kernels/xpu/increment_kernel.cc
@@ -44,7 +44,7 @@ void IncrementKernel(const Context& dev_ctx,
   T* value_xpu = RAII_GUARD.alloc_l3_or_gm<T>(1);
   memory_utils::Copy(dev_ctx.GetPlace(),
                      value_xpu,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      reinterpret_cast<void*>(&value_as_t),
                      sizeof(T));
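Reviewer note: the rename is safe because these files define their kernels inside namespace phi (the namespace declaration is not visible in these hunks, but is implied by the unqualified spelling compiling), so phi::CPUPlace and CPUPlace name the same type there and the prefix was redundant. A compilable toy of that lookup rule:

#include <type_traits>

namespace phi {
struct CPUPlace {};  // stand-in for the real phi::CPUPlace

inline void Kernel() {
  // Inside namespace phi, unqualified lookup already finds phi::CPUPlace,
  // so the phi:: prefix this patch removes changes nothing.
  static_assert(std::is_same<CPUPlace, phi::CPUPlace>::value,
                "both spellings name one type");
}
}  // namespace phi

int main() { phi::Kernel(); }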
diff --git a/paddle/phi/kernels/xpu/lamb_kernel.cc b/paddle/phi/kernels/xpu/lamb_kernel.cc
index da49b65ab7e264..ba58885154c421 100644
--- a/paddle/phi/kernels/xpu/lamb_kernel.cc
+++ b/paddle/phi/kernels/xpu/lamb_kernel.cc
@@ -62,7 +62,7 @@ void LambKernel(const Context& dev_ctx,
     cpu_skip_update = *(skip_update->data<bool>());
   } else {
     const bool* skip_update_flag = skip_update->data<bool>();
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        static_cast<void*>(&cpu_skip_update),
                        dev_ctx.GetPlace(),
                        static_cast<const void*>(skip_update_flag),
diff --git a/paddle/phi/kernels/xpu/masked_select_kernel.cc b/paddle/phi/kernels/xpu/masked_select_kernel.cc
index f66d2dee5862c3..62cf9e3ee11109 100644
--- a/paddle/phi/kernels/xpu/masked_select_kernel.cc
+++ b/paddle/phi/kernels/xpu/masked_select_kernel.cc
@@ -58,7 +58,7 @@ void MaskedSelectKernel(const Context& dev_ctx,
       xpu::nonzero_count(
           dev_ctx.x_context(), mask_data, out_size, mask.numel()),
       "nonzero_count ");
-  memory_utils::Copy(phi::CPUPlace(),
+  memory_utils::Copy(CPUPlace(),
                      static_cast<void*>(&out_size_cpu),
                      mask.place(),
                      static_cast<const void*>(out_size),
diff --git a/paddle/phi/kernels/xpu/mean_all_grad_kernel.cc b/paddle/phi/kernels/xpu/mean_all_grad_kernel.cc
index ff2f31cb6701d5..0607daa2fc9fc9 100644
--- a/paddle/phi/kernels/xpu/mean_all_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/mean_all_grad_kernel.cc
@@ -54,7 +54,7 @@ void MeanAllGradKernel(const Context& dev_ctx,
   const T* dy = OG->data<T>();
   T dy0_value;
   xpu_wait(dev_ctx.x_context()->xpu_stream);
-  memory_utils::Copy(phi::CPUPlace(), &dy0_value, OG->place(), dy, sizeof(T));
+  memory_utils::Copy(CPUPlace(), &dy0_value, OG->place(), dy, sizeof(T));
 
   float dy0_fp32 = static_cast<float>(dy0_value);
   dy0_fp32 = dy0_fp32 / static_cast<float>(IG->numel());
diff --git a/paddle/phi/kernels/xpu/multiclass_nms3_kernel.cc b/paddle/phi/kernels/xpu/multiclass_nms3_kernel.cc
index d9cc1217a14209..3403c9da8b9399 100644
--- a/paddle/phi/kernels/xpu/multiclass_nms3_kernel.cc
+++ b/paddle/phi/kernels/xpu/multiclass_nms3_kernel.cc
@@ -157,7 +157,7 @@ void MultiClassNMSKernel(const Context& dev_ctx,
     std::vector<T> temp_value(out_dim, 0.0f);
     memory_utils::Copy(dev_ctx.GetPlace(),
                        out_ptr,
-                       phi::CPUPlace(),
+                       CPUPlace(),
                        temp_value.data(),
                        1 * out_dim * sizeof(T));
@@ -167,7 +167,7 @@ void MultiClassNMSKernel(const Context& dev_ctx,
     std::vector<int> temp_idx(1, 0);
     memory_utils::Copy(dev_ctx.GetPlace(),
                        out_index_ptr,
-                       phi::CPUPlace(),
+                       CPUPlace(),
                        temp_idx.data(),
                        1 * sizeof(int));
   } else {
@@ -182,7 +182,7 @@ void MultiClassNMSKernel(const Context& dev_ctx,
     T* out_ptr = out->template data<T>();
     memory_utils::Copy(dev_ctx.GetPlace(),
                        out_ptr,
-                       phi::CPUPlace(),
+                       CPUPlace(),
                        outs_vec_.data(),
                        num_kept * out_dim * sizeof(T));
     if (return_index) {
@@ -191,7 +191,7 @@ void MultiClassNMSKernel(const Context& dev_ctx,
       int* out_index_ptr = index->template data<int>();
       memory_utils::Copy(dev_ctx.GetPlace(),
                          out_index_ptr,
-                         phi::CPUPlace(),
+                         CPUPlace(),
                          out_index_vec_.data(),
                          num_kept * sizeof(int));
     }
diff --git a/paddle/phi/kernels/xpu/nonzero_kernel.cc b/paddle/phi/kernels/xpu/nonzero_kernel.cc
index 4ce9b76023c7c5..c59e67b31e305d 100644
--- a/paddle/phi/kernels/xpu/nonzero_kernel.cc
+++ b/paddle/phi/kernels/xpu/nonzero_kernel.cc
@@ -47,7 +47,7 @@ void NonZeroKernel(const Context& dev_ctx,
   PADDLE_ENFORCE_XDNN_SUCCESS(ret, "nonzero_count");
 
   int64_t true_num_cpu;
-  memory_utils::Copy(phi::CPUPlace(),
+  memory_utils::Copy(CPUPlace(),
                      static_cast<void*>(&true_num_cpu),
                      dev_ctx.GetPlace(),
                      static_cast<const void*>(true_num),
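Reviewer note: masked_select and nonzero above share a count-then-size pattern: the device computes an element count, the kernel copies that single scalar back to a CPU() destination, and only then can the host size the output tensor. A stubbed sketch of that control flow; DeviceCountNonzero is a hypothetical stand-in for xpu::nonzero_count, and memcpy stands in for the device-to-host Copy:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

// Hypothetical stand-in for xpu::nonzero_count writing into device memory.
void DeviceCountNonzero(const std::vector<int>& mask, int64_t* device_count) {
  int64_t n = 0;
  for (int v : mask) n += (v != 0);
  *device_count = n;  // pretend this write lands on the XPU
}

int main() {
  std::vector<int> mask = {0, 1, 0, 1, 1};
  int64_t device_count = 0;
  DeviceCountNonzero(mask, &device_count);

  int64_t true_num_cpu = 0;  // matches the kernels' host-side scalar
  // Stand-in for memory_utils::Copy(CPUPlace(), &true_num_cpu, ...);
  // this step forces a device-to-host synchronization in the real kernels.
  std::memcpy(&true_num_cpu, &device_count, sizeof(int64_t));

  std::vector<int> out(true_num_cpu);  // host sizes the output afterwards
  std::cout << "output numel = " << out.size() << "\n";  // prints: 3
}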
diff --git a/paddle/phi/kernels/xpu/randint_kernel.cc b/paddle/phi/kernels/xpu/randint_kernel.cc
index f284846637f09f..ff2d5cd5e66a20 100644
--- a/paddle/phi/kernels/xpu/randint_kernel.cc
+++ b/paddle/phi/kernels/xpu/randint_kernel.cc
@@ -49,7 +49,7 @@ void RandintKernel(const Context& dev_ctx,
   }
   memory_utils::Copy(dev_ctx.GetPlace(),
                      data,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      reinterpret_cast<void*>(data_cpu.get()),
                      size * sizeof(T));
 }
diff --git a/paddle/phi/kernels/xpu/roi_align_grad_kernel.cc b/paddle/phi/kernels/xpu/roi_align_grad_kernel.cc
index 55f7cffd04df9a..f789902814e3ca 100644
--- a/paddle/phi/kernels/xpu/roi_align_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/roi_align_grad_kernel.cc
@@ -49,7 +49,7 @@ void RoiAlignGradKernel(const Context& dev_ctx,
   }
   DenseTensor roi_batch_id_list;
   roi_batch_id_list.Resize({rois_num});
-  auto cplace = phi::CPUPlace();
+  auto cplace = CPUPlace();
   auto xplace = dev_ctx.GetPlace();
 
   int rois_batch_size = 0;
diff --git a/paddle/phi/kernels/xpu/roi_align_kernel.cc b/paddle/phi/kernels/xpu/roi_align_kernel.cc
index 7d3dc4bc9f7b98..c825ba2df2d5de 100644
--- a/paddle/phi/kernels/xpu/roi_align_kernel.cc
+++ b/paddle/phi/kernels/xpu/roi_align_kernel.cc
@@ -48,7 +48,7 @@ void RoiAlignKernel(const Context& dev_ctx,
 
   DenseTensor roi_batch_id_list;
   roi_batch_id_list.Resize({rois_num});
-  auto cplace = phi::CPUPlace();
+  auto cplace = CPUPlace();
   int* roi_batch_id_data = dev_ctx.template HostAlloc<int>(&roi_batch_id_list);
   auto xplace = dev_ctx.GetPlace();
   int rois_batch_size = 0;
diff --git a/paddle/phi/kernels/xpu/scatter_kernel.cc b/paddle/phi/kernels/xpu/scatter_kernel.cc
index 84adeab4af6168..d5d3faa28e4a3b 100644
--- a/paddle/phi/kernels/xpu/scatter_kernel.cc
+++ b/paddle/phi/kernels/xpu/scatter_kernel.cc
@@ -91,7 +91,7 @@ void ScatterKernel(const Context &dev_ctx,
   int64_t dim1 = common::product(common::slice_ddim(x_dims, 1, x_dims.size()));
 
   DenseTensor indices_cpu(index.type());
-  phi::Copy(dev_ctx, index, phi::CPUPlace(), true, &indices_cpu);
+  phi::Copy(dev_ctx, index, CPUPlace(), true, &indices_cpu);
 
   int r = 0;
   if (index_type == phi::DataType::INT32) {
diff --git a/paddle/phi/kernels/xpu/set_value_kernel.cc b/paddle/phi/kernels/xpu/set_value_kernel.cc
index 01b213d1de12e3..6dd0c80a09d463 100644
--- a/paddle/phi/kernels/xpu/set_value_kernel.cc
+++ b/paddle/phi/kernels/xpu/set_value_kernel.cc
@@ -412,7 +412,7 @@ void SetValueKernel(const Context& dev_ctx,
       reinterpret_cast<uint8_t*>(RAII_GUARD.alloc_l3_or_gm<uint8_t>(values_size));
   memory_utils::Copy(dev_ctx.GetPlace(),
                      value_data,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      value_data_uint8_cpu,
                      values_length);
   auto value_dims = common::make_ddim(shape);
diff --git a/paddle/phi/kernels/xpu/truncated_gaussian_random_kernel.cc b/paddle/phi/kernels/xpu/truncated_gaussian_random_kernel.cc
index c65ca283f333fb..16261cdb46cd3a 100644
--- a/paddle/phi/kernels/xpu/truncated_gaussian_random_kernel.cc
+++ b/paddle/phi/kernels/xpu/truncated_gaussian_random_kernel.cc
@@ -56,7 +56,7 @@ void TruncatedGaussianRandomKernel(const Context& dev_ctx,
 
   memory_utils::Copy(dev_ctx.GetPlace(),
                      data,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      reinterpret_cast<void*>(data_cpu.get()),
                      size * sizeof(T));
 }
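Reviewer note: randint, truncated_gaussian_random, and the two uniform_inplace kernels below all run the opposite direction: values are generated into a host staging buffer (data_cpu) and uploaded with one Copy whose destination is dev_ctx.GetPlace(). A standalone stubbed sketch, with a plain heap array standing in for the XPU allocation and memcpy for the upload:

#include <cstring>
#include <iostream>
#include <memory>
#include <random>

int main() {
  const std::size_t size = 8;

  // Host staging buffer, as in the kernels' data_cpu.
  std::unique_ptr<float[]> data_cpu(new float[size]);
  std::mt19937 rng(42);
  std::uniform_real_distribution<float> dist(0.0f, 1.0f);
  for (std::size_t i = 0; i < size; ++i) data_cpu[i] = dist(rng);

  // Stand-in for the device tensor; the real kernels upload with
  // memory_utils::Copy(dev_ctx.GetPlace(), data, CPUPlace(), src, nbytes).
  std::unique_ptr<float[]> device_data(new float[size]);
  std::memcpy(device_data.get(), data_cpu.get(), size * sizeof(float));

  std::cout << "uploaded " << size * sizeof(float) << " bytes, first value "
            << device_data[0] << "\n";
}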
diff --git a/paddle/phi/kernels/xpu/uniform_inplace_grad_kernel.cc b/paddle/phi/kernels/xpu/uniform_inplace_grad_kernel.cc
index 73720458db5f6c..126d84ab120b77 100644
--- a/paddle/phi/kernels/xpu/uniform_inplace_grad_kernel.cc
+++ b/paddle/phi/kernels/xpu/uniform_inplace_grad_kernel.cc
@@ -37,7 +37,7 @@ void XPUUniformRandomInplaceGradKernel(const Context& dev_ctx,
     }
     phi::memory_utils::Copy(dev_ctx.GetPlace(),
                             data,
-                            phi::CPUPlace(),
+                            CPUPlace(),
                             reinterpret_cast<void*>(data_cpu.get()),
                             size * sizeof(T));
   }
diff --git a/paddle/phi/kernels/xpu/uniform_inplace_kernel.cc b/paddle/phi/kernels/xpu/uniform_inplace_kernel.cc
index d05a4b792b2df5..f768d7ef78bdb7 100644
--- a/paddle/phi/kernels/xpu/uniform_inplace_kernel.cc
+++ b/paddle/phi/kernels/xpu/uniform_inplace_kernel.cc
@@ -60,7 +60,7 @@ void XPUUniformRandomInplaceKernel(const Context& dev_ctx,
     }
     phi::memory_utils::Copy(dev_ctx.GetPlace(),
                             data,
-                            phi::CPUPlace(),
+                            CPUPlace(),
                             reinterpret_cast<void*>(data_cpu.get()),
                             size * sizeof(T));
 }
diff --git a/paddle/phi/kernels/xpu/unique_kernel.cc b/paddle/phi/kernels/xpu/unique_kernel.cc
index a45df5e249e5f5..43b1bab48b46aa 100644
--- a/paddle/phi/kernels/xpu/unique_kernel.cc
+++ b/paddle/phi/kernels/xpu/unique_kernel.cc
@@ -57,7 +57,7 @@ void XPUFlattenUniqueKernelImpl(const Context& dev_ctx,
                                 nullptr,
                                 false);
   PADDLE_ENFORCE_XDNN_SUCCESS(r, "unique_count");
-  memory_utils::Copy(phi::CPUPlace(),
+  memory_utils::Copy(CPUPlace(),
                      &unique_len_cpu,
                      dev_ctx.GetPlace(),
                      unique_len_xpu,
@@ -218,7 +218,7 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx,
   std::vector<IndexT> inverse_cpu(axis_len);
   std::vector<IndexT> counts_cpu;
   std::vector<IndexT> ori_idx_cpu(axis_len);
-  memory_utils::Copy(phi::CPUPlace(),
+  memory_utils::Copy(CPUPlace(),
                      ori_idx_cpu.data(),
                      dev_ctx.GetPlace(),
                      ori_idx_xpu,
@@ -241,7 +241,7 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx,
                        {1});
     PADDLE_ENFORCE_XDNN_SUCCESS(r, "reduce_all");
 
-    memory_utils::Copy(phi::CPUPlace(),
+    memory_utils::Copy(CPUPlace(),
                        adj_identical_cpu_data,
                        dev_ctx.GetPlace(),
                        adj_identical_xpu,
@@ -271,7 +271,7 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx,
       RAII_GUARD.alloc_l3_or_gm<IndexT>(unique_len * slice_size);
   memory_utils::Copy(dev_ctx.GetPlace(),
                      unique_axis_idx_xpu,
-                     phi::CPUPlace(),
+                     CPUPlace(),
                      unique_axis.data(),
                      unique_len * sizeof(IndexT));
   r = xpu::paddle_gather(dev_ctx.x_context(),
@@ -304,7 +304,7 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx,
     auto* indices_data = dev_ctx.template Alloc<IndexT>(indices);
     memory_utils::Copy(dev_ctx.GetPlace(),
                        indices_data,
-                       phi::CPUPlace(),
+                       CPUPlace(),
                        indices_cpu.data(),
                        sizeof(IndexT) * unique_len);
   }
@@ -314,7 +314,7 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx,
     auto* reverse_data = dev_ctx.template Alloc<IndexT>(index);
     memory_utils::Copy(dev_ctx.GetPlace(),
                        reverse_data,
-                       phi::CPUPlace(),
+                       CPUPlace(),
                        inverse_cpu.data(),
                        sizeof(IndexT) * axis_len);
   }
@@ -324,7 +324,7 @@ void XPUDimUniqueKernelImpl(const Context& dev_ctx,
     auto* counts_data = dev_ctx.template Alloc<IndexT>(counts);
     memory_utils::Copy(dev_ctx.GetPlace(),
                        counts_data,
-                       phi::CPUPlace(),
+                       CPUPlace(),
                        counts_cpu.data(),
                        sizeof(IndexT) * unique_len);
   }
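Reviewer note: one call in scatter_kernel.cc uses the tensor-level phi::Copy overload rather than memory_utils::Copy; its fourth argument (true above) requests a blocking copy, which matters because the host reads indices_cpu immediately afterwards. A toy stand-in for that contract (CopyToHost is hypothetical, not the real API):

#include <iostream>
#include <vector>

// Toy stand-in for phi::Copy(dev_ctx, src, CPUPlace(), blocking, dst).
// With blocking = true the real call waits on the device stream before
// returning, so the caller may read *dst right away.
void CopyToHost(const std::vector<int>& src, bool blocking,
                std::vector<int>* dst) {
  *dst = src;  // host-only stand-in for the device-to-host transfer
  if (blocking) {
    // the real implementation would synchronize the XPU stream here
  }
}

int main() {
  std::vector<int> index_xpu = {3, 1, 2};  // pretend device-resident
  std::vector<int> indices_cpu;
  CopyToHost(index_xpu, /*blocking=*/true, &indices_cpu);
  std::cout << indices_cpu.size() << " indices readable on host\n";  // 3
}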