Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions paddle/phi/kernels/gpu/add_n_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ void AddNKernel(const Context &dev_ctx,
dev_ctx.GetPlace(), in_data.size() * sizeof(void *));
memory_utils::Copy(dev_ctx.GetPlace(),
tmp_in_array->ptr(),
phi::CPUPlace(),
CPUPlace(),
reinterpret_cast<void *>(in_data.data()),
in_data.size() * sizeof(void *),
dev_ctx.stream());
Expand Down Expand Up @@ -280,7 +280,7 @@ void AddNKernel(const Context &dev_ctx,

memory_utils::Copy(dev_ctx.GetPlace(),
tmp_sr_in_out_array->ptr(),
phi::CPUPlace(),
CPUPlace(),
reinterpret_cast<void *>(sr_in_out_data.data()),
sr_in_out_data.size() * sizeof(T *),
dev_ctx.stream());
Expand All @@ -301,7 +301,7 @@ void AddNKernel(const Context &dev_ctx,

memory_utils::Copy(dev_ctx.GetPlace(),
tmp_in_array->ptr(),
phi::CPUPlace(),
CPUPlace(),
reinterpret_cast<void *>(in_data.data()),
in_data.size() * sizeof(T *),
dev_ctx.stream());
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/gpu/amp_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ class LazyZeros<phi::GPUContext, T> {
size_t xs_size = xs.size();
if (xs_size == 0) return;

const auto& cpu_place = phi::CPUPlace();
const auto& cpu_place = CPUPlace();
// alloc each tensor's start index and copy to device
auto h_in_starts_mem =
phi::memory_utils::Alloc(cpu_place, (xs_size + 1) * sizeof(int64_t));
Expand Down Expand Up @@ -284,7 +284,7 @@ void CheckFiniteAndUnscaleKernel(const Context& dev_ctx,
size_t xs_size = xs.size();
if (xs_size == 0) return;

const auto& cpu_place = phi::CPUPlace();
const auto& cpu_place = CPUPlace();
// calculate each tensor's start index and copy to device
auto h_starts_tensor =
phi::memory_utils::Alloc(cpu_place, (xs_size + 1) * sizeof(int64_t));
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/gpu/assign_pos_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,11 @@ void AssignPosKernel(const Context& dev_ctx,

DenseTensor cpu_eff_num_len;
int64_t cpu_eff_num_len_data = 0;
bool is_cpu_place = eff_num_len_ptr->place() == phi::CPUPlace();
bool is_cpu_place = eff_num_len_ptr->place() == CPUPlace();
if (is_cpu_place) {
cpu_eff_num_len_data = eff_num_len_ptr->data<T>()[0];
} else {
Copy(dev_ctx, eff_num_len, phi::CPUPlace(), false, &cpu_eff_num_len);
Copy(dev_ctx, eff_num_len, CPUPlace(), false, &cpu_eff_num_len);
cpu_eff_num_len_data = cpu_eff_num_len.data<T>()[0];
}

Expand Down
12 changes: 6 additions & 6 deletions paddle/phi/kernels/gpu/average_accumulates_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,19 @@ void GetAccumulators<phi::GPUContext>(const phi::GPUContext& dev_ctx,
int64_t* old_num_accumulates) {
auto stream = dev_ctx.stream();
auto cuda_place = in_old_num_accumulates.place();
memory_utils::Copy(phi::CPUPlace(),
memory_utils::Copy(CPUPlace(),
old_num_accumulates,
cuda_place,
in_old_num_accumulates.data<int64_t>(),
sizeof(int64_t),
stream);
memory_utils::Copy(phi::CPUPlace(),
memory_utils::Copy(CPUPlace(),
num_accumulates,
cuda_place,
in_num_accumulates.data<int64_t>(),
sizeof(int64_t),
stream);
memory_utils::Copy(phi::CPUPlace(),
memory_utils::Copy(CPUPlace(),
num_updates,
cuda_place,
in_num_updates.data<int64_t>(),
Expand Down Expand Up @@ -70,21 +70,21 @@ void SetAccumulators<phi::GPUContext>(const phi::GPUContext& dev_ctx,
auto cuda_place = out_old_num_accumulates->place();
memory_utils::Copy(dev_ctx.GetPlace(),
out_num_accumulates_ptr,
phi::CPUPlace(),
CPUPlace(),
&num_accumulates,
sizeof(int64_t),
stream);

memory_utils::Copy(dev_ctx.GetPlace(),
out_old_num_accumulates_ptr,
phi::CPUPlace(),
CPUPlace(),
&old_num_accumulates,
sizeof(int64_t),
stream);

memory_utils::Copy(cuda_place,
out_num_updates_ptr,
phi::CPUPlace(),
CPUPlace(),
&num_updates,
sizeof(int64_t),
stream);
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/batch_norm_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -937,7 +937,7 @@ void BatchNormKernel(const Context &dev_ctx,
// if (dev_ctx.HasInput("MomentumTensor")) {
// const auto *mom_tensor = MomentumTensor;
// DenseTensor mom_cpu;
// paddle::framework::TensorCopySync(*mom_tensor, phi::CPUPlace(),
// paddle::framework::TensorCopySync(*mom_tensor, CPUPlace(),
// &mom_cpu);
// momentum = mom_cpu.data<float>()[0];
// }
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/bincount_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ void BincountCUDAInner(const Context& dev_ctx,
<<<num_blocks, PADDLE_CUDA_NUM_THREADS, 0, dev_ctx.stream()>>>(
input_data, input_numel, input_min_max_data, input_min_max_data + 1);

Copy(dev_ctx, input_min_max_t, phi::CPUPlace(), true, &input_min_max_cpu);
Copy(dev_ctx, input_min_max_t, CPUPlace(), true, &input_min_max_cpu);

InputT input_min = input_min_max_cpu.data<InputT>()[0];

Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/box_coder_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ void BoxCoderKernel(const Context &dev_ctx,
bytes,
phi::Stream(reinterpret_cast<phi::StreamId>(dev_ctx.stream())));
float *dev_var_data = reinterpret_cast<float *>(dev_var->ptr());
auto cplace = phi::CPUPlace();
auto cplace = CPUPlace();
const auto gplace = dev_ctx.GetPlace();
memory_utils::Copy(
gplace, dev_var_data, cplace, &variance[0], bytes, dev_ctx.stream());
Expand Down
9 changes: 4 additions & 5 deletions paddle/phi/kernels/gpu/check_numerics_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -349,10 +349,9 @@ static void PrintStack(const phi::GPUContext& dev_ctx,
const std::string& op_type,
const std::string& var_name,
int dev_id) {
auto cpu_stats =
phi::memory_utils::Alloc(phi::CPUPlace(), sizeof(int64_t) * 3);
auto cpu_stats = phi::memory_utils::Alloc(CPUPlace(), sizeof(int64_t) * 3);
int64_t* cpu_stats_ptr = reinterpret_cast<int64_t*>(cpu_stats->ptr());
phi::memory_utils::Copy(phi::CPUPlace(),
phi::memory_utils::Copy(CPUPlace(),
cpu_stats_ptr,
stats.place(),
stats.data(),
Expand Down Expand Up @@ -381,11 +380,11 @@ static void WriteToOutputDir(const phi::GPUContext& dev_ctx,
// Copy stats and values from GPU to CPU.
DenseTensor cpu_stats;
cpu_stats.Resize({static_cast<int64_t>(3)});
Copy(dev_ctx, stats, phi::CPUPlace(), false, &cpu_stats);
Copy(dev_ctx, stats, CPUPlace(), false, &cpu_stats);

DenseTensor cpu_values;
cpu_values.Resize({static_cast<int64_t>(3)});
Copy(dev_ctx, values, phi::CPUPlace(), false, &cpu_values);
Copy(dev_ctx, values, CPUPlace(), false, &cpu_values);
dev_ctx.Wait();

int dev_id = tensor.place().device;
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/cholesky_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ FUNC_WITH_TYPES(POTRF_INSTANCE);
workspace_device_size, \
phi::Stream(reinterpret_cast<phi::StreamId>(dev_ctx.stream()))); \
auto workspace_host = \
phi::memory_utils::Alloc(phi::CPUPlace(), workspace_host_size); \
phi::memory_utils::Alloc(CPUPlace(), workspace_host_size); \
PADDLE_ENFORCE_GPU_SUCCESS( \
dynload::cusolverDnXpotrf(handle, \
params, \
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/class_center_sample_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ void ClassCenterSampleKernel(const Context& dev_ctx,
// step 14: Get sampled class center for output
Copy<Context>(dev_ctx,
num_classes_per_device,
phi::CPUPlace(),
CPUPlace(),
true,
&num_classes_per_device);
T actual_num_samples = num_classes_per_device.data<T>()[rank + 1];
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ void GPUCollectFpnProposalsOpKernel(
auto score_in = score_ins[i];
if (multi_rois_num.size() > 0) {
DenseTensor temp;
Copy(dev_ctx, *multi_rois_num[i], phi::CPUPlace(), true, &temp);
Copy(dev_ctx, *multi_rois_num[i], CPUPlace(), true, &temp);
const int* length_in = temp.data<int>();
lod_size = multi_rois_num[i]->numel();
for (size_t n = 0; n < lod_size; ++n) {
Expand Down Expand Up @@ -240,7 +240,7 @@ void GPUCollectFpnProposalsOpKernel(
GetLengthLoD<<<blocks, threads, 0, dev_ctx.stream()>>>(
real_post_num, out_id_data, length_lod_data);
std::vector<int> length_lod_cpu(lod_size);
phi::memory_utils::Copy(phi::CPUPlace(),
phi::memory_utils::Copy(CPUPlace(),
length_lod_cpu.data(),
place,
length_lod_data,
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/cumprod_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ bool CumprodGradCompatible(const Context &dev_ctx,
bool has_zero = false;
#ifdef PADDLE_WITH_CUDA
DenseTensor any_zero_cpu;
phi::Copy(dev_ctx, any_zero, phi::CPUPlace(), true, &any_zero_cpu);
phi::Copy(dev_ctx, any_zero, CPUPlace(), true, &any_zero_cpu);
has_zero = *any_zero_cpu.data<bool>();
#else
has_zero = *any_zero.data<bool>();
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/determinant_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ struct DeterminantCudaFunctor<phi::dtype::complex<T>, Context> {
phi::Stream(reinterpret_cast<phi::StreamId>(dev_ctx.stream())));
memory_utils::Copy(dev_ctx.GetPlace(),
tmp_gpu_ptrs_data->ptr(),
phi::CPUPlace(),
CPUPlace(),
static_cast<void*>(cpu_ptrs.data()),
cpu_ptrs.size() * sizeof(phi::dtype::complex<T>*),
dev_ctx.stream());
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/distribute_fpn_proposals_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ void DistributeFpnProposalsKernel(
size_t start = 0;

std::vector<int> sub_lod_list_cpu(lod_size * num_level);
memory_utils::Copy(phi::CPUPlace(),
memory_utils::Copy(CPUPlace(),
sub_lod_list_cpu.data(),
place,
sub_lod_list_data,
Expand Down
12 changes: 2 additions & 10 deletions paddle/phi/kernels/gpu/edit_distance_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -104,16 +104,8 @@ void EditDistanceKernel(const Context& dev_ctx,
if (use_length) {
DenseTensor hyp_length_cpu;
DenseTensor ref_length_cpu;
Copy(dev_ctx,
*(hypslength.get_ptr()),
phi::CPUPlace(),
false,
&hyp_length_cpu);
Copy(dev_ctx,
*(refslength.get_ptr()),
phi::CPUPlace(),
false,
&ref_length_cpu);
Copy(dev_ctx, *(hypslength.get_ptr()), CPUPlace(), false, &hyp_length_cpu);
Copy(dev_ctx, *(refslength.get_ptr()), CPUPlace(), false, &ref_length_cpu);

for (auto i = 0; i < batch_size; i++) {
hyp_lod[i + 1] = hyp_lod[i] + hyp_length_cpu.data<int64_t>()[i];
Expand Down
16 changes: 8 additions & 8 deletions paddle/phi/kernels/gpu/eig_grad_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -211,7 +211,7 @@ void SolveLinearSystemGPU<phi::dtype::complex<float>>(
}

std::vector<int> h_info(batch_count, 0);
phi::memory_utils::Copy(phi::CPUPlace(),
phi::memory_utils::Copy(CPUPlace(),
h_info.data(),
dev_ctx.GetPlace(),
d_info,
Expand Down Expand Up @@ -392,7 +392,7 @@ void SolveLinearSystemGPU<phi::dtype::complex<double>>(
}

std::vector<int> h_info(batch_count, 0);
phi::memory_utils::Copy(phi::CPUPlace(),
phi::memory_utils::Copy(CPUPlace(),
h_info.data(),
dev_ctx.GetPlace(),
d_info,
Expand Down Expand Up @@ -565,13 +565,13 @@ void SolveLinearSystemGPU<phi::dtype::complex<float>>(
}

// Check error info
phi::CPUPlace cpu_place;
CPUPlace cpu_place;
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto* cpu_ctx = static_cast<phi::CPUContext*>(pool.Get(cpu_place));

std::vector<rocblas_int> h_info(batch_count, 0);
phi::memory_utils::Copy(
phi::CPUPlace(),
CPUPlace(),
h_info.data(),
dev_ctx.GetPlace(),
d_info,
Expand Down Expand Up @@ -741,13 +741,13 @@ void SolveLinearSystemGPU<phi::dtype::complex<double>>(
X_row,
rhs_cols)); // X_row ldc = rhs_cols (row-major leading dimension)
}
phi::CPUPlace cpu_place;
CPUPlace cpu_place;
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto* cpu_ctx = static_cast<phi::CPUContext*>(pool.Get(cpu_place));

std::vector<rocblas_int> h_info(batch_count, 0);
phi::memory_utils::Copy(
phi::CPUPlace(),
CPUPlace(),
h_info.data(),
dev_ctx.GetPlace(),
d_info,
Expand Down Expand Up @@ -799,7 +799,7 @@ void ComputeBackwardForComplexInputGPU(const DenseTensor& L,
DenseTensor VhgV = phi::Matmul<T>(dev_ctx, Vh, gV_safe);
DenseTensor diag_real = phi::Real<T>(dev_ctx, VhgV);

auto cpu_place = phi::CPUPlace();
auto cpu_place = CPUPlace();
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto* cpu_ctx = static_cast<phi::CPUContext*>(pool.Get(cpu_place));

Expand All @@ -812,7 +812,7 @@ void ComputeBackwardForComplexInputGPU(const DenseTensor& L,

DenseTensor diag_res;
dev_ctx.template Alloc<T>(&diag_res);
Copy(dev_ctx, diag_res_cpu, phi::GPUPlace(), false, &diag_res);
Copy(dev_ctx, diag_res_cpu, GPUPlace(), false, &diag_res);

DenseTensor diag_unsqueezed = phi::funcs::Unsqueeze(diag_res, -2);

Expand Down
6 changes: 3 additions & 3 deletions paddle/phi/kernels/gpu/eig_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ void EigKernel(const Context& dev_ctx,
return;
}

auto cpu_place = phi::CPUPlace();
auto cpu_place = CPUPlace();
phi::DeviceContextPool& pool = phi::DeviceContextPool::Instance();
auto* cpu_ctx = static_cast<phi::CPUContext*>(pool.Get(cpu_place));

Expand Down Expand Up @@ -108,8 +108,8 @@ void EigKernel(const Context& dev_ctx,
}

// copy result from cpu to gpu tensor
Copy(dev_ctx, out_w_cpu, phi::GPUPlace(), false, out_w);
Copy(dev_ctx, out_v_cpu, phi::GPUPlace(), false, out_v);
Copy(dev_ctx, out_w_cpu, GPUPlace(), false, out_w);
Copy(dev_ctx, out_v_cpu, GPUPlace(), false, out_v);
}

} // namespace phi
Expand Down
2 changes: 1 addition & 1 deletion paddle/phi/kernels/gpu/generate_proposals_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,7 @@ void GenerateProposalsKernel(const Context &dev_ctx,
T *rpn_roi_probs_data = rpn_roi_probs->data<T>();

auto place = dev_ctx.GetPlace();
auto cpu_place = phi::CPUPlace();
auto cpu_place = CPUPlace();

int64_t num_proposals = 0;
std::vector<size_t> offset(1, 0);
Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/gpu/global_gather_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ struct GlobalGatherFunctor<phi::GPUContext, T> {
cpu_local_count_data = local_count->data<int64_t>();
local_count_len = local_count->numel();
} else {
Copy(dev_ctx, *local_count, phi::CPUPlace(), true, &cpu_local_count);
Copy(dev_ctx, *local_count, CPUPlace(), true, &cpu_local_count);
cpu_local_count_data = cpu_local_count.data<int64_t>();
local_count_len = cpu_local_count.numel();
}
Expand All @@ -72,7 +72,7 @@ struct GlobalGatherFunctor<phi::GPUContext, T> {
if (global_count->place().GetType() == AllocationType::CPU) {
cpu_global_count_data = global_count->data<int64_t>();
} else {
Copy(dev_ctx, *global_count, phi::CPUPlace(), true, &cpu_global_count);
Copy(dev_ctx, *global_count, CPUPlace(), true, &cpu_global_count);
cpu_global_count_data = cpu_global_count.data<int64_t>();
}

Expand Down
4 changes: 2 additions & 2 deletions paddle/phi/kernels/gpu/global_scatter_kernel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ struct GlobalScatterFunctor<phi::GPUContext, T> {
if (local_count->place().GetType() == AllocationType::CPU) {
cpu_local_count_data = local_count->data<int64_t>();
} else {
Copy(dev_ctx, *local_count, phi::CPUPlace(), true, &cpu_local_count);
Copy(dev_ctx, *local_count, CPUPlace(), true, &cpu_local_count);
cpu_local_count_data = cpu_local_count.data<int64_t>();
}
auto global_count_len = 0;
Expand All @@ -70,7 +70,7 @@ struct GlobalScatterFunctor<phi::GPUContext, T> {
cpu_global_count_data = global_count->data<int64_t>();
global_count_len = global_count->numel();
} else {
Copy(dev_ctx, *global_count, phi::CPUPlace(), true, &cpu_global_count);
Copy(dev_ctx, *global_count, CPUPlace(), true, &cpu_global_count);
cpu_global_count_data = cpu_global_count.data<int64_t>();
global_count_len = cpu_global_count.numel();
}
Expand Down
Loading
Loading