-
Notifications
You must be signed in to change notification settings - Fork 212
Expand file tree
/
Copy pathCMakeLists.txt
More file actions
1065 lines (1031 loc) · 64.3 KB
/
CMakeLists.txt
File metadata and controls
1065 lines (1031 loc) · 64.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License
# ---------------------------------------------------------------------------
# Project bootstrap: minimum CMake version, project name/languages, module
# search path, RPATH policy, and the first Paddle helper module.
# ---------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.10)

# PROJ_NAME is reused both as the CMake project name and as TARGET_NAME.
set(PROJ_NAME "paddle-iluvatar-gpu")
project(${PROJ_NAME} LANGUAGES CXX C CUDA)
set(TARGET_NAME ${PROJ_NAME})

# Replace (not append to) the module search path with this repository's
# cmake/ directory and log the result.
set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
message(STATUS "CMAKE_MODULE_PATH: ${CMAKE_MODULE_PATH}")

# RPATH policy: skip run-time search path information entirely and keep the
# install RPATH empty.
set(CMAKE_SKIP_RPATH TRUE)
set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE)
set(CMAKE_INSTALL_RPATH "")

# WITH_MKLML is switched on before include(paddle) runs.
# NOTE(review): presumably the included module reads it - confirm.
set(WITH_MKLML ON)
include(paddle)
# Location of the third-party build tree. It is a PATH cache entry, so a value
# already present in the cache (e.g. from -DTHIRD_PARTY_PATH=...) is kept.
set(THIRD_PARTY_PATH "${PADDLE_SOURCE_DIR}/build/third_party"
    CACHE PATH "Third party libraries directory.")

# Helper modules, looked up through CMAKE_MODULE_PATH. The order is preserved
# from the original script.
include(version)
include(generic)
include(cblas)

# Third-party dependency modules.
include(external/eigen)
include(external/xxhash)
include(external/zlib)
include(external/protobuf)

# Optional FlagCX support: define PADDLE_WITH_FLAGCX and load its module only
# when WITH_FLAGCX evaluates to true.
if(WITH_FLAGCX)
  add_definitions(-DPADDLE_WITH_FLAGCX)
  include(external/flagcx)
endif()
# The plugin version mirrors PADDLE_VERSION.
# NOTE(review): PADDLE_VERSION is not set in this file; presumably it comes
# from one of the modules included earlier - confirm.
set(PLUGIN_VERSION ${PADDLE_VERSION})

# ---------------------------------------------------------------------------
# C++ code generation for Paddle's external_error.proto.
#
# protoc is invoked with the proto's own directory as the import root, so the
# generated files are named <proto basename>.pb.cc / .pb.h and are written
# directly into EXTERNAL_ERROR_PROTO_OUT_DIR.
# ---------------------------------------------------------------------------
set(PROTO_FILE "${PADDLE_SOURCE_DIR}/paddle/phi/core/external_error.proto")
get_filename_component(PROTO_WE "${PROTO_FILE}" NAME_WE)

# Single source of truth for the protoc import root and output directory; the
# status message and the custom command below are both built from these, so
# the logged command always matches the one that is executed.
set(EXTERNAL_ERROR_PROTO_IMPORT_DIR "${PADDLE_SOURCE_DIR}/paddle/phi/core/")
set(EXTERNAL_ERROR_PROTO_OUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/paddle/phi/core")

set(GENERATED_SRC "${EXTERNAL_ERROR_PROTO_OUT_DIR}/${PROTO_WE}.pb.cc")
set(GENERATED_HDR "${EXTERNAL_ERROR_PROTO_OUT_DIR}/${PROTO_WE}.pb.h")

message(STATUS "CMAKE_CURRENT_BINARY_DIR: ${CMAKE_CURRENT_BINARY_DIR}")
message(STATUS "PROTOBUF_PROTOC_EXECUTABLE: ${PROTOBUF_PROTOC_EXECUTABLE}")
# Previously this message printed a different -I / --cpp_out pair than the
# command actually run; it now reflects the real invocation.
message(
  STATUS
    "Full protoc command: ${PROTOBUF_PROTOC_EXECUTABLE} -I${EXTERNAL_ERROR_PROTO_IMPORT_DIR} --cpp_out=${EXTERNAL_ERROR_PROTO_OUT_DIR} ${PROTO_FILE}"
)

# Build rule: create the output directory, then run protoc. The rule re-runs
# whenever the .proto file changes.
add_custom_command(
  OUTPUT "${GENERATED_SRC}" "${GENERATED_HDR}"
  COMMAND ${CMAKE_COMMAND} -E make_directory "${EXTERNAL_ERROR_PROTO_OUT_DIR}"
  COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} -I${EXTERNAL_ERROR_PROTO_IMPORT_DIR}
          --cpp_out=${EXTERNAL_ERROR_PROTO_OUT_DIR} ${PROTO_FILE}
  DEPENDS "${PROTO_FILE}"
  COMMENT "Generating C++ protocol buffer for ${PROTO_FILE}"
  VERBATIM)
# Static library built from the protoc-generated source (GENERATED_SRC).
add_library(external_error_proto STATIC "${GENERATED_SRC}")

# Compile the generated code as position-independent code.
set_property(TARGET external_error_proto PROPERTY POSITION_INDEPENDENT_CODE ON)

# The protobuf dependency is propagated to consumers (PUBLIC).
target_link_libraries(external_error_proto PUBLIC protobuf)

# Expose the build directory to this target and its consumers, which makes the
# generated header reachable as paddle/phi/core/<proto basename>.pb.h.
target_include_directories(external_error_proto
                           PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
# Umbrella custom target that names the third-party dependencies as DEPENDS.
# flagcx is part of the list only when WITH_FLAGCX evaluates to true.
set(ILUVATAR_EXTERNAL_DEP_NAMES eigen3 zlib protobuf)
if(WITH_FLAGCX)
  list(APPEND ILUVATAR_EXTERNAL_DEP_NAMES flagcx)
endif()
add_custom_target(external_deps DEPENDS ${ILUVATAR_EXTERNAL_DEP_NAMES})
# The helper list is only needed to build the call above.
unset(ILUVATAR_EXTERNAL_DEP_NAMES)
# When WITH_COREX evaluates to true, add the PADDLE_WITH_COREX,
# EIGEN_USE_COREX and EIGEN_USE_GPU preprocessor definitions
# (directory-scoped).
if(WITH_COREX)
  add_definitions(-DPADDLE_WITH_COREX -DEIGEN_USE_COREX -DEIGEN_USE_GPU)
endif()
# Extend the module search path with Paddle's cmake/ and cmake/external/
# directories (appended after the entries that are already present).
list(APPEND CMAKE_MODULE_PATH "${PADDLE_SOURCE_DIR}/cmake")
list(APPEND CMAKE_MODULE_PATH "${PADDLE_SOURCE_DIR}/cmake/external")

# CUDA language support plus the FindCUDA package; configuration stops if the
# package cannot be found (REQUIRED).
enable_language(CUDA)
find_package(CUDA REQUIRED)

# Directory-scoped include paths: the CUDA headers reported by FindCUDA and
# this repository's runtime/ directory.
include_directories(
  ${CUDA_INCLUDE_DIRS}
  ${CMAKE_SOURCE_DIR}/runtime)

# The C++17 switch is passed as a raw compiler flag through add_definitions().
# NOTE(review): a non-definition flag given this way is added to the compile
# line without a per-language filter - confirm that is intended for C sources.
add_definitions(-std=c++17)
# Boolean build switches exposed as cache options, with their defaults:
# WITH_TESTING and ON_INFER default to OFF, WITH_GPU defaults to ON.
option(WITH_TESTING "compile with unit testing" OFF)
option(ON_INFER "compile with inference c++ lib" OFF)
option(WITH_GPU "Compile PaddlePaddle with ILUVATAR_GPU" ON)
# Load the cuda, gflags and glog CMake modules; include() resolves these
# module names through CMAKE_MODULE_PATH. They are included after the options
# above have been declared.
include(cuda)
include(gflags)
include(glog)
# Collect the first batch of Paddle sources into CUDA_SRCS1.
#
# Every entry is a path under PADDLE_SOURCE_DIR. Most are literal file names;
# the entries containing '*' are wildcard patterns. Because this is
# file(GLOB), only paths that match an existing file end up in the variable.
# NOTE(review): a listed file that is missing from the Paddle checkout is
# therefore dropped without a configure error - confirm this is intended.
file(
GLOB
CUDA_SRCS1
# backends
${PADDLE_SOURCE_DIR}/paddle/phi/backends/gpu/cuda/cuda_info.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/gpu/cuda/cuda_graph.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cuda_driver.cc
# Core
${PADDLE_SOURCE_DIR}/paddle/phi/core/enforce.cc
${PADDLE_SOURCE_DIR}/paddle/phi/core/flags.cc
${PADDLE_SOURCE_DIR}/paddle/phi/core/mixed_vector.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cusparse.cc
# kernels/funcs
# The four commented-out entries below are disabled; each of them is matched
# by one of the active wildcard patterns that follow.
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/concat_and_split_functor.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/deformable_conv_functor.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/eigen/*.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math_function.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/*.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math/*.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/eigen/*.cu
# cudnn/cublas
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cudnn.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cublas.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cublasLt.cc
# kernels/gpu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/abs_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/abs_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/activation_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/activation_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adamw_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/addmm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/addmm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/argsort_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/amp_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/arange_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/arg_min_max_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_embedding_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_identity_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_softmax_with_cross_entropy_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cast_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/clip_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/clip_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/concat_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/concat_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/contiguous_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cum_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/deformable_conv_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/deformable_conv_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/einsum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/flip_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/full_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_nd_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_nd_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gaussian_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gelu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_put_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/interpolate_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/logsumexp_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/matmul_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/matmul_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mean_all_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mean_all_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/multinomial_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nonzero_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/numel_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/one_hot_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/p_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/p_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pool_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pool_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/put_along_axis_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/randint_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/reduce_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/roi_align_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scale_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scatter_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scatter_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scatter_nd_add_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scatter_nd_add_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sign_kernel.cu.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/slice_grad_kernel.cu.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/slice_kernel.cu.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/split_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/squared_l2_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/stack_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/strided_copy_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/strided_elementwise_copy_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/strided_slice_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/swiglu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/swiglu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/set_value_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/set_value_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/take_along_axis_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/tile_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/tile_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/transpose_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/transpose_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/tril_triu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unbind_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/uniform_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/where_kernel.cu
# kernels/selected_rows
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
# kernels/kps
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/bitwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/elementwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/compare_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/logical_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/reduce_kernel.cu
# kernels/legacy/kps
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/kps/compare_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/kps/reduce_max_kernel.cu
# kernels
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/assign_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/empty_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/flatten_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/flatten_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_all_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_any_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_max_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_mean_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_sum_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reshape_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reshape_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/shape_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/strided_slice_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/strided_slice_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/squeeze_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/squeeze_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/unsqueeze_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/unsqueeze_kernel.cc
# ernie_core
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fc_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_modality_expert_id_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/moe_combine_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/moe_combine_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/moe_gate_dispatch_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/moe_gate_dispatch_permute_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/moe_ops_partial_nosoftmaxtopk_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_diagonal_tensor_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_diagonal_tensor_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gelu_grad_kernel.cu)
file(
GLOB
CUDA_SRCS2
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_seqpool_cvm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_seqpool_cvm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_stack_transpose_quant_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_transpose_wlch_split_quant_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fusion_group_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/skip_layernorm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/affine_channel_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/affine_channel_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/ap_facade_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/ap_trivial_fusion_begin_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/ap_trivial_fusion_end_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/ap_variadic_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/argsort_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/barrier_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bce_loss_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bce_loss_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/binomial_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/box_clip_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_concat_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_scatter_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cast_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/class_center_sample_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/comm_init_all_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/complex_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/correlation_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/correlation_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/ctc_align_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cvm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cvm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_with_scaled_gradient_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/exponential_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/flip_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gelu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/global_gather_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/global_scatter_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gru_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_add_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/interpolate_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/kldiv_loss_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/kldiv_loss_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/l1_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/l1_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/label_smooth_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/label_smooth_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lamb_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lgamma_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/logsumexp_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lookup_table_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lookup_table_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lu_solve_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/margin_cross_entropy_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/matrix_power_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/matrix_power_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mean_all_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/moe_unpermute_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/momentum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mp_allreduce_sum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/multiplex_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nonzero_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad3d_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_allgather_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_concat_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_concat_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_recv_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/partial_send_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/psroi_pool_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/quantize_linear_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/reduce_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/repeat_interleave_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/repeat_interleave_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rmsprop_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/row_conv_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/row_conv_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/seed_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sequence_expand_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/set_value_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/shuffle_channel_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/shuffle_channel_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/soft_relu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/spectral_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/stack_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/stft_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sync_batch_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_k_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/uniform_random_batch_size_like_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/weighted_sample_neighbors_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/yolo_box_head_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/elementwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/fp8_quant_blockwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_combine_no_weight_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/kps/compare_kernel.cu
# kernels/gpu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/activation_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/activation_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adamw_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adam_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adagrad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/abs_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/add_n_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/arange_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adadelta_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/accuracy_check_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/allclose_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/all_gather_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/all_reduce_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/all_to_all_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/arg_min_max_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/apply_per_channel_scale_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/as_complex_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/as_real_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/asgd_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/assign_pos_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/amp_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/angle_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/angle_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/adamax_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bincount_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_embedding_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_embedding_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cast_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/clip_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/clip_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/concat_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/concat_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/compare_kerc_idfuncsnel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scatter_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scatter_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dist_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/kps/compare_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/compare_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cum_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/numel_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/diag_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/diag_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/einsum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/einsum_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/decode_jpeg_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/nvjpeg.cc
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cupti.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/embedding_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_as_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/expand_as_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/eye_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_diagonal_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_diagonal_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_diagonal_tensor_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fill_diagonal_tensor_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/full_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_nd_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_nd_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gaussian_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_add_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_put_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_put_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/p_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/p_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/put_along_axis_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/one_hot_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/randint_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/set_value_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/set_value_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/abs_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/stack_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/strided_slice_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/strided_slice_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/strided_copy_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/swiglu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/swiglu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/slice_grad_kernel.cu.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/slice_kernel.cu.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/take_along_axis_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/take_along_axis_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/tile_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/tile_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/diagonal_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/diagonal_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/logsumexp_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/uniform_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/uniform_inplace_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/uniform_inplace_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/uniform_random_batch_size_like_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/unsqueeze_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/unsqueeze_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/squeeze_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/squeeze_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sign_kernel.cu.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/split_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scatter_nd_add_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scatter_nd_add_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/soft_relu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mean_all_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mean_all_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/multiplex_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/multiplex_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pow2_decay_with_linear_warmup_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_k_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_k_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/where_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/where_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/empty_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lerp_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lerp_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/flatten_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/flatten_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_all_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_any_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_sum_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_mean_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reshape_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reshape_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/contiguous_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gelu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gelu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/transpose_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/transpose_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/triu_indices_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/tril_indices_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/tril_triu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/tril_triu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unbind_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/gather_scatter_functor.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rms_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fc_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/fc_functor.cu
${CMAKE_SOURCE_DIR}/kernels/gpudnn/soft.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/squared_l2_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/squared_l2_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/reduce_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/backends/dynload/cusolver.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/clip_by_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/check_numerics_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_split_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/broadcast_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/decayed_adagrad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/debug_tools_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cumprod_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cumprod_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/crop_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/crop_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fetch_barrier_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fake_dequantize_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fake_quantize_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/erfinv_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/erfinv_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/erf_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/erf_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/edit_distance_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dgc_clip_by_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/diag_embed_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dequantize_log_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dequantize_abs_max_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/depend_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fused_adam_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/ftrl_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/frobenius_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/frobenius_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/frame_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/frame_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fold_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fold_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fft_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fft_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/huber_loss_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/huber_loss_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/histogram_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/hinge_loss_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/hinge_loss_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gru_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/generate_proposals_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gaussian_inplace_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gammaln_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/beam_search_decode_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/beam_search_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/i0_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/i0_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/i0e_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/i0e_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/i1_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/i1_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/i1e_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/i1e_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/im2sequence_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/im2sequence_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/increment_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_elementwise_get_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_elementwise_get_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_elementwise_put_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_elementwise_put_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_sample_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_sample_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_select_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_select_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/inverse_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/isclose_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/isfinite_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/atan2_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/atan2_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/auc_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/average_accumulates_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_entropy2_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_entropy2_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cross_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/p_recv_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/matrix_rank_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/poisson_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/logcumsumexp_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_loss_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/quant_linear_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/polygamma_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lrn_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/merged_momentum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/put_along_axis_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/polygamma_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nanmedian_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mode_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_loss_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pixel_unshuffle_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nop_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lod_reset_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mode_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/overlap_add_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/maxout_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lu_unpack_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pixel_shuffle_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lod_reset_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/qr_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/prelu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/prelu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/psroi_pool_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/overlap_add_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/maxout_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/p_send_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/momentum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pixel_shuffle_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lu_unpack_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nll_loss_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nanmedian_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/margin_cross_entropy_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/masked_fill_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/number_count_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lrn_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pool_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/poisson_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nms_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/masked_fill_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/prod_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nadam_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/prune_gate_by_capacity_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/prior_box_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/logspace_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/masked_select_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/multinomial_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/nll_loss_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/moe_unpermute_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pool_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/logsumexp_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/moving_average_abs_max_scale_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/masked_select_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/solve_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/radam_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/random_routing_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/renorm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/scale_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/randperm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/reduce_as_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/reduce_as_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/reduce_scatter_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/renorm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/roi_align_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/roi_align_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/roi_pool_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/roi_pool_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/roll_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/roll_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rprop_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rrelu_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/rrelu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/searchsorted_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/segment_pool_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/segment_pool_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/selu_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/send_u_recv_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/send_u_recv_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/send_ue_recv_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/send_ue_recv_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/send_uv_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/send_uv_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sequence_expand_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sequence_mask_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sequence_pool_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sequence_pool_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sequence_softmax_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sequence_softmax_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sgd_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/share_data_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/shard_index_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/shuffle_batch_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/shuffle_batch_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sigmoid_cross_entropy_with_logits_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/slogdeterminant_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sparse_momentum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/straight_through_estimator_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/svd_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sync_batch_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sync_comm_stream_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/sync_calc_stream_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/temporal_shift_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/temporal_shift_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/trace_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/trace_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/trunc_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/trunc_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/truncated_gaussian_random_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unfold_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unfold_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unpool_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lstsq_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unpool_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unstack_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lgamma_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/linspace_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/kron_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/kron_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/stack_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/unstack_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/warprnnt_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/yolo_box_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/kthvalue_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/kthvalue_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dgc_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gammaincc_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gammaincc_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/llm_int8_linear_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/baddbmm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/baddbmm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/load_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/load_combine_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/c_softmax_with_multi_label_cross_entropy_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/save_combine_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/save_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dropout_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dropout_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/index_add_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bce_loss_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bce_loss_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/meshgrid_kernel.cu.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/meshgrid_grad_kernel.cu.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad3d_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/pad3d_kernel.cu
# ############################################################################
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/array_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/set_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/is_empty_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/dist_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/coalesce_tensor_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_amin_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/batch_norm_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_variance_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/shape_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_amax_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/prod_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/assign_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reverse_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/full_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/strided_slice_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fake_quantize_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/reduce_min_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/check_memory_continue_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/strided_slice_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/npu_identity_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/activation_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/scale_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/elementwise_multiply_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/full_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/uniform_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/merge_selected_rows_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/isfinite_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/adam_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/lamb_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/add_n_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/lookup_table_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/ftrl_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/dgc_clip_by_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/share_data_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/lookup_table_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/clip_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/clip_by_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/uniform_random_batch_size_like_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/get_tensor_from_selected_rows_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/batch_norm_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/batch_norm_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/empty_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/sparse_utils_grad_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/softmax_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sync_batch_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/pool_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sparse_utils_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/reshape_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/full_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/transpose_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/elementwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sparse_attention_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/reshape_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/slice_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/softmax_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/slice_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/unary_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/coalesce_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sum_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/pool_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/transpose_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/mask_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sync_batch_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/unary_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sum_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/transfer_layout_kernel.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/elementwise_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/mask_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/ext_build_src_rank_and_local_expert_id_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_combine_no_weight_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/layer_norm_cuda_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_combine_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/one_hot_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/legacy_expand_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/legacy_crop_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_and_quant_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/legacy_crop_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_combine_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_permute_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/randint_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/legacy_generate_proposals_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/anchor_generator_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_combine_no_weight_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/legacy_expand_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_permute_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/uniform_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/fp8_quant_blockwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_rope_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_seqpool_cvm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_seqpool_cvm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_transpose_split_quant_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_transpose_wlch_split_quant_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/distributed_fused_lamb_init_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_act_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fusion_group_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/skip_layernorm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_embedding_eltwise_layernorm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_stack_transpose_quant_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_rope_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_swiglu_weighted_bwd_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/core/flags.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math_function.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_grad_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/backends/context_pool.cc
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/repeat_tensor2index_tensor.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/binomial_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bernoulli_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_grad_kernel_impl.h
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/bmm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/box_coder_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/broadcast_tensors_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/broadcast_tensors_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/channel_shuffle_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/channel_shuffle_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/complex_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/complex_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cum_maxmin_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/cum_maxmin_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/digamma_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/digamma_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dot_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dot_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/eigh_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/eigvalsh_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/exponential_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/flip_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gammaincc_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gather_tree_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/graph_reindex_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/graph_sample_neighbors_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/group_norm_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gumbel_softmax_grad_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/gumbel_softmax_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_act_dequant_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/block_multi_head_attention_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_weighted_swiglu_act_quant_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_elemwise_activation_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_softmax_mask_upper_triangle_grad_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/fp8_gemm/fp8_gemm_with_cublasLt/fp8_fp8_half_gemm.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_grad_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/fused_conv2d_add_act_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/variable_length_memory_efficient_attention_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/memory_efficient_attention_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/cutlass/gemm_epilogue_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/blha_get_max_len.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_elemwise_activation_grad_kernel.cu
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/as_real_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/as_complex_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/complex_grad_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/stride/complex_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/shape_kernel.cc
# ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/conv_kernel_igemm.cu
# ############################################################################
# kernels/fusion kernels/selected_rows
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/selected_rows/gpu/adamw_kernel.cu
# kernels/kps
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/elementwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/kps/elementwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/bitwise_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/logical_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/kps/reduce_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/kps/reduce_max_kernel.cu
${PADDLE_SOURCE_DIR}/paddle/phi/kernels/array_kernel.cc)
# Merge the two kernel source lists into the final CUDA source list.
set(CUDA_SRCS ${CUDA_SRCS1} ${CUDA_SRCS2})
# De-duplicate the merged list itself. CUDA_SRCS has already been assigned at
# this point, so de-duplicating CUDA_SRCS1 here would leave CUDA_SRCS untouched
# and any file listed more than once in the source lists above (e.g.
# gpu/gammaincc_grad_kernel.cu) would stay duplicated.
list(REMOVE_DUPLICATES CUDA_SRCS)
# Drop the Paddle sources that must not be compiled into this backend.
# NOTE(review): the reason each file is excluded is not stated here; presumably
# they are unsupported on this device or replaced by local implementations --
# confirm before editing the list.
list(
  REMOVE_ITEM
  CUDA_SRCS
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/gru_compute.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/multihead_matmul_functor.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/softmax.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/weight_only_gemv.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/math/context_project.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/fft.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/lstm_compute.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/funcs/fake_quantize_functor.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/masked_multihead_attention_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/qkv_unpack_mha_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_bias_dropout_residual_layer_norm_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/fusion/gpu/fused_layernorm_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/check_numerics_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/collect_fpn_proposals_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/dgc_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/fused_adam_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/huber_loss_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/huber_loss_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/histogram_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/interpolate_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/load_combine_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/load_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/kthvalue_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/kthvalue_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/log_softmax_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/lstsq_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mode_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/mode_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/save_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/save_combine_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_k_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/viterbi_decode_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/gpu/top_k_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_gate_dispatch_permute_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/legacy/gpu/moe_ops_partial_nosoftmaxtopk_grad_kernel.cu
  ${PADDLE_SOURCE_DIR}/paddle/phi/kernels/sparse/gpu/sparse_attention_kernel.cu)
# Gather this plugin's own sources into CC_SRCS. RELATIVE makes the resulting
# paths relative to the top-level source directory instead of absolute.
file(GLOB_RECURSE CC_SRCS RELATIVE "${CMAKE_SOURCE_DIR}"
  "runtime/runtime.cc"
  "runtime/iluvatar_context.cc"
  "common/*.cc"
  "kernels/cuda_kernels/*.cc"
  "kernels/cuda_kernels/*.cu"
  "kernels/ernie_core/*.cu"
  "kernels/ernie_core/*.cc"
  "kernels/gpudnn/*.cu"
)

# The FlagCX runtime source is added only when WITH_FLAGCX is enabled.
if(WITH_FLAGCX)
  list(APPEND CC_SRCS "runtime/runtime_flagcx.cc")
endif()
# Print the final CUDA source list at configure time, one entry per status
# line, so the set of compiled Paddle kernels can be inspected in the log.
message(STATUS "CUDA_SRCS files:")
foreach(cuda_src IN LISTS CUDA_SRCS)
  message(STATUS " ${cuda_src}")
endforeach()