@@ -757,6 +757,10 @@ def generate_function(name, ret_type, ret_type_ext, multiAS, *args):
757
757
llvm_i += 1
758
758
759
759
####### process args
760
+ if not args :
761
+ spir_mangled_func_suffix .append ("v" )
762
+ ocl_mangled_func_suffix .append ("v" )
763
+ args = []
760
764
for cast in args :
761
765
762
766
if arg_addr_spaces :
@@ -856,7 +860,6 @@ def generate_function(name, ret_type, ret_type_ext, multiAS, *args):
856
860
decl_args .append (ocl_arg_type )
857
861
arg_i += 1
858
862
859
-
860
863
######## generate final mangled function names
861
864
spir_mangled_func_suffix = "" .join (spir_mangled_func_suffix )
862
865
ocl_mangled_func_suffix = "" .join (ocl_mangled_func_suffix )
@@ -873,7 +876,7 @@ def generate_function(name, ret_type, ret_type_ext, multiAS, *args):
873
876
decl_ret_type = 'void'
874
877
else :
875
878
decl_ret_type = coerced_ret_type
876
-
879
+
877
880
#if GENERIC_AS and GEN_AS_CALLEE_IDENTICAL and (AS == "generic") and ("private" in addr_spaces):
878
881
declaration = "declare %s %s(%s) local_unnamed_addr #0" % (decl_ret_type , ocl_mangled_name , decl_args )
879
882
if declaration in ALREADY_DECLARED .keys ():
@@ -1300,10 +1303,9 @@ def generate_function(name, ret_type, ret_type_ext, multiAS, *args):
1300
1303
generate_function ("shuffle2" , SIG_TO_LLVM_TYPE_MAP [ret_type ], '' , False , in_type , in_type , mask_type )
1301
1304
1302
1305
# convert
1306
+ CONVERT_TYPES = ['c' , 'h' , 's' , 't' , 'i' , 'j' , 'l' , 'm' , 'f' , 'd' ]
1303
1307
if FP16 :
1304
- CONVERT_TYPES = ['c' , 'h' , 's' , 't' , 'i' , 'j' , 'l' , 'm' , 'f' , 'd' , 'Dh' ]
1305
- else :
1306
- CONVERT_TYPES = ['c' , 'h' , 's' , 't' , 'i' , 'j' , 'l' , 'm' , 'f' , 'd' ]
1308
+ CONVERT_TYPES .append ('Dh' )
1307
1309
1308
1310
for dst_type in CONVERT_TYPES :
1309
1311
for src_type in CONVERT_TYPES :
@@ -1494,6 +1496,77 @@ def gen_three_variants(f, ret_type, ret_ext, AS, args, orders):
1494
1496
generate_function ("work_group_barrier" , SIG_TO_LLVM_TYPE_MAP ['v' ], '' , None , 'j' , '12memory_scope' )
1495
1497
generate_function ("work_group_barrier" , SIG_TO_LLVM_TYPE_MAP ['v' ], '' , None , 'j' )
1496
1498
1499
# Sub-group built-ins: each call below asks generate_function to emit one wrapper.
# Positional arguments follow the signature quoted in the comment below:
# name, return type (looked up in SIG_TO_LLVM_TYPE_MAP by type code),
# ret_type_ext, multiAS, then zero or more argument type codes.
+ # generate wrapper function
1500
+ #def generate_function(name, ret_type, ret_type_ext, multiAS, *args):
1501
+ # subgroups
1502
# Query functions: no argument type codes are passed (args is empty), the return
# type code is 'i', ret_type_ext is '' and multiAS is None.
+ generate_function ("get_sub_group_size" , SIG_TO_LLVM_TYPE_MAP ['i' ], '' , None )
1503
+ generate_function ("get_max_sub_group_size" , SIG_TO_LLVM_TYPE_MAP ['i' ], '' , None )
1504
+ generate_function ("get_num_sub_groups" , SIG_TO_LLVM_TYPE_MAP ['i' ], '' , None )
1505
+ generate_function ("get_enqueued_num_sub_groups" , SIG_TO_LLVM_TYPE_MAP ['i' ], '' , None )
1506
+ generate_function ("get_sub_group_id" , SIG_TO_LLVM_TYPE_MAP ['i' ], '' , None )
1507
+ generate_function ("get_sub_group_local_id" , SIG_TO_LLVM_TYPE_MAP ['i' ], '' , None )
1508
# Functions taking a single 'i' argument. sub_group_ballot returns the 'Dv4_i'
# type (presumably a 4-element int vector -- confirm against SIG_TO_LLVM_TYPE_MAP).
+ generate_function ("sub_group_ballot" , SIG_TO_LLVM_TYPE_MAP ['Dv4_i' ], '' , None , 'i' )
1509
+ generate_function ("sub_group_any" , SIG_TO_LLVM_TYPE_MAP ['i' ], '' , None , 'i' )
1510
+ generate_function ("sub_group_all" , SIG_TO_LLVM_TYPE_MAP ['i' ], '' , None , 'i' )
1511
# Two overloads of sub_group_barrier, both returning 'v': one taking only 'j',
# one taking 'j' plus a '12memory_scope' argument.
+ generate_function ("sub_group_barrier" , SIG_TO_LLVM_TYPE_MAP ['v' ], '' , None , 'j' )
1512
+ generate_function ("sub_group_barrier" , SIG_TO_LLVM_TYPE_MAP ['v' ], '' , None , 'j' , '12memory_scope' )
1513
+
1514
# Scalar type codes for which the per-type sub-group functions are generated.
# 'Dh' is appended only when FP16 is truthy (presumably the half type -- confirm).
+ SUBGROUP_TYPES = ['c' , 'h' , 's' , 't' , 'i' , 'j' , 'l' , 'm' , 'f' , 'd' ]
1515
+ if FP16 :
1516
+ SUBGROUP_TYPES .append ('Dh' )
1517
+
1518
# For every scalar type the return type equals the argument type; the matching
# LLVM_TYPE_EXT_MAP entry is passed as ret_type_ext and multiAS is False.
# NOTE(review): indentation is lost in this view, so the loop nesting below
# cannot be read off the text -- verify against the real file.
+ for arg_type in SUBGROUP_TYPES :
1519
+ ret_type = arg_type
1520
+ signext = LLVM_TYPE_EXT_MAP [ret_type ]
1521
# Type code used for the trailing index/mask argument of the calls below.
+ mask_type = 'j'
1522
# Plain and _xor shuffles, under both the unprefixed and the intel_ prefixed name:
# (value, mask) -> value.
+ for suffix in ['' , '_xor' ]:
1523
+ generate_function ("sub_group_shuffle" + suffix , SIG_TO_LLVM_TYPE_MAP [ret_type ], signext , False , arg_type , mask_type )
1524
+ generate_function ("intel_sub_group_shuffle" + suffix , SIG_TO_LLVM_TYPE_MAP [ret_type ], signext , False , arg_type , mask_type )
1525
# sub_group_broadcast does not use `suffix`, so it presumably belongs to the
# per-type loop rather than to the suffix loop above -- confirm.
+ generate_function ("sub_group_broadcast" , SIG_TO_LLVM_TYPE_MAP [ret_type ], signext , False , arg_type , mask_type )
1526
# Reduce / inclusive scan / exclusive scan for each of add, min, max:
# a single argument of the element type.
+ for suffix in ['_add' , '_min' , '_max' ]:
1527
+ generate_function ("sub_group_reduce" + suffix , SIG_TO_LLVM_TYPE_MAP [ret_type ], signext , False , arg_type )
1528
+ generate_function ("sub_group_scan_inclusive" + suffix , SIG_TO_LLVM_TYPE_MAP [ret_type ], signext , False , arg_type )
1529
+ generate_function ("sub_group_scan_exclusive" + suffix , SIG_TO_LLVM_TYPE_MAP [ret_type ], signext , False , arg_type )
1530
# _up/_down shuffles exist only under the intel_ name here and take two values
# of the element type followed by the mask_type argument.
+ for suffix in ['_up' , '_down' ]:
1531
+ generate_function ("intel_sub_group_shuffle" + suffix , SIG_TO_LLVM_TYPE_MAP [ret_type ], signext , False , arg_type , arg_type , mask_type )
1532
+
1533
+ # Intel extension adds support for some types which are not supported by the Khronos extension:
1534
+ # For the sub_group_shuffle, sub_group_shuffle_down, sub_group_shuffle_up, and sub_group_shuffle_xor functions, gentype is float, float2, float3,
1535
+ # float4, float8, float16, int, int2, int3, int4, int8, int16, uint, uint2, uint3, uint4, uint8, uint16, long, or ulong.
1536
# Only vector forms are generated by this loop: element codes 'f', 'i', 'j'
# crossed with widths 2, 3, 4, 8 and 16. Scalar forms are not produced here.
+ SUBGROUP_VEC_TYPES = ['f' ,'i' ,'j' ]
1537
# Despite its name, int_type also takes the value 'f'; it is the element type code.
+ for int_type in SUBGROUP_VEC_TYPES :
1538
+ for vecsize in ['2' ,'3' ,'4' ,'8' ,'16' ]:
1539
# Vector type code is built as 'Dv<width>_<element>', e.g. 'Dv4_f';
# return and argument types are the same.
+ ret_type = arg_type = 'Dv' + vecsize + '_' + int_type
1540
+ mask_type = 'j'
1541
# All vector variants pass '' as ret_type_ext and False as multiAS.
# Plain and _xor shuffles: (vector, mask) -> vector.
+ for suffix in ['' , '_xor' ]:
1542
+ generate_function ("intel_sub_group_shuffle" + suffix , SIG_TO_LLVM_TYPE_MAP [ret_type ], '' , False , arg_type , mask_type )
1543
# _up/_down shuffles: (vector, vector, mask) -> vector.
+ for suffix in ['_up' , '_down' ]:
1544
+ generate_function ("intel_sub_group_shuffle" + suffix , SIG_TO_LLVM_TYPE_MAP [ret_type ], '' , False , arg_type , arg_type , mask_type )
1545
+
1546
# Block read/write wrappers. vecsize '' selects the scalar variant (no name
# suffix, scalar type code); '2', '4', '8' select vector variants, with the width
# appended to the function name and used in the 'Dv<width>_<element>' type code.
# Reads return the value type and take one pointer argument (PConstArg);
# writes return 'v' and take a pointer (PArg) plus the value.
# NOTE(review): ("global") is a parenthesised string, not a 1-tuple, so the
# read calls pass the str "global" as multiAS while the write calls pass the
# 2-tuple ("global", 'none'). If generate_function expects a sequence with one
# entry per argument this should be ("global",) -- confirm against its body.
+ for vecsize in ['' ,'2' ,'4' ,'8' ]:
1547
+ # uints
1548
+ if vecsize :
1549
+ arg_type = 'Dv' + vecsize + '_j'
1550
+ else :
1551
+ arg_type = 'j'
1552
+ ret_type = arg_type
1553
+ PConstArg = 'PKj'
1554
+ PArg = 'Pj'
1555
# The unsuffixed names and the _ui names are generated with identical signatures.
+ generate_function ("intel_sub_group_block_read" + vecsize , SIG_TO_LLVM_TYPE_MAP [ret_type ], '' , ("global" ), PConstArg )
1556
+ generate_function ("intel_sub_group_block_write" + vecsize , SIG_TO_LLVM_TYPE_MAP ['v' ], '' , ("global" , 'none' ), PArg , arg_type )
1557
+ generate_function ("intel_sub_group_block_read_ui" + vecsize , SIG_TO_LLVM_TYPE_MAP [ret_type ], '' , ("global" ), PConstArg )
1558
+ generate_function ("intel_sub_group_block_write_ui" + vecsize , SIG_TO_LLVM_TYPE_MAP ['v' ], '' , ("global" , 'none' ), PArg , arg_type )
1559
+ # ushorts
1560
# Same scheme with the 't' element code; only the _us names are generated.
+ if vecsize :
1561
+ arg_type = 'Dv' + vecsize + '_t'
1562
+ else :
1563
+ arg_type = 't'
1564
+ ret_type = arg_type
1565
+ PConstArg = 'PKt'
1566
+ PArg = 'Pt'
1567
+ generate_function ("intel_sub_group_block_read_us" + vecsize , SIG_TO_LLVM_TYPE_MAP [ret_type ], '' , ("global" ), PConstArg )
1568
+ generate_function ("intel_sub_group_block_write_us" + vecsize , SIG_TO_LLVM_TYPE_MAP ['v' ], '' , ("global" , 'none' ), PArg , arg_type )
1569
+
1497
1570
print ("""
1498
1571
1499
1572
attributes #0 = { alwaysinline nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
0 commit comments