diff --git a/ChangeLog b/ChangeLog
index c899a8651ed7..289e291e2aa8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2025-08-21  Richard Sandiford
+
+	* MAINTAINERS: Update my email address and stand down as AArch64
+	maintainer.
+
+2025-08-18  Spencer Abson
+
+	* MAINTAINERS: Update my email address.
+
+2025-08-17  Filip Kastl
+
+	* MAINTAINERS: Switch around Andrew Pinski's entries in
+	Contributing under DCO.
+
+2025-08-16  Mikael Pettersson
+
+	* MAINTAINERS: Add myself to write after approval.
+
+2025-08-14  Avinash Jayakar
+
+	* MAINTAINERS: Add myself to write after approval.
+
+2025-08-08  Andrew Pinski
+
+	* MAINTAINERS (Andrew Pinski): Update email address.
+
+2025-08-07  Pengfei Li
+
+	* MAINTAINERS: Add myself.
+
+2025-08-05  Thomas Schwinge
+
+	* .gitignore: Remove 'libgrust/*/target/'.
+
 2025-08-01  Luis Machado
 
 	* MAINTAINERS: Update my e-mail address.
diff --git a/MAINTAINERS b/MAINTAINERS
index 6148ce069cb3..205e6a4c4b02 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -40,7 +40,7 @@ Michael Meissner
 Jason Merrill
 David S. Miller
 Joseph Myers
-Richard Sandiford
+Richard Sandiford
 Bernd Schmidt
 Ian Lance Taylor
 Jim Wilson
@@ -56,7 +56,6 @@ docs, and the testsuite related to that.
 aarch64 ldp/stp		Alex Coplan
 aarch64 port		Richard Earnshaw
-aarch64 port		Richard Sandiford
 aarch64 port		Tamar Christina
 aarch64 port		Kyrylo Tkachov
 alpha port		Richard Henderson
@@ -276,7 +275,7 @@ check in changes outside of the parts of the compiler they maintain.
 Reviewers
 
 aarch64 port		Alex Coplan
-aarch64 port		Andrew Pinski
+aarch64 port		Andrew Pinski
 arm port (MVE)		Christophe Lyon
 callgraph		Martin Jambor
 C front end		Marek Polacek
@@ -333,7 +332,7 @@ from other maintainers or reviewers.
 Name				BZ account	Email
 
 Soumya AR			soumyaa
-Spencer Abson			sabson
+Spencer Abson			sabson
 Mark G. Adams			mgadams
 Ajit Kumar Agarwal		aagarwa
 Pedro Alves			palves
@@ -566,6 +565,7 @@ Fariborz Jahanian		-
 Martin Jambor			jamborm
 Sam James			sjames
 Surya Kumari Jangala		jskumari
+Avinash Jayakar			avinashd
 Jakub Jelinek			jakub
 Andrew Jenner			andrewjenner
 Saurabh Jha			saurabhjha
@@ -628,6 +628,7 @@ James Lemke			jwlemke
 Ilya Leoshkevich		iii
 Kriang Lerdsuwanakij		lerdsuwa
 Pan Li				-
+Pengfei Li			pfustc
 Renlin Li			renlin
 Xinliang David Li		davidxl
 Kewen Lin			linkw
@@ -730,10 +731,11 @@ Fernando Pereira		pronesto
 Nicola Pero			nicola
 Sebastian Perta			-
 Sebastian Peryt			speryt
+Mikael Pettersson		mikpe
 Johannes Pfau			jpfau
 Gerald Pfeifer			gerald
 Kaushik Phatak			kaushikp
-Andrew Pinski			pinskia
+Andrew Pinski			pinskia
 Nicolas Pitre			nico
 Michael Ploujnikov		plouj
 Paul Pluzhnikov			ppluzhnikov
@@ -957,6 +959,8 @@ Matthew Malcomson
 Immad Mir
 Gaius Mulley
 Szabolcs Nagy
+Mikael Pettersson
+Andrew Pinski
 Andrew Pinski
 Siddhesh Poyarekar
 Ramana Radhakrishnan
diff --git a/config/ChangeLog b/config/ChangeLog
index da86623e8b93..10bce27d8e91 100644
--- a/config/ChangeLog
+++ b/config/ChangeLog
@@ -1,3 +1,8 @@
+2025-08-10  Ijaz, Abdul B
+
+	* lib-link.m4: Handle dash in the library name for
+	AC_LIB_LINKFLAGS_BODY.
+
 2025-05-23  Andi Kleen
 
 	* bootstrap-native.mk: New file.
diff --git a/config/lib-link.m4 b/config/lib-link.m4
index 20e281fd323a..a60a80694532 100644
--- a/config/lib-link.m4
+++ b/config/lib-link.m4
@@ -126,6 +126,7 @@ AC_DEFUN([AC_LIB_LINKFLAGS_BODY],
 [
   define([NAME],[translit([$1],[abcdefghijklmnopqrstuvwxyz./-], [ABCDEFGHIJKLMNOPQRSTUVWXYZ___])])
+  define([Name],[translit([$1],[./-], [___])])
 
   dnl By default, look in $includedir and $libdir.
   use_additional=yes
   AC_LIB_WITH_FINAL_PREFIX([
@@ -152,8 +153,8 @@ AC_DEFUN([AC_LIB_LINKFLAGS_BODY],
   ])
   AC_LIB_ARG_WITH([lib$1-type],
 [  --with-lib$1-type=TYPE  type of library to search for (auto/static/shared) ],
-    [ with_lib$1_type=$withval ], [ with_lib$1_type=auto ])
-  lib_type=`eval echo \$with_lib$1_type`
+    [ with_lib[]Name[]_type=$withval ], [ with_lib[]Name[]_type=auto ])
+  lib_type=`eval echo \$with_lib[]Name[]_type`
 
   dnl Search the library and its dependencies in $additional_libdir and
   dnl $LDFLAGS. Using breadth-first-seach.
diff --git a/contrib/ChangeLog b/contrib/ChangeLog
index f4f48e5414b0..691203f9036b 100644
--- a/contrib/ChangeLog
+++ b/contrib/ChangeLog
@@ -1,3 +1,16 @@
+2025-08-17  Filip Kastl
+
+	* check-MAINTAINERS.py: Document the way the script sorts
+	entries.
+
+2025-08-07  Tobias Burnus
+
+	PR other/120237
+	* download_prerequisites: Update to download GMP 6.3.0 (before 6.2.1),
+	MPFR 4.2.2 (before 4.1.0), and MPC 1.3.1 (before 1.2.1).
+	* prerequisites.md5: Update hash.
+	* prerequisites.sha512: Likewise.
+
 2025-07-28  David Malcolm
 
 	* gcc-changelog/git_commit.py: Add "diagnostics" to bug
diff --git a/contrib/check-MAINTAINERS.py b/contrib/check-MAINTAINERS.py
index ba2cdb401298..881c7570ac2d 100755
--- a/contrib/check-MAINTAINERS.py
+++ b/contrib/check-MAINTAINERS.py
@@ -19,8 +19,8 @@
 # the Free Software Foundation, 51 Franklin Street, Fifth Floor,
 # Boston, MA 02110-1301, USA.
 
-# Check that names in the file are sorted
-# alphabetically by surname.
+# Check that names in the file are sorted alphabetically by surname, name,
+# bugzilla handle and email (in this order).
 
 import locale
 import sys
diff --git a/contrib/download_prerequisites b/contrib/download_prerequisites
index b83fcc9db5dd..a6d756c7b134 100755
--- a/contrib/download_prerequisites
+++ b/contrib/download_prerequisites
@@ -27,9 +27,9 @@ version='(unversioned)'
 # remember to also update the files `contrib/prerequisites.sha512` and
 # `contrib/prerequisites.md5` with the new checksums.
-gmp='gmp-6.2.1.tar.bz2'
-mpfr='mpfr-4.1.0.tar.bz2'
-mpc='mpc-1.2.1.tar.gz'
+gmp='gmp-6.3.0.tar.bz2'
+mpfr='mpfr-4.2.2.tar.bz2'
+mpc='mpc-1.3.1.tar.gz'
 isl='isl-0.24.tar.bz2'
 gettext='gettext-0.22.tar.gz'
diff --git a/contrib/prerequisites.md5 b/contrib/prerequisites.md5
index 716a9ff910c7..96b9802d9017 100644
--- a/contrib/prerequisites.md5
+++ b/contrib/prerequisites.md5
@@ -1,5 +1,5 @@
-28971fc21cf028042d4897f02fd355ea gmp-6.2.1.tar.bz2
-44b892bc5a45bafb4294d134e13aad1d mpfr-4.1.0.tar.bz2
-9f16c976c25bb0f76b50be749cd7a3a8 mpc-1.2.1.tar.gz
+c1cd6ef33085e9cb818b9b08371f9000 gmp-6.3.0.tar.bz2
+afe8268360bc8702fbc8297d351c8b5e mpfr-4.2.2.tar.bz2
+5c9bc658c9fd0f940e8e3e0f09530c62 mpc-1.3.1.tar.gz
 dd2f7b78e118c25bd96134a52aae7f4d isl-0.24.tar.bz2
 c092102240f8f66134d22718421d5115 gettext-0.22.tar.gz
diff --git a/contrib/prerequisites.sha512 b/contrib/prerequisites.sha512
index f71398bdbc31..7a3e9c2cbf7e 100644
--- a/contrib/prerequisites.sha512
+++ b/contrib/prerequisites.sha512
@@ -1,5 +1,5 @@
-8904334a3bcc5c896ececabc75cda9dec642e401fb5397c4992c4fabea5e962c9ce8bd44e8e4233c34e55c8010cc28db0545f5f750cbdbb5f00af538dc763be9 gmp-6.2.1.tar.bz2
-410208ee0d48474c1c10d3d4a59decd2dfa187064183b09358ec4c4666e34d74383128436b404123b831e585d81a9176b24c7ced9d913967c5fce35d4040a0b4 mpfr-4.1.0.tar.bz2
-3279f813ab37f47fdcc800e4ac5f306417d07f539593ca715876e43e04896e1d5bceccfb288ef2908a3f24b760747d0dbd0392a24b9b341bc3e12082e5c836ee mpc-1.2.1.tar.gz
+3b684c9bcb9ede2b7e54d0ba4c9764bfa17c20d4f3000017c553b6f1e135b536949580ff37341680c25dc236cfe0ba1db8cfdfe619ce013656189ef0871b89f8 gmp-6.3.0.tar.bz2
+0176e50808dcc07afbf5bc3e38bf9b7b21918e5f194aa0bfd860d99b00c470630aef149776c4be814a61c44269c3a5b9a4b0b1c0fcd4c9feb1459d8466452da8 mpfr-4.2.2.tar.bz2
+4bab4ef6076f8c5dfdc99d810b51108ced61ea2942ba0c1c932d624360a5473df20d32b300fc76f2ba4aa2a97e1f275c9fd494a1ba9f07c4cb2ad7ceaeb1ae97 mpc-1.3.1.tar.gz
 aab3bddbda96b801d0f56d2869f943157aad52a6f6e6a61745edd740234c635c38231af20bc3f1a08d416a5e973a90e18249078ed8e4ae2f1d5de57658738e95 isl-0.24.tar.bz2
 e2a58dde1cae3e6b79c03e7ef3d888f7577c1f4cba283b3b0f31123ceea8c33d7c9700e83de57104644de23e5f5c374868caa0e091f9c45edbbe87b98ee51c04 gettext-0.22.tar.gz
diff --git a/fixincludes/ChangeLog b/fixincludes/ChangeLog
index a5fdb312605f..8b490069a7ae 100644
--- a/fixincludes/ChangeLog
+++ b/fixincludes/ChangeLog
@@ -1,3 +1,13 @@
+2025-08-15  Francois-Xavier Coudert
+
+	* fixincl.x: Regenerate.
+	* inclhack.def (stdio_va_list): Skip on recent darwin.
+
+2025-08-15  Francois-Xavier Coudert
+
+	* fixincl.x: Regenerate.
+	* inclhack.def (stdio_stdarg_h): Skip on darwin.
+
 2024-07-11  Iain Sandoe
 
 	Revert:
diff --git a/fixincludes/fixincl.x b/fixincludes/fixincl.x
index 9dc05ea17f10..819c02c483c7 100644
--- a/fixincludes/fixincl.x
+++ b/fixincludes/fixincl.x
@@ -2,11 +2,11 @@
 *
 * DO NOT EDIT THIS FILE (fixincl.x)
 *
- * It has been AutoGen-ed July 10, 2024 at 02:49:05 PM by AutoGen 5.18.16
+ * It has been AutoGen-ed August 15, 2025 at 05:30:32 PM by AutoGen 5.18.16
 * From the definitions inclhack.def
 * and the template file fixincl
 */
 
-/* DO NOT SVN-MERGE THIS FILE, EITHER Wed Jul 10 14:49:05 CEST 2024
+/* DO NOT SVN-MERGE THIS FILE, EITHER Fri Aug 15 17:30:32 CEST 2025
 *
 * You must regenerate it. Use the ./genfixes script.
 *
@@ -8981,7 +8981,7 @@ tSCC* apzStdio_Stdarg_HMachs[] = {
 * content bypass pattern - skip fix if pattern found
 */
 tSCC zStdio_Stdarg_HBypass0[] =
-       "include.*(stdarg.h|machine/ansi.h)";
+       "include.*(stdarg.h|machine/ansi.h|_stdio.h)";
 
 #define STDIO_STDARG_H_TEST_CT 1
 static tTestDesc aStdio_Stdarg_HTests[] = {
@@ -9019,7 +9019,7 @@ tSCC* apzStdio_Va_ListMachs[] = {
 * content bypass pattern - skip fix if pattern found
 */
 tSCC zStdio_Va_ListBypass0[] =
-       "__gnuc_va_list|_BSD_VA_LIST_|__DJ_va_list|_G_va_list";
+       "__gnuc_va_list|_BSD_VA_LIST_|__DJ_va_list|_G_va_list|_stdio.h";
 
 #define STDIO_VA_LIST_TEST_CT 1
 static tTestDesc aStdio_Va_ListTests[] = {
diff --git a/fixincludes/inclhack.def b/fixincludes/inclhack.def
index 1ac8e335419e..35ccaf03c4fa 100644
--- a/fixincludes/inclhack.def
+++ b/fixincludes/inclhack.def
@@ -4472,13 +4472,13 @@ fix = {
 
 /*
 * Arrange for stdio.h to use stdarg.h to define __gnuc_va_list.
- * On 4BSD-derived systems, stdio.h defers to machine/ansi.h; that's
- * OK too.
+ * On 4BSD-derived systems, stdio.h defers to machine/ansi.h, that's
+ * OK too. Modern macOS includes _stdio.h, and does not need the fix.
 */
 fix = {
     hackname = stdio_stdarg_h;
     files = stdio.h;
-    bypass = "include.*(stdarg\.h|machine/ansi\.h)";
+    bypass = "include.*(stdarg\.h|machine/ansi\.h|_stdio\.h)";
     /*
      * On Solaris 10, this fix is unncessary; includes
      * , which includes .
@@ -4499,12 +4499,12 @@ fix = {
 * of __gnuc_va_list, __DJ_va_list, or _G_va_list is taken to
 * indicate that the header knows what it's doing -- under SUSv2,
 * stdio.h is required to define va_list, and we shouldn't break
- * that.
+ * that. Modern macOS includes _stdio.h, and does not need the fix.
 */
 fix = {
     hackname = stdio_va_list;
     files = stdio.h;
-    bypass = '__gnuc_va_list|_BSD_VA_LIST_|__DJ_va_list|_G_va_list';
+    bypass = '__gnuc_va_list|_BSD_VA_LIST_|__DJ_va_list|_G_va_list|_stdio.h';
     /*
      * On Solaris 10, the definition in
      * is guarded appropriately by the _XPG4 feature macro;
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 4b6bc90b2a41..451b1a0818a7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,2200 @@
+2025-08-26  liuhongt
+
+	* config/i386/i386.cc (ix86_vector_costs::ix86_vector_costs):
+	Add new members m_num_reduc, m_prefer_unroll.
+	(ix86_vector_costs::add_stmt_cost): Set m_prefer_unroll and
+	m_num_reduc.
+	(ix86_vector_costs::finish_cost): Determine
+	m_suggested_unroll_factor with consideration of
+	reduc_lat_mult_thr, m_num_reduc and
+	ix86_vect_unroll_limit.
+	* config/i386/i386.h (enum ix86_reduc_unroll_factor): New
+	enum.
+	(processor_costs): Add reduc_lat_mult_thr and
+	vect_unroll_limit.
+	* config/i386/x86-tune-costs.h: Initialize
+	reduc_lat_mult_thr and vect_unroll_limit.
+	* config/i386/i386.opt: Add -param=ix86-vect-unroll-limit.
+
+2025-08-26  Paul-Antoine Arras
+
+	* config/riscv/autovec-opt.md (*vfrdiv_vf_): Add new pattern to
+	combine vec_duplicate + vfdiv.vv into vfrdiv.vf.
+	* config/riscv/vector.md (@pred__reverse_scalar): Allow VLS
+	modes.
+
+2025-08-26  Tamar Christina
+
+	PR target/121290
+	* config/aarch64/aarch64.cc
+	(class aarch64_vector_costs): Add m_loop_fully_scalar_dup.
+	(aarch64_vector_costs::add_stmt_cost): Detect invariant inner loops.
+	(adjust_body_cost): Adjust final costing if m_loop_fully_scalar_dup.
+
+2025-08-26  Paul-Antoine Arras
+
+	* config/riscv/autovec-opt.md (*vfmul_vf_): Add new pattern to
+	combine vec_duplicate + vfmul.vv into vfmul.vf.
+	* config/riscv/vector.md (@pred__scalar): Allow VLS modes.
+
+2025-08-26  Jeff Law
+
+	* config/riscv/riscv.cc (riscv_arg_partial_bytes): Remove name
+	from unused parameter.
+
+2025-08-26  Richard Biener
+
+	* tree-vectorizer.h (vect_reduc_type): Get SLP node as argument.
+	* config/aarch64/aarch64.cc (aarch64_sve_in_loop_reduction_latency):
+	Take SLP node as argument and adjust.
+	(aarch64_in_loop_reduction_latency): Likewise.
+	(aarch64_detect_vector_stmt_subtype): Adjust.
+	(aarch64_vector_costs::count_ops): Likewise. Treat reductions
+	during scalar costing as single-cycle.
+
+2025-08-26  Richard Biener
+
+	PR tree-optimization/121659
+	* tree-vect-slp.cc (vect_build_slp_tree_1): Do not allow
+	matching up comparison operators by swapping if that would
+	disturb STMT_VINFO_REDUC_IDX. Make sure to only
+	actually mark operands for swapping when there was a
+	mismatch and we're not processing the first stmt.
+
+2025-08-26  Richard Biener
+
+	* tree-vect-stmts.cc (vectorizable_store): Access lanes_ifn
+	only when VMAT_LOAD_STORE_LANES.
+	(vectorizable_load): Likewise.
+
+2025-08-26  Richard Biener
+
+	* tree-vectorizer.h (_stmt_vec_info::reduc_vectype_in): Remove.
+	(STMT_VINFO_REDUC_VECTYPE_IN): Likewise.
+	* tree-vect-loop.cc (vect_is_emulated_mixed_dot_prod): Get
+	at the input vectype via the SLP node child.
+	(vectorizable_lane_reducing): Likewise.
+	(vect_transform_reduction): Likewise.
+	(vectorizable_reduction): Do not set STMT_VINFO_REDUC_VECTYPE_IN.
+
+2025-08-26  Jakub Jelinek
+
+	PR target/121658
+	* config/i386/sse.md (3 any_shift): Use const0_rtx
+	instead of GEN_INT (0).
+	(cond_ any_shift): Likewise. Formatting fix.
+	(3 any_rotate): Use register_operand predicate instead of
+	general_operand for match_operand 1. Use const0_rtx instead of
+	GEN_INT (0).
+	(v16qi3 any_rotate): Use force_reg on operands[1]. Formatting
+	fix.
+	* config/i386/i386.cc (ix86_shift_rotate_cost): Comment formatting
+	fixes.
+
+2025-08-26  Pan Li
+
+	* config/riscv/vector.md (@pred_mul_plus_vx_): Add new pattern to
+	generate vmacc rtl.
+	(*pred_macc__scalar_undef): Ditto.
+	* config/riscv/autovec-opt.md (*vmacc_vx_): Add new
+	pattern to match the vmacc vx combine.
+
+2025-08-25  Jakub Jelinek
+
+	PR middle-end/121453
+	* omp-expand.cc (expand_omp_for_init_counts): Clear fd->loop.n2
+	before first zero count check if zero_iter1_bb is non-NULL upon
+	entry and fd->loop.n2 has not been written yet.
+
+2025-08-25  David Faust
+
+	PR debug/121411
+	* dwarf2ctf.cc (gen_ctf_subrange_type): Use unsigned HWI for
+	array_num_elements. Fall back to CTF_K_UNKNOWN if the array
+	type has too many elements for CTF to represent.
+
+2025-08-25  Andrew Pinski
+
+	* tree-ssa-forwprop.cc (simplify_permutation): Boolify.
+	(pass_forwprop::execute): No longer handle 2 as the return
+	from simplify_permutation.
+
+2025-08-25  Andrew Pinski
+
+	* tree-ssa-forwprop.cc (forward_propagate_into_comparison): Boolify.
+	(pass_forwprop::execute): Don't handle return of 2 from
+	forward_propagate_into_comparison.
+
+2025-08-25  Andrew Pinski
+
+	* tree-ssa-forwprop.cc (remove_prop_source_from_use): Remove
+	return type.
+	(forward_propagate_into_comparison): Update dealing with
+	no return type of remove_prop_source_from_use.
+	(forward_propagate_into_gimple_cond): Likewise.
+	(simplify_permutation): Likewise.
+
+2025-08-25  Andrew Pinski
+
+	* tree-ssa-forwprop.cc (simplify_gimple_switch): Add simple_dce_worklist
+	argument. Mark the old index when doing the replacement.
+	(pass_forwprop::execute): Update call to simplify_gimple_switch.
+
+2025-08-25  Andrew Pinski
+
+	PR tree-optimization/121279
+	* gimple-fold.cc (gimple_needing_rewrite_undefined): Return
+	true for non mode precision boolean loads.
+	(rewrite_to_defined_unconditional): Handle non mode precision loads.
+
+2025-08-25  Andrew Pinski
+
+	* tree-ssa-loop-im.cc (execute_sm): Call
+	get_or_create_ssa_default_def for the new uninitialized
+	decl.
+
+2025-08-25  Takayuki 'January June' Suwa
+
+	* config/xtensa/xtensa.md (addsi3, mulhisi3, andsi3,
+	zero_extendsi2, extendhisi2_internal, movsi_internal,
+	movhi_internal, movqi_internal, movsf_internal, ashlsi3_internal,
+	ashrsi3, lshrsi3, rotlsi3, rotrsi3):
+	Rewrite in compact syntax.
+
+2025-08-25  Takayuki 'January June' Suwa
+
+	* config/xtensa/xtensa.md
+	(The auxiliary define_split for *masktrue_const_bitcmpl):
+	Use a more concise function call, i.e.,
+	(1 << GET_MODE_BITSIZE (mode)) - 1 is equivalent to
+	GET_MODE_MASK (mode).
+
+2025-08-25  Takayuki 'January June' Suwa
+
+	* config/xtensa/xtensa.md (mode_bits):
+	New mode attribute.
+	(zero_extendsi2): Use the appropriate mode iterator and
+	attribute to unify "zero_extend[hq]isi2" to this description.
+
+2025-08-25  Richard Biener
+
+	PR tree-optimization/121638
+	* tree-vect-stmts.cc (process_use): Do not make induction
+	PHI backedge values relevant.
+
+2025-08-25  Indu Bhagat
+	    Claudiu Zissulescu
+
+	* asan.h (HWASAN_TAG_SIZE): Use targetm.memtag.tag_bitsize.
+	* config/i386/i386.cc (ix86_memtag_tag_size): Rename to
+	ix86_memtag_tag_bitsize.
+	(TARGET_MEMTAG_TAG_SIZE): Renamed to TARGET_MEMTAG_TAG_BITSIZE.
+	* doc/tm.texi (TARGET_MEMTAG_TAG_SIZE): Likewise.
+	* doc/tm.texi.in (TARGET_MEMTAG_TAG_SIZE): Likewise.
+	* target.def (tag_size): Rename to tag_bitsize.
+	* targhooks.cc (default_memtag_tag_size): Rename to
+	default_memtag_tag_bitsize.
+	* targhooks.h (default_memtag_tag_size): Likewise.
+
+2025-08-25  Kito Cheng
+
+	* config/riscv/riscv-protos.h (riscv_init_cumulative_args): Change
+	fntype parameter from tree to const_tree.
+	* config/riscv/riscv.cc (riscv_init_cumulative_args): Likewise.
+	(riscv_function_value): Replace with new implementation that
+	conforms to TARGET_FUNCTION_VALUE hook signature.
+	(riscv_libcall_value): New function implementing TARGET_LIBCALL_VALUE.
+	(TARGET_FUNCTION_VALUE): Define.
+	(TARGET_LIBCALL_VALUE): Define.
+	* config/riscv/riscv.h (FUNCTION_VALUE): Remove.
+	(LIBCALL_VALUE): Remove.
+
+2025-08-25  Andi Kleen
+
+	* config/i386/i386-expand.cc (ix86_vgf2p8affine_shift_matrix):
+	New function to look up shift/rotate matrices for gf2p8affine.
+	* config/i386/i386-protos.h (ix86_vgf2p8affine_shift_matrix):
+	Declare new function.
+	* config/i386/i386.cc (ix86_shift_rotate_cost): Add cost model
+	for shift/rotate implemented using gf2p8affine.
+	* config/i386/sse.md (VI1_AVX512_3264): New mode iterator.
+	(3): Add GFNI case for shift patterns.
+	(cond_3): New pattern.
+	(3): Ditto.
+	(v16qi): New rotate pattern to handle XOP V16QI case
+	and GFNI.
+	(rotl3, rotr3): Exclude V16QI case.
+
+2025-08-25  Xi Ruoyao
+
+	PR target/121634
+	* config/loongarch/simd.md (simd_maddw_evod__): Use
+	WVEC_HALF instead of WVEC for the mode of the sign_extend for
+	the rhs of multiplication.
+
+2025-08-25  Jeff Law
+
+	* ifcvt.cc (noce_try_sign_bit_splat): Fix right shift computation.
+
+2025-08-23  Sam James
+
+	PR target/120933
+	* config.gcc (supported_defaults): Add tls for i386, x86_64.
+	* config/i386/i386.h (OPTION_DEFAULT_SPECS): Add tls.
+	* doc/install.texi: Document --with-tls= for i386, x86_64.
+
+2025-08-23  John Ericson
+
+	* gcc.cc (for_each_path): Templated, to make passing lambdas
+	possible/easy/safe, and to have a polymorphic return type.
+	(struct add_to_obstack_info): Deleted, lambda captures replace
+	it.
+	(add_to_obstack): Moved to lambda in build_search_list.
+	(build_search_list): Has above lambda now.
+	(struct file_at_path_info): Deleted, lambda captures replace
+	it.
+	(file_at_path): Moved to lambda in find_a_file.
+	(find_a_file): Has above lambda now.
+	(struct spec_path_info): Renamed to just struct spec_path.
+	(struct spec_path): New name.
+	(spec_path): Renamed to spec_path::operator().
+	(spec_path::operator()): New name.
+	(do_spec_1): Updated for_each_path call sites.
+
+2025-08-22  Kishan Parmar
+
+	PR target/118890
+	* config/rs6000/rs6000.cc (can_be_rotated_to_negative_lis): Add bounds
+	checks for shift counts to prevent undefined behavior.
+	(rs6000_emit_set_long_const): Likewise.
+
+2025-08-22  Jeff Law
+
+	PR rtl-optimization/120553
+	* ifcvt.cc (noce_try_sign_bit_splat): New function.
+	(noce_process_if_block): Use it.
+
+2025-08-22  Richard Biener
+
+	* tree-vect-loop.cc (vectorizable_live_operation): Pass
+	the representative of the PHIs node to
+	vect_create_epilog_for_reduction.
+
+2025-08-22  Richard Biener
+
+	* tree-vect-loop.cc (vectorizable_lane_reducing): Get
+	reduction info properly. Adjust checks according to
+	comments.
+	(vectorizable_reduction): Do not set STMT_VINFO_REDUC_VECTYPE_IN
+	on the reduc info.
+	(vect_transform_reduction): Query STMT_VINFO_REDUC_VECTYPE_IN
+	on the actual reduction stmt, not the info.
+
+2025-08-22  Pan Li
+
+	* match.pd: Add form 3 for unsigned SAT_MUL.
+
+2025-08-22  H.J. Lu
+
+	PR target/121635
+	* config/i386/i386-features.cc (ix86_emit_tls_call): Emit the
+	TLS call after NOTE_INSN_FUNCTION_BEG.
+
+2025-08-22  Richard Biener
+
+	* tree-vect-loop.cc (get_initial_defs_for_reduction): Adjust
+	comment.
+	(vect_create_epilog_for_reduction): Get at the reduction
+	kind via the instance, re-use the slp_reduc flag instead
+	of checking REDUC_GROUP_FIRST_ELEMENT again.
+	Remove unreachable code.
+	(vectorizable_reduction): Compute a reduc_chain flag from
+	the SLP instance kind, avoid REDUC_GROUP_FIRST_ELEMENT
+	checks.
+	(vect_transform_cycle_phi): Likewise.
+	(vectorizable_live_operation): Check the SLP instance
+	kind instead of REDUC_GROUP_FIRST_ELEMENT.
+
+2025-08-22  Richard Biener
+
+	* tree-parloops.cc (parloops_is_simple_reduction): Pass
+	in double reduction inner loop LC phis and query that.
+	(parloops_force_simple_reduction): Similar, but set it.
+	Check for valid reduction types here.
+	(valid_reduction_p): Remove.
+	(gather_scalar_reductions): Adjust, fixup double
+	reduction inner loop processing.
+
+2025-08-22  Sebastian Huber
+
+	* config/riscv/t-rtems: Add -mstrict-align multilibs for
+	targets without support for misaligned access in hardware.
+
+2025-08-21  Dimitar Dimitrov
+
+	* config/pru/pru.cc (pru_init_libfuncs): Set softmpy libgcc
+	functions for optab multiplication entries if TARGET_OPT_MUL
+	option is not set.
+
+2025-08-21  Dimitar Dimitrov
+
+	* config.gcc: Include pru/t-multilib.
+	* config/pru/pru.h (MULTILIB_DEFAULTS): Define.
+	* config/pru/t-multilib: New file.
+
+2025-08-21  Dimitar Dimitrov
+
+	* common/config/pru/pru-common.cc (TARGET_DEFAULT_TARGET_FLAGS):
+	Keep multiplication, FILL and ZERO instructions enabled by
+	default.
+	* config/pru/pru.md (prumov): Gate code generation on
+	TARGET_OPT_FILLZERO.
+	(mov): Ditto.
+	(zero_extendqidi2): Ditto.
+	(zero_extendhidi2): Ditto.
+	(zero_extendsidi2): Ditto.
+	(@pru_ior_fillbytes): Ditto.
+	(@pru_and_zerobytes): Ditto.
+	(@di3): Ditto.
+	(mulsi3): Gate code generation on TARGET_OPT_MUL.
+	* config/pru/pru.opt: Add mmul and mfillzero options.
+	* config/pru/pru.opt.urls: Regenerate.
+	* config/rl78/rl78.opt.urls: Regenerate.
+	* doc/invoke.texi: Document new options.
+
+2025-08-21  Richard Sandiford
+
+	PR rtl-optimization/121619
+	* rtl-ssa/functions.h (function_info::commit_make_use_available):
+	Declare.
+	* rtl-ssa/blocks.cc (function_info::commit_make_use_available):
+	New function.
+	* rtl-ssa/changes.cc (function_info::apply_changes_to_insn): Use it.
+
+2025-08-21  Richard Biener
+
+	PR tree-optimization/111494
+	* gimple-fold.h (arith_code_with_undefined_signed_overflow): Declare.
+	* gimple-fold.cc (arith_code_with_undefined_signed_overflow): Export.
+	* tree-vect-stmts.cc (vectorizable_operation): Use unsigned
+	arithmetic for operations participating in a reduction.
+
+2025-08-21  H.J. Lu
+
+	PR target/121607
+	* config/i386/i386-features.cc (ix86_emit_tls_call): Emit the
+	TLS call after NOTE_INSN_BASIC_BLOCK in a basic block with only
+	a label.
+
+2025-08-21  Takayuki 'January June' Suwa
+
+	* config/xtensa/xtensa.md (*btrue_INT_MIN):
+	Change the branch insn condition to test for a negative number
+	rather than testing for the MSB.
+
+2025-08-21  Richard Biener
+
+	* tree-vectorizer.h (vectorizable_phi): Take bb_vec_info.
+	(vectorizable_early_exit): Take loop_vec_info.
+	* tree-vect-loop.cc (vectorizable_phi): Adjust.
+	* tree-vect-slp.cc (vect_slp_analyze_operations): Likewise.
+	(vectorize_slp_instance_root_stmt): Likewise.
+	* tree-vect-stmts.cc (vectorizable_early_exit): Likewise.
+	(vect_transform_stmt): Likewise.
+	(vect_analyze_stmt): Merge the sequences of vectorizable_*
+	where common.
+
+2025-08-20  Qing Zhao
+
+	* common.opt.urls: Regenerate.
+
+2025-08-20  Qing Zhao
+
+	PR tree-optimization/109071
+	PR tree-optimization/85788
+	PR tree-optimization/88771
+	PR tree-optimization/106762
+	PR tree-optimization/108770
+	PR tree-optimization/115274
+	PR tree-optimization/117179
+	* Makefile.in (OBJS): Add diagnostic-context-rich-location.o.
+	* common.opt (fdiagnostics-show-context): New option.
+	(fdiagnostics-show-context=): New option.
+	* diagnostic-context-rich-location.cc: New file.
+	* diagnostic-context-rich-location.h: New file.
+	* doc/invoke.texi (fdiagnostics-details): Add
+	documentation for the new options.
+	* gimple-array-bounds.cc (check_out_of_bounds_and_warn): Add
+	one new parameter. Use rich location with details for warning_at.
+	(array_bounds_checker::check_array_ref): Use rich location with
+	details for warning_at.
+	(array_bounds_checker::check_mem_ref): Add one new parameter.
+	Use rich location with details for warning_at.
+	(array_bounds_checker::check_addr_expr): Use rich location with
+	move_history_diagnostic_path for warning_at.
+	(array_bounds_checker::check_array_bounds): Call check_mem_ref with
+	one more parameter.
+	* gimple-array-bounds.h: Update prototype for check_mem_ref.
+	* gimple-ssa-warn-access.cc (warn_string_no_nul): Use rich location
+	with details for warning_at.
+	(maybe_warn_nonstring_arg): Likewise.
+	(maybe_warn_for_bound): Likewise.
+	(warn_for_access): Likewise.
+	(check_access): Likewise.
+	(pass_waccess::check_strncat): Likewise.
+	(pass_waccess::maybe_check_access_sizes): Likewise.
+	* gimple-ssa-warn-restrict.cc (pass_wrestrict::execute): Calculate
+	dominance info for diagnostics show context.
+	(maybe_diag_overlap): Use rich location with details for warning_at.
+	(maybe_diag_access_bounds): Use rich location with details for
+	warning_at.
+
+2025-08-20  Andrew Pinski
+
+	PR tree-optimization/121568
+	* ipa-prop.h (build_ref_for_offset): Remove.
+	* tree-sra.cc (build_ref_for_offset): Make static.
+
+2025-08-20  Richard Sandiford
+
+	* config.gcc (aarch64*-*-*): Remove aarch64-cc-fusion.o from
+	extra_objs.
+	* config/aarch64/aarch64-passes.def (pass_cc_fusion): Delete.
+	* config/aarch64/aarch64-protos.h (make_pass_cc_fusion): Delete.
+	* config/aarch64/t-aarch64 (aarch64-cc-fusion.o): Delete.
+	* config/aarch64/aarch64-cc-fusion.cc: Delete.
+	* late-combine.cc (late_combine::optimizable_set): Take a set_info *
+	rather than an insn_info * and move destination tests from...
+	(late_combine::combine_into_uses): ...here. Take a set_info * rather
+	than an insn_info *. Take the rtx set.
+	(late_combine::parallelize_insns, late_combine::combine_cc_setter)
+	(late_combine::combine_insn): New member functions.
+	(late_combine::m_parallel): New member variable.
+	* rtlanal.cc (pattern_cost): Handle sets of CC registers in the
+	same way as comparisons.
+
+2025-08-20  Richard Sandiford
+
+	* rtl-ssa/blocks.cc (function_info::create_degenerate_phi): Fix
+	inverted test of bitmap_set_bit. Call add_live_out_use even
+	if the register was previously live-out from the predecessor block.
+	Instead...
+	(function_info::add_live_out_use): ...check here whether a live-out
+	use already exists.
+
+2025-08-20  Richard Sandiford
+
+	* rtl-ssa/accesses.h (use_lookup): New class.
+	* rtl-ssa/functions.h (function_info::find_def): Expand comment.
+	(function_info::find_use): Declare.
+	* rtl-ssa/member-fns.inl (use_lookup::prev_use, use_lookup::next_use)
+	(use_lookup::matching_use, use_lookup::matching_or_prev_use)
+	(use_lookup::matching_or_next_use): New member functions.
+	* rtl-ssa/accesses.cc (function_info::find_use): Likewise.
+
+2025-08-20  Richard Biener
+
+	PR tree-optimization/114480
+	* cfganal.cc (compute_idf): Split processing of the initial
+	workset from the main iteration. Use a vector for the
+	workset of the main iteration.
+
+2025-08-20  Georg-Johann Lay
+
+	PR target/121608
+	* config/avr/specs.h (LINK_RELAX_SPEC): Wrap in %{!r...}.
+
+2025-08-20  Richard Biener
+
+	* tree-vect-slp.cc (vect_analyze_slp_reduc_chain): New,
+	copied from vect_analyze_slp_instance and only handle
+	slp_inst_kind_reduc_chain. Inline vect_build_slp_instance.
+	(vect_analyze_slp_instance): Only handle slp_inst_kind_store.
+	Inline vect_build_slp_instance.
+	(vect_build_slp_instance): Remove now unused stmt_info parameter,
+	remove special code for store groups and reduction chains.
+	(vect_analyze_slp): Call vect_analyze_slp_reduc_chain
+	for reduction chain SLP build and adjust.
+
+2025-08-20  Richard Biener
+
+	* tree-vect-data-refs.cc (vect_check_gather_scatter):
+	Remove restriction on epilogue of epilogue vectorization.
+
+2025-08-20  Richard Biener
+
+	* tree-vect-loop.cc (update_epilogue_loop_vinfo): Remove
+	fixing up pattern stmt operands and gather/scatter DR_REFs.
+	(find_in_mapping): Remove.
+
+2025-08-20  Richard Biener
+
+	* tree-vectorizer.h (vect_load_store_data): New.
+	(_slp_tree::memory_access_type): Remove.
+	(SLP_TREE_MEMORY_ACCESS_TYPE): Turn into inline function.
+	* tree-vect-slp.cc (_slp_tree::_slp_tree): Do not
+	initialize SLP_TREE_MEMORY_ACCESS_TYPE.
+	* tree-vect-stmts.cc (check_load_store_for_partial_vectors):
+	Remove gather_scatter_info pointer argument, instead get
+	info from the SLP node.
+	(vect_build_one_gather_load_call): Get SLP node and builtin
+	decl as argument and remove uses of gather_scatter_info.
+	(vect_build_one_scatter_store_call): Likewise.
+	(vect_get_gather_scatter_ops): Remove uses of gather_scatter_info.
+	(vect_get_strided_load_store_ops): Get SLP node and remove
+	uses of gather_scatter_info.
+	(get_load_store_type): Take pointer to vect_load_store_data
+	instead of individual pointers.
+	(vectorizable_store): Adjust. Re-use get_load_store_type
+	result from analysis time.
+	(vectorizable_load): Likewise.
+
+2025-08-20  H.J. Lu
+
+	PR target/121572
+	* config/i386/i386-features.cc (replace_tls_call): Add a bitmap
+	argument and put the updated TLS instruction in the bitmap.
+	(ix86_get_dominator_for_reg): New.
+	(ix86_check_flags_reg): Likewise.
+	(ix86_emit_tls_call): Likewise.
+	(ix86_place_single_tls_call): Add 2 bitmap arguments for updated
+	GNU and GNU2 TLS instructions. Call ix86_emit_tls_call to emit
+	TLS instruction. Correct debug dump for before instruction.
+
+2025-08-19  Andrew Pinski
+
+	PR middle-end/120024
+	* calls.cc (expand_call): Remove start_sequence/end_sequence
+	for ECF_MALLOC.
+	Check valreg before dereferencing it when it comes to malloc-like
+	functions. Use the return value of emit_move_insn instead of
+	calling get_last_insn.
+
+2025-08-19  Richard Biener
+
+	* tree-parloops.cc (parloops_is_slp_reduction): Remove.
+	(parloops_is_simple_reduction): Do not call it.
+
+2025-08-19  Richard Biener
+
+	* tree-vect-loop.cc (vectorizable_reduction): Pass the
+	appropriate SLP node for costing of single-def-use-cycle
+	operations.
+	(vectorizable_live_operation): Pass the SLP node to the
+	costing hook.
+	* tree-vect-stmts.cc (vectorizable_bswap): Likewise.
+	(vectorizable_store): Likewise.
+
+2025-08-19  Richard Biener
+
+	PR tree-optimization/121592
+	* tree-vect-slp.cc (vect_analyze_slp): When SLP reduction chain
+	discovery fails, fail overall when the tail of the chain
+	isn't also the entry for the non-SLP reduction.
+
+2025-08-19  Richard Biener
+
+	* config.gcc (riscv*-*-*): Look for python3, then fall back
+	to python. Never use python2.
+
+2025-08-19  Richard Biener
+
+	PR tree-optimization/121527
+	* tree-sra.cc (build_access_from_expr_1): Do not strip an
+	outer VIEW_CONVERT_EXPR as it's relevant for the size of
+	the access.
+	(get_access_for_expr): Likewise.
+
+2025-08-19  Tamar Christina
+
+	PR target/121536
+	* config/aarch64/aarch64.cc (aarch64_bool_compound_p): Use
+	SLP_TREE_VECTYPE instead of STMT_VINFO_VECTYPE.
+	(aarch64_adjust_stmt_cost, aarch64_vector_costs::count_ops): Pass SLP
+	node to aarch64_bool_compound_p.
+
+2025-08-19  Tamar Christina
+
+	PR target/121536
+	* tree-vect-loop.cc (vectorizable_phi, vectorizable_recurr,
+	vectorizable_nonlinear_induction, vectorizable_induction): Pass slp_node
+	instead of stmt_info to record_stmt_cost.
+
+2025-08-19  Tamar Christina
+
+	PR target/121536
+	* config/aarch64/aarch64.cc (aarch64_vector_costs::add_stmt_cost): Set
+	vectype from type of lhs of gimple stmt.
+
+2025-08-18  Andrew Pinski
+
+	PR middle-end/121581
+	* doc/extend.texi (__builtin_object_size): Fix example.
+
+2025-08-18  Indu Bhagat
+
+	* flag-types.h (sanitize_code_type): Define.
+	* asan.h (sanitize_flags_p): Use 'sanitize_code_type' instead of
+	'unsigned int'.
+	* common.opt: Likewise.
+	* dwarf2asm.cc (dw2_output_indirect_constant_1): Likewise.
+	* opts.cc (find_sanitizer_argument): Likewise.
+	(report_conflicting_sanitizer_options): Likewise.
+	(parse_sanitizer_options): Likewise.
+	(parse_no_sanitize_attribute): Likewise.
+	* opts.h (parse_sanitizer_options): Likewise.
+	(parse_no_sanitize_attribute): Likewise.
+	* tree-cfg.cc (print_no_sanitize_attr_value): Likewise.
+	* tree.cc (tree_fits_sanitize_code_type_p): Define.
+	(tree_to_sanitize_code_type): Likewise.
+	* tree.h (tree_fits_sanitize_code_type_p): Declare.
+	(tree_to_sanitize_code_type): Likewise.
+
+2025-08-18  Indu Bhagat
+
+	* config/aarch64/aarch64.md (MEMTAG_TAG_MASK): New define
+	constant.
+	(MEMTAG_ADDR_MASK): Likewise.
+	(irg, subp, ldg): Use new constants.
+
+2025-08-18  Richard Sandiford
+
+	PR rtl-optimization/97497
+	* function-abi.h (predefined_function_abi::only_partial_reg_clobbers)
+	(function_abi::only_partial_reg_clobbers): New member functions.
+	* gcse-common.cc: Include regs.h and function-abi.h.
+	(compute_transp): Check for partially call-clobbered registers
+	and treat them as not being transparent in blocks with calls.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (UNSPEC_TI_FETCH_ADD): New unspec.
+	(UNSPEC_TI_FETCH_SUB): Likewise.
+	(UNSPEC_TI_FETCH_AND): Likewise.
+	(UNSPEC_TI_FETCH_XOR): Likewise.
+	(UNSPEC_TI_FETCH_OR): Likewise.
+	(UNSPEC_TI_FETCH_NAND_MASK_INVERTED): Likewise.
+	(ALL_SC): New define_mode_iterator.
+	(_scq): New define_mode_attr.
+	(atomic_fetch_nand): Accept ALL_SC instead of only GPR.
+	(UNSPEC_TI_FETCH_DIRECT): New define_int_iterator.
+	(UNSPEC_TI_FETCH): New define_int_iterator.
+	(amop_ti_fetch): New define_int_attr.
+	(size_ti_fetch): New define_int_attr.
+	(atomic_fetch_ti_scq): New define_insn.
+	(atomic_fetch_ti): New define_expand.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_exchangeti_scq): New
+	define_insn.
+	(atomic_exchangeti): New define_expand.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_compare_and_swapti_scq): New
+	define_insn.
+	(atomic_compare_and_swapti): New define_expand.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
+	Accept "%t" for printing the number of the 64-bit machine
+	register holding the upper half of a TImode.
+	* config/loongarch/sync.md (atomic_storeti_scq): New
+	define_insn.
+	(atomic_storeti): Expand to atomic_storeti_scq if !ISA_HAS_LSX.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/genopts/isa-evolution.in (scq): New evolution
+	feature.
+	* config/loongarch/loongarch-evolution.cc: Regenerate.
+	* config/loongarch/loongarch-evolution.h: Regenerate.
+	* config/loongarch/loongarch-str.h: Regenerate.
+	* config/loongarch/loongarch.opt: Regenerate.
+	* config/loongarch/loongarch.opt.urls: Regenerate.
+	* config/loongarch/loongarch-def.cc: Make -mscq the default for
+	-march=la664 and -march=la64v1.1.
+	* doc/invoke.texi (LoongArch Options): Document -m[no-]scq.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_storeti_lsx): New
+	define_insn.
+	(atomic_storeti): New define_expand.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_loadti_lsx): New define_insn.
+	(atomic_loadti): New define_expand.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md
+	(atomic_fetch_nand_mask_inverted): New define_insn.
+	(atomic_fetch_nand): New define_expand.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_fetch_sub):
+	Disable if ISA_HAS_LAM_BH.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (UNSPEC_COMPARE_AND_SWAP_AND):
+	Remove.
+	(UNSPEC_COMPARE_AND_SWAP_XOR): Remove.
+	(UNSPEC_COMPARE_AND_SWAP_OR): Remove.
+	(atomic_test_and_set): Rename to ...
+	(atomic_fetch_): ... this, and
+	adapt the expansion to use it for any bitwise operations and any
+	val, instead of just ior 1.
+	(atomic_test_and_set): New define_expand.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_test_and_set): Remove
+	unneeded andi instruction from the expansion.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/loongarch.cc (loongarch_print_operand_reloc):
+	Make "%T" output a comment marker if the operand is a memory
+	order for which the barrier won't be generated; remove "%t".
+	* config/loongarch/sync.md (atomic_cas_value_strong): Add
+	%T before "b 3f".
+	(atomic_cas_value_cmp_and_7_): Likewise.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (UNSPEC_COMPARE_AND_SWAP_AMCAS): New
+	UNSPEC code.
+	(atomic_cas_value_strong): NFC, update the comment to note
+	we only need to consider failure memory order.
+	(atomic_cas_value_strong_amcas): Use
+	UNSPEC_COMPARE_AND_SWAP_AMCAS instead of
+	UNSPEC_COMPARE_AND_SWAP.
+	(atomic_compare_and_swap): Pass failure memorder to
+	gen_atomic_cas_value_strong.
+	(atomic_compare_and_swap): Pass failure memorder to
+	gen_atomic_cas_value_cmp_and_7_si.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_test_and_set): Use bstrins
+	for masking the address if possible.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_load): Remove "+" for
+	the memory operand.
+	(atomic_store): Use "=" instead of "+" for the memory
+	operand.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md: Use instead of .
+	(amo): Remove.
+
+2025-08-18  Xi Ruoyao
+
+	* config/loongarch/sync.md (atomic_optab): Remove.
+	(atomic_): Change atomic_optab to amop.
+	(atomic_fetch_): Likewise.
+
+2025-08-17  Austin Law
+
+	PR target/121213
+	* config/riscv/sync.md (amo_atomic_exchange): Allow
+	(const_int 0) as input operand. Do not tie input to output.
+	No longer earlyclobber the output.
+
+2025-08-17  Artemiy Volkov
+
+	* regrename.cc (scan_rtx_reg): Handle fused insn pairs.
+
+2025-08-17  Jan Dubiec
+
+	PR target/109324
+	* config/h8300/addsub.md: Explicitly specify mode for plus operation.
+	* config/h8300/jumpcall.md: Explicitly specify modes for eq and
+	match_operand operations.
+	* config/h8300/testcompare.md: Explicitly specify modes for eq, ltu
+	and compare operations.
+
+2025-08-17  Artemiy Volkov
+
+	* ira-conflicts.cc (add_insn_allocno_copies): Handle fused insn pairs.
+	* rtl.h (single_output_fused_pair_p): Declare new function.
+	* rtlanal.cc (single_output_fused_pair_p): Define it.
+
+2025-08-17  Dimitar Dimitrov
+
+	PR target/121538
+	* config/riscv/arch-canonicalize (parse_dep_exts):
+	Match condition block up to closing brace.
+	(test_parse_long_condition_block): New test.
+
+2025-08-17  H.J. Lu
+
+	PR target/121541
+	* config/i386/i386-options.cc
+	(ix86_valid_target_attribute_inner_p): Add target("80387")
+	attribute. Set the mask bit in opts_set->x_target_flags if the
+	mask bit in opts->x_target_flags is updated.
+	* doc/extend.texi: Document target("80387") function attribute.
+
+2025-08-17  Pan Li
+
+	* config/riscv/autovec-opt.md: Add supported insn
+	of vx combine.
+
+2025-08-17  Pan Li
+
+	* config/riscv/autovec-opt.md: Add missed DONE
+	for vx combine pattern.
+
+2025-08-15  Jakub Jelinek
+
+	* doc/extend.texi (Type Traits): Document
+	__builtin_structured_binding_size.
+
+2025-08-15  Jakub Jelinek
+
+	PR c++/121552
+	* doc/invoke.texi (-Wno-non-c-typedef-for-linkage): Document.
+
+2025-08-15  Robert Dubner
+
+	* real.cc (real_to_decimal_for_mode): Set str[0] to known value.
+
+2025-08-15  Vineet Gupta
+
+	PR target/121534
+	* config/riscv/riscv.md (round_pattern): Save/restore fflags.
+
+2025-08-15  Qing Zhao
+
+	* doc/extend.texi: Extend counted_by attribute to pointer fields in
+	structures. Add one more requirement to pointers with counted_by
+	attribute.
+
+2025-08-15  Umesh Kalappa
+
+	* config/riscv/riscv-ext-mips.def (DEFINE_RISCV_EXT):
+	Added mips prefetch extension.
+	* config/riscv/riscv-ext.opt: Generated file.
+	* config/riscv/riscv.md (prefetch):
+	Added mips prefetch address operand constraint.
+	* config/riscv/constraints.md: Added mips-specific constraint.
+	* config/riscv/predicates.md (prefetch_operand):
+	Updated for mips nine-bit offset.
+	* config/riscv/riscv.cc (riscv_prefetch_offset_address_p):
+	Legitimate address with offset for prefetch check.
+	* config/riscv/riscv-protos.h: Likewise.
+	* config/riscv/riscv.h:
+	Macros to support the mips cached type.
+	* doc/riscv-ext.texi: Updated for mips prefetch.
+
+2025-08-15  Richard Sandiford
+
+	* config/riscv/riscv-protos.h
+	(riscv_process_target_version_attr): Change location_t argument
+	to location_t *.
+	* config/riscv/riscv-subset.h
+	(riscv_subset_list::riscv_subset_list): Change location_t argument
+	to location_t *.
+	(riscv_subset_list::parse): Likewise.
+	(riscv_subset_list::set_loc): Likewise.
+	(riscv_minimal_hwprobe_feature_bits): Likewise.
+	(riscv_subset_list::m_loc): Change type to location_t.
+	* common/config/riscv/riscv-common.cc
+	(riscv_subset_list::riscv_subset_list): Change location_t argument
+	to location_t *.
+	(riscv_subset_list::add): Suppress diagnostics when m_loc is null.
+	(riscv_subset_list::parsing_subset_version): Likewise.
+	(riscv_subset_list::parse_profiles): Likewise.
+	(riscv_subset_list::parse_base_ext): Likewise.
+	(riscv_subset_list::parse_single_std_ext): Likewise.
+	(riscv_subset_list::check_conflict_ext): Likewise.
+	(riscv_subset_list::parse_single_multiletter_ext): Likewise.
+	(riscv_subset_list::parse): Change location_t argument to location_t *.
+	(riscv_subset_list::set_loc): Likewise.
+	(riscv_minimal_hwprobe_feature_bits): Likewise.
+	(riscv_parse_arch_string): Update call accordingly.
+	* config/riscv/riscv-target-attr.cc
+	(riscv_target_attr_parser::m_loc): Change type to location_t *.
+	(riscv_target_attr_parser::riscv_target_attr_parser): Change
+	location_t argument to location_t *.
+	(riscv_process_one_target_attr): Likewise.
+	(riscv_process_target_attr): Likewise.
+	(riscv_process_target_version_attr): Likewise.
+	(riscv_target_attr_parser::parse_arch): Suppress diagnostics when
+	m_loc is null.
+	(riscv_target_attr_parser::handle_arch): Likewise.
+	(riscv_target_attr_parser::handle_cpu): Likewise.
+	(riscv_target_attr_parser::handle_tune): Likewise.
+	(riscv_target_attr_parser::handle_priority): Likewise.
+	(riscv_option_valid_attribute_p): Update call accordingly.
+	(riscv_option_valid_version_attribute_p): Likewise.
+	* config/riscv/riscv.cc (parse_features_for_version): Add a
+	location_t * argument.
+	(dispatch_function_versions): Update call accordingly.
+	(riscv_compare_version_priority): Likewise, suppressing diagnostics.
+
+2025-08-15  Kito Cheng
+
+	* tree-nrv.cc (pass_return_slot::execute): Use
+	gimple_call_fntype instead of gimple_call_fndecl.
+
+2025-08-15  Lulu Cheng
+
+	PR target/121542
+	* config/loongarch/loongarch.cc
+	(loongarch_vector_costs::add_stmt_cost): When using vectype,
+	first determine whether it is NULL.
+
+2025-08-14  Jeff Law
+
+	PR target/119275
+	* config/riscv/riscv.cc (riscv_legitimize_move): Avoid calling
+	gen_lowpart for cases where it'll fail. Just use standard expander
+	paths for those cases.
+
+2025-08-14  Mikael Pettersson
+
+	PR target/121336
+	* config/cris/cris.h: Do not abbreviate --emulation.
+
+2025-08-14  Richard Sandiford
+
+	PR target/121501
+	* config/rs6000/rs6000.md (cmprb, setb_signed, setb_unsigned)
+	(cmprb2, cmpeqb): Add missing modes to nested if_then_elses.
+
+2025-08-14  Andrew Pinski
+
+	PR tree-optimization/121474
+	* passes.def: Update forwprop1/2 to have full_walk be true.
+	* tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Add new argument
+	full_walk. Take into account the full_walk and clobbers at the end
+	of the limit can be done always.
+	(simplify_builtin_call): Add new argument, full_walk.
+	Update call to optimize_aggr_zeroprop.
+	(pass_forwprop): Add m_full_walk field.
+	(pass_forwprop::set_pass_param): Update for m_full_walk.
+	(pass_forwprop::execute): Update call to simplify_builtin_call
+	and optimize_aggr_zeroprop.
+
+2025-08-14  Andrew Pinski
+
+	* tree-ssa-forwprop.cc (optimize_agr_copyprop_1): New function split out of ...
+	(optimize_agr_copyprop): Here. Also try calling optimize_agr_copyprop_arg.
+	(optimize_agr_copyprop_arg): New function.
+
+2025-08-14  Stefan Schulze Frielinghaus
+
+	* config/s390/s390.md: Merge movdi_zero_extend_A and
+	movsi_zero_extend_A into zero_extendsidi2 and
+	zero_extendhi2_z10 and
+	zero_extend2_extimm.
+	* config/s390/vector.md (*movdi_zero_extend_A): Remove.
+	(*movsi_zero_extend_A): Remove.
+	(*movdi_zero_extend_B): Move to vec_extract patterns and
+	rename to *vec_extract_zero_extend.
+	(*movsi_zero_extend_B): Ditto.
+
+2025-08-14  H.J. Lu
+
+	PR target/121540
+	* config/i386/i386-options.cc (ix86_set_current_function):
+	Properly check if MMX and 80387 are enabled.
+
+2025-08-13  Jeff Law
+
+	PR target/121531
+	* config/riscv/sifive-p400.md (sifive_p400_unknown): New reservation.
+	* config/riscv/sifive-p600.md (sifive_p600_unknown): Likewise.
+
+2025-08-13  David Malcolm
+
+	* diagnostics/output-spec.cc (sarif_scheme_handler::make_sink):
+	Populate sarif_generation_options instance directly, rather than
+	through local variables.
+	(sarif_scheme_handler::make_sarif_gen_opts): Drop.
+	(html_scheme_handler::make_sink): Populate html_generation_options
+	instance directly, rather than through local variables.
+
+2025-08-13  Andrew Pinski
+
+	* config.gcc: Mark epiphany*-*-* and rl78*-*-* as
+	obsolete targets.
+
+2025-08-13  H.J. Lu
+
+	PR target/81501
+	* config/i386/i386-features.cc (x86_cse_kind): Add X86_CSE_TLS_GD,
+	X86_CSE_TLS_LD_BASE and X86_CSE_TLSDESC.
+	(redundant_load): Renamed to ...
+	(redundant_pattern): This.
+	(ix86_place_single_vector_set): Replace redundant_load with
+	redundant_pattern.
+	(replace_tls_call): New.
+	(ix86_place_single_tls_call): Likewise.
+	(pass_remove_redundant_vector_load): Renamed to ...
+	(pass_x86_cse): This. Add val, def_insn, mode, scalar_mode, kind,
+	x86_cse, candidate_gnu_tls_p, candidate_gnu2_tls_p and
+	candidate_vector_p.
+	(pass_x86_cse::candidate_gnu_tls_p): New.
+	(pass_x86_cse::candidate_gnu2_tls_p): Likewise.
+	(pass_x86_cse::candidate_vector_p): Likewise.
+	(remove_redundant_vector_load): Renamed to ...
+	(pass_x86_cse::x86_cse): This. Extend to remove redundant TLS
+	calls.
+	(make_pass_remove_redundant_vector_load): Renamed to ...
+	(make_pass_x86_cse): This.
+	* config/i386/i386-passes.def: Replace
+	pass_remove_redundant_vector_load with pass_x86_cse.
+	* config/i386/i386-protos.h (ix86_tls_get_addr): New.
+	(make_pass_remove_redundant_vector_load): Renamed to ...
+	(make_pass_x86_cse): This.
+	* config/i386/i386.cc (ix86_tls_get_addr): Remove static.
+	* config/i386/i386.h (machine_function): Add
+	tls_descriptor_call_multiple_p.
+	* config/i386/i386.md (tls64): New attribute.
+	(@tls_global_dynamic_64_): Set tls_descriptor_call_multiple_p.
+	(@tls_local_dynamic_base_64_): Likewise.
+	(@tls_dynamic_gnu2_64_): Likewise.
+	(*tls_global_dynamic_64_): Set tls64 attribute to gd.
+	(*tls_local_dynamic_base_64_): Set tls64 attribute to ld_base.
+	(*tls_dynamic_gnu2_lea_64_): Set tls64 attribute to lea.
+	(*tls_dynamic_gnu2_call_64_): Set tls64 attribute to call.
+	(*tls_dynamic_gnu2_combine_64_): Set tls64 attribute to
+	combine.
+
+2025-08-13  Iain Sandoe
+
+	* config.in: Regenerate.
+	* config/darwin.h (DARWIN_LD_NO_DEDUPLICATE): New.
+	(LINK_SPEC): Handle -no_deduplicate.
+	* configure: Regenerate.
+	* configure.ac: Detect linker support for -no_deduplicate.
+
+2025-08-13  Iain Sandoe
+
+	* config/darwin-sections.def (asan_string_section,
+	asan_globals_section, asan_liveness_section): New.
+	* config/darwin.cc (objc_method_decl): Use asan sections
+	when asan is enabled.
+	(darwin_encode_section_info): Alter string constant
+	linker visibility depending on asan.
+	(machopic_select_section): Use the asan sections when
+	asan is enabled.
+
+2025-08-13  Jeff Law
+
+	PR target/121160
+	* config/riscv/riscv.cc (canonicalize_comparands): Tighten check for
+	forcing value into a GPR.
+
+2025-08-13  Andrew Pinski
+
+	PR tree-optimization/121474
+	* tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Move the check
+	for limit before the alias check.
+
+2025-08-13  Richard Biener
+
+	* tree-vectorizer.h (SLP_TREE_PERMUTE_P): New.
+	* tree-vect-slp-patterns.cc (linear_loads_p): Adjust.
+	(vect_detect_pair_op): Likewise.
+	(addsub_pattern::recognize): Likewise.
+	* tree-vect-slp.cc (vect_print_slp_tree): Likewise.
+	(vect_gather_slp_loads): Likewise.
+	(vect_is_slp_load_node): Likewise.
+	(optimize_load_redistribution_1): Likewise.
+	(vect_optimize_slp_pass::is_cfg_latch_edge): Likewise.
+	(vect_optimize_slp_pass::internal_node_cost): Likewise.
+	(vect_optimize_slp_pass::start_choosing_layouts): Likewise.
+	(vect_optimize_slp_pass::backward_cost): Likewise.
+	(vect_optimize_slp_pass::forward_pass): Likewise.
+	(vect_optimize_slp_pass::get_result_with_layout): Likewise.
+	(vect_optimize_slp_pass::materialize): Likewise.
+	(vect_optimize_slp_pass::dump): Likewise.
+	(vect_optimize_slp_pass::decide_masked_load_lanes): Likewise.
+	(vect_update_slp_vf_for_node): Likewise.
+	(vect_slp_analyze_node_operations_1): Likewise.
+	(vect_schedule_slp_node): Likewise.
+	(vect_schedule_scc): Likewise.
+	* tree-vect-stmts.cc (vect_analyze_stmt): Likewise.
+	(vect_transform_stmt): Likewise.
+	(vect_is_simple_use): Likewise.
+
+2025-08-13  Richard Biener
+
+	* tree-vect-stmts.cc (vect_analyze_stmt): Use
+	SLP_TREE_DEF_TYPE instead of STMT_VINFO_DEF_TYPE.
+
+2025-08-13  Richard Biener
+
+	* tree-vectorizer.h (vect_memory_access_type): Replace
+	VMAT_GATHER_SCATTER with three separate access types,
+	VMAT_GATHER_SCATTER_LEGACY, VMAT_GATHER_SCATTER_IFN and
+	VMAT_GATHER_SCATTER_EMULATED.
+	(mat_gather_scatter_p): New predicate.
+	(GATHER_SCATTER_LEGACY_P): Remove.
+	(GATHER_SCATTER_IFN_P): Likewise.
+	(GATHER_SCATTER_EMULATED_P): Likewise.
+	* tree-vect-stmts.cc (check_load_store_for_partial_vectors):
+	Adjust.
+	(get_load_store_type): Likewise.
+ (vect_get_loop_variant_data_ptr_increment): Likewise. + (vectorizable_store): Likewise. + (vectorizable_load): Likewise. + * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost): + Likewise. + * config/riscv/riscv-vector-costs.cc + (costs::need_additional_vector_vars_p): Likewise. + * config/aarch64/aarch64.cc (aarch64_detect_vector_stmt_subtype): + Likewise. + (aarch64_vector_costs::count_ops): Likewise. + (aarch64_vector_costs::add_stmt_cost): Likewise. + +2025-08-13 Richard Biener + + * tree-vectorizer.h (vect_supportable_dr_alignment): Pass + a bool instead of a pointer to gather_scatter_info. + * tree-vect-data-refs.cc (vect_supportable_dr_alignment): + Likewise. + * tree-vect-stmts.cc (get_load_store_type): Adjust. + +2025-08-13 Lulu Cheng + + PR target/120476 + * config/loongarch/loongarch.cc + (loongarch_compute_pressure_classes): New function. + (TARGET_COMPUTE_PRESSURE_CLASSES): Define. + +2025-08-13 Yang Yujie + + PR target/117599 + * config/loongarch/loongarch.h: Define a PROMOTE_MODE case for + small _BitInts. + * config/loongarch/loongarch.cc (loongarch_promote_function_mode): + Same. + (loongarch_bitint_type_info): New function. + (TARGET_C_BITINT_TYPE_INFO): Declare. + +2025-08-13 Jeff Law + + PR target/121113 + * config/riscv/sifive-p400.md: Handle HFmode for fdiv/fsqrt. + * config/riscv/sifive-p600.md: Likewise. + * config/riscv/xiangshan.md: Likewise. + +2025-08-13 H.J. Lu + Liu, Hongtao + + PR target/121497 + * config/i386/i386-features.cc (ix86_broadcast_inner): Convert + integer constant to mode of move + +2025-08-12 Pan Li + + * config/riscv/autovec-opt.md (*merge_vx_): Add new + pattern to combine the vmerge.vxm. + +2025-08-12 Robin Dapp + + PR target/121334 + * config/riscv/riscv-v.cc (expand_slide1up): New function. + (expand_vector_init_trailing_same_elem): Use new function. + (expand_const_vector_onestep): New function. + (expand_const_vector): Uew expand_slide1up. + (expand_vector_init_merge_repeating_sequence): Ditto. + (shuffle_off_by_one_patterns): Ditto. + +2025-08-12 mengqinggang + + * config/loongarch/loongarch-def.h (ABI_BASE_LP64D): New macro. + (ABI_BASE_LP64F): New macro. + (ABI_BASE_LP64S): New macro. + (N_ABI_BASE_TYPES): New macro. + +2025-08-12 Richard Biener + + * tree-vect-loop.cc (vect_analyze_loop_2): Change + slp_done_for_suggested_uf to a boolean + single_lane_slp_done_for_suggested_uf. Change slp + to force_single_lane boolean. + (vect_analyze_loop_1): Adjust similarly. + +2025-08-12 Richard Sandiford + + PR rtl-optimization/121253 + * fwprop.cc (forward_propagate_into): Don't propagate asm defs. + +2025-08-12 Richard Biener + + PR tree-optimization/121509 + * tree-vect-stmts.cc (vect_mark_stmts_to_be_vectorized): + Fail early when we detect a relevant but not handled PHI. + +2025-08-12 Richard Biener + + PR tree-optimization/121514 + * tree-ssa-sccvn.cc (visit_nary_op): Only call + vn_nary_op_insert_stmt for SSA name result. + +2025-08-12 Andrew Pinski + + PR tree-optimization/121494 + * tree-ssa-forwprop.cc (optimize_agr_copyprop): Mark the bb of the use + stmt if needed for eh cleanup. + +2025-08-12 Richard Biener + + * tree-vect-stmts.cc (vect_analyze_stmt): Only set + STMT_VINFO_VECTYPE for dataref SLP representatives. + Clear it for others and do not restore the original value. + (vect_transform_stmt): Likewise. + +2025-08-12 Richard Biener + + * tree-vect-loop.cc (get_initial_defs_for_reduction): + Get vector type as argument. + (vect_find_reusable_accumulator): Likewise. + (vect_transform_cycle_phi): Adjust. 
+
+2025-08-12  Richard Biener
+
+	* tree-vect-loop.cc (vectorizable_reduction): Replace
+	STMT_VINFO_VECTYPE use with SLP_TREE_VECTYPE.
+
+2025-08-12  Richard Biener
+
+	PR tree-optimization/121493
+	* tree-ssa-sccvn.cc (vn_reference_lookup_3): Opportunistically
+	strip components with known offset.
+
+2025-08-12  Richard Biener
+
+	* tree-ssa-sccvn.cc (vn_reference_lookup_3): When we fail to
+	match up the two base MEM_REFs, fail.
+
+2025-08-12  Shreya Munnangi
+	    Jeff Law
+	    Philipp Tomsich
+
+	* config/riscv/riscv-protos.h (synthesize_add): Add prototype.
+	* config/riscv/riscv.cc (synthesize_add): New function.
+	* config/riscv/riscv.md (addsi3): Allow any constant as operands[2]
+	in the expander. Force the constant into a register as needed for
+	TARGET_64BIT. Use synthesize_add for !TARGET_64BIT.
+	(*adddi3): Renamed from adddi3.
+	(adddi3): New expander. Use synthesize_add.
+
+2025-08-11  Richard Henderson
+
+	* config/aarch64/aarch64.md (movcc): Accept MODE_CC
+	conditions directly; reject QI/HImode conditions.
+
+2025-08-11  Richard Henderson
+
+	PR target/121388
+	* config/aarch64/aarch64.cc (aarch64_cb_rhs): Restrict the
+	range of LT/GE and GT/LE to their intersections.
+	* config/aarch64/aarch64.md (*aarch64_cb): Unexport.
+	Use cmpbr_imm_predicate instead of aarch64_cb_rhs.
+	* config/aarch64/constraints.md (Uc1): Accept 0..62.
+	(Uc2): Remove.
+	* config/aarch64/iterators.md (cmpbr_imm_predicate): New.
+	(cmpbr_imm_constraint): Update to match aarch64_cb_rhs.
+	* config/aarch64/predicates.md (aarch64_cb_reg_i63_operand): New.
+	(aarch64_cb_reg_i62_operand): New.
+
+2025-08-11  Richard Henderson
+
+	* config/aarch64/aarch64.cc (aarch64_if_then_else_costs):
+	Use aarch64_cb_rhs to match CB insns.
+
+2025-08-11  Richard Henderson
+
+	PR target/121385
+	* config/aarch64/aarch64.md (*aarch64_tbz1): Remove
+	cc clobber and expansion via TST+Bcond.
+
+2025-08-11  Richard Henderson
+
+	* config/aarch64/aarch64.h (TARGET_CMPBR): False when
+	aarch64_track_speculation is true.
+
+2025-08-11  Richard Henderson
+
+	* config/aarch64/aarch64.cc (aarch64_gen_compare_split_imm24): New.
+	* config/aarch64/aarch64-protos.h: Update.
+	* config/aarch64/aarch64.md (*aarch64_bcond_wide_imm): Use it.
+	Add match_scratch and cc clobbers. Use match_operator instead of
+	iterator expansion.
+	(*compare_cstore_insn): Likewise.
+
+2025-08-11  Richard Henderson
+
+	* config/aarch64/predicates.md (aarch64_split_imm24): Rename from
+	aarch64_imm24; exclude aarch64_move_imm and aarch64_uimm12_shift.
+	* config/aarch64/aarch64.md (*aarch64_bcond_wide_imm):
+	Update for aarch64_split_imm24.
+	(*compare_cstore_insn): Likewise.
+	* config/aarch64/aarch64.cc (aarch64_if_then_else_costs): Likewise.
+
+2025-08-11  Richard Henderson
+
+	* config/aarch64/aarch64.md (tbranch_3): Remove.
+	(save_stack_nonlocal): Use aarch64_gen_compare_zero_and_branch.
+	(restore_stack_nonlocal): Likewise.
+
+2025-08-11  Richard Henderson
+
+	* config/aarch64/aarch64.cc
+	(aarch64_gen_compare_zero_and_branch): Export.
+	* config/aarch64/aarch64-protos.h
+	(aarch64_gen_compare_zero_and_branch): Declare it.
+	* config/aarch64/aarch64-sme.md (aarch64_restore_za): Use it.
+	* config/aarch64/aarch64.md (*aarch64_cbz): Unexport.
+
+2025-08-11  Richard Henderson
+
+	* config/aarch64/aarch64.cc (aarch64_if_then_else_costs): Reorg to
+	include the cost of inner within TBZ sign-bit test, only match
+	CBZ/CBNZ with valid modes, and both for the aarch64_imm24 test.
+ +2025-08-11 Richard Henderson + + * config/aarch64/aarch64.cc (aarch64_if_then_else_costs): Remove + else after return and re-indent. + +2025-08-11 Richard Henderson + + * config/aarch64/aarch64.md (BRANCH_LEN_N_1KiB): Rename + from BRANCH_LEN_N_1Kib. + +2025-08-11 Andrew Pinski + + * tree-ssa-forwprop.cc (optimize_aggr_zeroprop): Recognize stores + of integer_zerop as memset of 0. + +2025-08-11 Jeff Law + + * doc/sourcebuild.texi: Add riscv_b_ok and riscv_v_ok target selectors. + +2025-08-11 Richard Biener + + PR tree-optimization/121488 + * tree-ssa-sccvn.cc (visit_nary_op): If the BIT_FIELD_REF + result is of wrong type, try a VIEW_CONVERT_EXPR around it. + +2025-08-11 Richard Biener + + PR tree-optimization/121362 + * tree-ssa-sccvn.cc (vn_reference_lookup_3): Swap index + guards on component stripping loops. + +2025-08-11 Pan Li + + * tree-ssa-math-opts.cc (match_saturation_mul): Add new function + to emit IFN_SAT_MUL if matched. + (math_opts_dom_walker::after_dom_children): Try to match + the phi node for SAT_MUL. + +2025-08-11 Pan Li + + * match.pd: Add form 2 for unsigned SAT_MUL. + +2025-08-11 Pan Li + + * config/riscv/riscv.cc (get_vector_binary_rtx_cost): Remove. + (riscv_rtx_costs): Refactor to search vec_duplicate on the + sub rtx. + +2025-08-11 Andre Vieira + + PR target/121464 + * config/arm/arm.md (arm_, arm_): Fix operand check. + +2025-08-11 Richard Sandiford + + PR rtl-optimization/121306 + * simplify-rtx.cc (simplify_context::simplify_subreg): Distribute + non-narrowing integer-to-integer subregs through logic ops, + in a similar way to the existing word_mode handling. + +2025-08-11 Jakub Jelinek + + * tree-cfg.cc (find_case_label_for_value): Fix comment typo, + singe-valued -> single-valued. + * config/arc/arc.md: Fix comment typos, unsinged -> unsigned. + +2025-08-10 Matthew Fortune + + * config/mips/mips.h (FRAME_GROWS_DOWNWARD): Allow the frame to + grow downwards for mips16 when -mgrow-frame-downwards is set. + * config/mips/mips.opt: Add -mgrow-frame-downwards option. + +2025-08-10 Andrew Pinski + + PR middle-end/121438 + PR middle-end/121444 + * output.h (MAX_ALIGN_MERGABLE): Rename to ... + (MAX_MERGEABLE_BITSIZE): This. + * tree-switch-conversion.cc (switch_conversion::build_one_array): Don't + increase the alignment. + * varasm.cc (mergeable_string_section): Use MAX_MERGEABLE_BITSIZE + instead of MAX_ALIGN_MERGABLE. Also replace `/ 8` with `/ BITS_PER_UNIT`. + (mergeable_constant_section): Select the mergeable section based on + the bitsize rather than the alignment. Make sure the align is less + than the entity size. + +2025-08-10 Andrew Pinski + + PR middle-end/121394 + * varasm.cc (assemble_variable_contents): Pad out + mergeable sections if needed. + (output_constant_pool_1): Change the padding to be explicit + zeroing for mergeable sections. + +2025-08-10 H.J. Lu + + * configure: Regenerated. + +2025-08-09 Iain Sandoe + + * config/darwin.cc (darwin_encode_section_info): Do not + make anchored symbols linker-visible. + (darwin_use_anchors_for_symbol_p): Disallow anchoring on + symbols that must be linker-visible (or external), even + if the definitions are in this TU. + +2025-08-09 Iain Sandoe + + * config/darwin.h (ASM_GENERATE_INTERNAL_LABEL): New + entry for LANCHOR. + +2025-08-09 David Malcolm + + * diagnostics/context.cc (context::dump): Bulletproof against + m_reference_printer being null. + * diagnostics/dumping.cc (emit_field): Replace + with... + (emit_string_field): ...this. + (emit_field): Eliminate. + (emit_field): Replace with...
+ (emit_bool_field): ...this. + (emit_field): Replace with... + (emit_size_t_field): ...this, and use HOST_SIZE_T_PRINT_DEC rather + than %zi in fprintf call. + (emit_field): Replace with... + (emit_int_field): ...this. + (emit_field): Replace with... + (emit_unsigned_field): ...this. + * diagnostics/dumping.h (emit_field): Replace this template decl + with... + (emit_string_field): ...this, + (emit_bool_field): ...this, + (emit_size_t_field): ...this, + (emit_int_field): ...this, + (emit_unsigned_field): ... and this. + (DIAGNOSTICS_DUMPING_EMIT_FIELD): Rename to... + (DIAGNOSTICS_DUMPING_EMIT_BOOL_FIELD): ...this and update for + above change. + * diagnostics/file-cache.cc (file_cache_slot::dump): Replace + emit_field calls with calls that explicitly state the type. Fix + type of dump of m_missing_trailing_newline to use bool. + (file_cache_slot::dump): Use HOST_SIZE_T_PRINT_DEC rather than + %zi in fprintf call. + * diagnostics/html-sink.cc (html_generation_options::dump): Update + for macro renaming. + * diagnostics/sarif-sink.cc + (sarif_serialization_format_json::dump): Likewise. + (sarif_generation_options::dump): Likewise, and for function + renaming. + * diagnostics/text-sink.cc (text_sink::dump): Update for macro + renaming. + * libgdiagnostics.cc (diagnostic_manager_debug_dump_file): Use + HOST_SIZE_T_PRINT_DEC rather than %zi in fprintf call. + * pretty-print.cc: Include "diagnostics/dumping.h". + (pp_formatted_chunks::dump): Use it. + (get_url_format_as_string): New. + (pretty_printer::dump): Use diagnostics::dumping. Bulletproof + against m_buffer being null. + +2025-08-09 Takayuki 'January June' Suwa + + * config/xtensa/constraints.md (T): + Change define_memory_constraint to define_special_memory_constraint. + +2025-08-08 Andrew Pinski + + PR tree-optimization/120599 + * tree-ssa-forwprop.cc (optimize_agr_copyprop): Don't try to copy + from statements that throw. + +2025-08-08 Andrew Pinski + + PR tree-optimization/118946 + PR tree-optimization/121422 + * tree-ssa-forwprop.cc (optimize_memcpy_to_memset): Remove. + (optimize_aggr_zeroprop_1): New function. + (optimize_aggr_zeroprop): New function. + (simplify_builtin_call): Don't call optimize_memcpy_to_memset + for memcpy but call optimize_aggr_zeroprop for memset. + (pass_forwprop::execute): Don't call optimize_memcpy_to_memset + for aggregate copies but rather call optimize_aggr_zeroprop + for aggregate stores. + +2025-08-08 Andrew Pinski + + * tree-ssa-forwprop.cc (optimize_agr_copyprop): Change into a + forward looking (looking at vdef's uses) instead of a back + looking (vuse's def). + +2025-08-08 David Malcolm + + PR diagnostics/116253 + * common.opt (fdiagnostics-show-nesting): New option. + (fdiagnostics-show-nesting-locations): New option. + (fdiagnostics-show-nesting-levels): New option. + * common.opt.urls: Regenerate. + * diagnostics/context.cc (context::set_show_nesting): New. + (context::set_show_nesting_locations): New. + (context::set_show_nesting_levels): New. + * diagnostics/context.h (context::set_show_nesting): New decl. + (context::set_show_nesting_locations): New decl. + (context::set_show_nesting_levels): New decl. + * diagnostics/html-sink.cc: Tweak comment. + * diagnostics/output-spec.cc (text_scheme_handler::make_sink): + Rename "experimental-nesting" to "show-nesting" and enable by + default. Rename "experimental-nesting-show-locations" to + "show-nesting-locations". Rename + "experimental-nesting-show-levels" to "show-nesting-levels". + * diagnostics/sink.h (sink::dyn_cast_text_sink): New. 
+ * diagnostics/text-sink.h (text_sink::dyn_cast_text_sink): New. + * doc/invoke.texi: Add -fdiagnostics-show-nesting, + -fdiagnostics-show-nesting-locations, and + -fdiagnostics-show-nesting-levels. Update for changes to + output-spec.cc above. + * lto-wrapper.cc (merge_and_complain): Ignore + OPT_fdiagnostics_show_nesting, + OPT_fdiagnostics_show_nesting_locations, and + OPT_fdiagnostics_show_nesting_levels. + (append_compiler_options): Likewise. + (append_diag_options): Likewise. + * opts-common.cc (decode_cmdline_options_to_array): Add + "-fno-diagnostics-show-nesting" to -fdiagnostics-plain-output. + * opts.cc (common_handle_option): Handle the new options. + (gen_command_line_string): Ignore the new options. + * toplev.cc (general_init): Call set_show_nesting, + set_show_nesting_locations, and set_show_nesting_levels on + global_dc. + +2025-08-08 David Malcolm + + * Makefile.in (OBJS-libcommon): Add diagnostics/dumping.o. + * diagnostics/buffering.cc: Include "diagnostics/dumping.h". + (buffer::dump): Reimplement using diagnostics::dumping. + * diagnostics/context.cc: Include "diagnostics/dumping.h". + (context::dump): Reimplement using diagnostics::dumping. + Use sink::dump_kind when listing the sinks. + (sink::dump): Reimplement using diagnostics::dumping. + (counters::dump): Likewise. + * diagnostics/dumping.cc: New file. + * diagnostics/dumping.h: New file. + * diagnostics/file-cache.cc: Include "diagnostics/dumping.h". + (file_cache::dump): Reimplement using diagnostics::dumping. + (file_cache_slot::dump): Likewise. + * diagnostics/html-sink.cc: Include "diagnostics/dumping.h". + (html_generation_options::dump): New. + (html_sink_buffer::dump): Reimplement using diagnostics::dumping. + (html_builder::dump): New. + (html_sink::dump): Reimplement using diagnostics::dumping. + Add dump of the html_builder. + (html_file_sink::dump): Replace with... + (html_file_sink::dump_kind): ...this. + (html_buffered_sink::dump_kind): New. + * diagnostics/html-sink.h (html_generation_options::dump): New + decl. + * diagnostics/sarif-sink.cc: Include "diagnostics/dumping.h". + (sarif_serialization_format_json::dump): New. + (sarif_builder::dump): New. + (sarif_sink_buffer::dump): Reimplement using diagnostics::dumping. + (sarif_sink::dump): Likewise. Add dump of the sarif_builder. + (sarif_stream_sink::dump_kind): New. + (sarif_file_sink::dump): Replace with... + (sarif_file_sink::dump_kind): ...this. + (get_dump_string_for_sarif_version): New. + (sarif_generation_options::dump): New. + (class buffered_sink): Rename to... + (class sarif_buffered_sink): ...this. + (sarif_buffered_sink::dump_kind): New. + * diagnostics/sarif-sink.h (sarif_serialization_format::dump): + New. + (sarif_serialization_format_json::dump): New decl. + (sarif_generation_options::dump): New decl. + * diagnostics/sink.h (sink::dump_kind): New. + * diagnostics/text-sink.cc: Include "diagnostics/dumping.h". + (text_sink_buffer::dump): Reimplement using diagnostics::dumping. + (text_sink::dump): Likewise. Emit fields m_show_nesting, + m_show_locations_in_nesting, and m_show_nesting_levels. + * diagnostics/text-sink.h (text_sink::dump_kind): New. + +2025-08-08 David Malcolm + + * diagnostic.h (diagnostics::get_cwe_url): Move decl to + diagnostics/metadata.h. + (diagnostics::maybe_line_and_column): Move into + diagnostics::text_sink. + * diagnostics/context.cc: Update for maybe_line_and_column + becoming a static member of text_sink. + * diagnostics/metadata.h (diagnostics::get_cwe_url): Move decl + here from diagnostic.h. 
+ * diagnostics/text-sink.cc (maybe_line_and_column): Convert to... + (text_sink::maybe_line_and_column): ...this. + * diagnostics/text-sink.h (text_sink::maybe_line_and_column): Move + here from diagnostic.h. + +2025-08-08 David Malcolm + + * diagnostics/context.cc (context::get_any_inlining_info): Convert + "context" arg of m_set_locations_cb from ptr to const &. + (context::report_diagnostic): Convert "context" arg of + m_adjust_diagnostic_info from ptr to const &. + * diagnostics/context.h (context::set_locations_callback_t): + Likewise. + (context::set_adjust_diagnostic_info_callback): Likewise. + (context::m_adjust_diagnostic_info): Likewise. + * tree-diagnostic.cc (set_inlining_locations): Likewise. + +2025-08-08 David Malcolm + + * diagnostics/column-options.h: New file, adding struct + diagnostics::column_options, taken from fields in + diagnostics::context and diagnostics::column_policy. + * diagnostics/context.cc (context::initialize): Update for moving + fields of diagnostics::context into diagnostics::column_options. + (column_policy::column_policy): Likewise. + (column_policy::converted_column): Move implementation to... + (column_options::convert_column): ...this new function. + (context::report_diagnostic): Update for moving fields of + diagnostics::context into diagnostics::column_options. + (assert_location_text): Likewise. + * diagnostics/context.h: Include "diagnostics/column-options.h". + (class column_policy): Replace fields m_column_unit, + m_column_origin, and m_tabstop with m_column_options. + (context::get_column_options): New accessors. + (context::m_column_unit): Move to struct column_options and + replace with m_column_options. + (context::m_column_origin): Likewise. + (context::m_tabstop): Likewise. + * diagnostics/sarif-sink.cc (sarif_builder::sarif_builder): Update + for moving fields of diagnostics::context into + diagnostics::column_options. + * diagnostics/source-printing.cc: Likewise. + * opts.cc (common_handle_option): Likewise. + +2025-08-08 Christophe Lyon + + PR target/120977 + * config/arm/arm.md (call): Move unspec parameter to parallel. + (nonsecure_call_internal): Likewise. + (call_value): Likewise. + (nonsecure_call_value_internal): Likewise. + * config/arm/thumb1.md (nonsecure_call_reg_thumb1_v5): Likewise. + (nonsecure_call_value_reg_thumb1_v5): Likewise. + * config/arm/thumb2.md (nonsecure_call_reg_thumb2_fpcxt): + Likewise. + (nonsecure_call_reg_thumb2): Likewise. + (nonsecure_call_value_reg_thumb2_fpcxt): Likewise. + (nonsecure_call_value_reg_thumb2): Likewise. + * config/arm/arm.cc (cmse_nonsecure_call_inline_register_clear): + Likewise. + +2025-08-08 Pengfei Li + + PR target/121449 + * config/aarch64/aarch64-sve.md + (mask_gather_load): Use vg + constraints for alternatives with immediate offset. + (mask_scatter_store): Likewise. + +2025-08-08 Richard Biener + + * doc/tm.texi.in: Add Vectorization and OpenMP and OpenACC + sub-sections to the list of target macros and functions. + * doc/tm.texi: Re-generate. + +2025-08-08 Richard Biener + + * tree-vect-loop.cc (vect_determine_vectype_for_stmt_1): Remove. + (vect_determine_vectype_for_stmt): Likewise. + (vect_set_stmts_vectype): Likewise. + (vect_analyze_loop_2): Do not call vect_set_stmts_vectype. + * tree-vect-stmts.cc (vect_mark_stmts_to_be_vectorized): Detect + irregular stmts early here. + +2025-08-08 Alex Coplan + + PR target/120986 + * config/aarch64/aarch64-sve-builtins.cc + (function_expander::expand): Relax fpm_t assert to allow + modeless const_ints. 
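The column_options refactor in the diagnostics entries above groups three per-context knobs into one struct. A minimal sketch of that grouping follows; the three member names are taken from the ChangeLog, while the types and comments are assumptions.

/* Sketch of the new diagnostics column-options grouping; types are
   assumed, only the field names come from the ChangeLog.  */
struct column_options
{
  int m_column_unit;	/* Unit in which columns are counted.  */
  int m_column_origin;	/* Number given to the first column.  */
  int m_tabstop;	/* Tab width used when expanding tabs.  */
};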
+ +2025-08-08 Alex Coplan + + PR target/120986 + * config/aarch64/aarch64-sve2.md (@aarch64_sve_dot): + Switch mode iterator from SVE_FULL_HSF to new iterator; + remove insn predicate as this is now taken care of by conditions + in the mode iterator. + (@aarch64_sve_dot_lane): Likewise. + * config/aarch64/iterators.md (SVE_FULL_HSF_FP8_FDOT): New. + +2025-08-08 Richard Biener + + PR tree-optimization/121454 + * tree-ssa-sccvn.cc (visit_nary_op): Avoid unexpected + BIT_FIELD_REFs. + +2025-08-08 Jakub Jelinek + + PR middle-end/121389 + * tree-tailcall.cc (find_tail_calls): For finally_tmp.N + handle not just GIMPLE_CONDs with EQ_EXPR/NE_EXPR and only + values 0 and 1, but arbitrary non-negative values, arbitrary + comparisons in conditions and also GIMPLE_SWITCH next to + GIMPLE_CONDs. + +2025-08-08 Richard Biener + + * tree-vect-loop.cc (vect_is_emulated_mixed_dot_prod): Get + the SLP node rather than the stmt_info. + (vectorizable_lane_reducing): Adjust, pass SLP node to costing. + (vect_transform_reduction): Adjust. + +2025-08-08 Richard Biener + + * tree-vect-stmts.cc (vect_model_promotion_demotion_cost): Pass + in SLP node and drop unused dr argument. Use SLP node for + costing, drop costing of constant/external operands. + (vectorizable_conversion): Adjust. + +2025-08-08 Richard Biener + + * tree-vect-stmts.cc (vectorizable_store): Apply SLP_TREE_VECTYPE + to slp_node rather than stmt_info. + +2025-08-07 Richard Sandiford + + PR target/121414 + * config/aarch64/aarch64.cc (aarch64_is_variant_pcs): New function, + split out from... + (aarch64_asm_output_variant_pcs): ...here. Handle various types + of SME function type. + +2025-08-07 Richard Sandiford + + PR rtl-optimization/120718 + * simplify-rtx.cc (simplify_context::simplify_gen_subreg): + Remove MODE_COMPOSITE_P condition. + +2025-08-07 Richard Biener + + PR tree-optimization/121405 + * tree-ssa-sccvn.cc (visit_nary_op): Handle BIT_FIELD_REF + with reference def by looking up a combination of both. + +2025-08-07 Pengfei Li + + * tree-vect-data-refs.cc (vect_compute_data_ref_alignment): + Allow DR target alignment to be a poly_int. + (vect_enhance_data_refs_alignment): Support peeling and + versioning for VLA modes. + * tree-vect-loop-manip.cc (get_misalign_in_elems): Remove + power-of-two rounding in peeling. + (vect_create_cond_for_align_checks): Update alignment check + logic for poly_int mask. + (vect_create_cond_for_vla_spec_read): New runtime checks. + (vect_loop_versioning): Support new runtime checks. + * tree-vect-loop.cc (_loop_vec_info::_loop_vec_info): Add a new + loop_vinfo field. + (vectorizable_induction): Fix wrong IV offset issue. + * tree-vect-stmts.cc (get_load_store_type): Refactor + vectorizable checks for speculative loads. + * tree-vectorizer.h (LOOP_VINFO_MAX_SPEC_READ_AMOUNT): New + macro for new runtime checks. + (LOOP_REQUIRES_VERSIONING_FOR_SPEC_READ): Likewise. + (LOOP_REQUIRES_VERSIONING): Update macro for new runtime checks. + +2025-08-07 Jakub Jelinek + + PR preprocessor/120778 + * doc/invoke.texi (Wkeyword-macro): Document. + +2025-08-07 Stefan Schulze Frielinghaus + + * config/s390/s390.cc (print_operand): Allow arbitrary wide_int + constants for _BitInt. + (s390_bitint_type_info): Implement target hook + TARGET_C_BITINT_TYPE_INFO. + +2025-08-06 Uros Bizjak + + PR target/96226 + * config/i386/predicates.md (and_operator): New operator. + * config/i386/i386.md (splitter after *3_mask): + Use and_operator to match AND RTX and use its mode + in the split pattern.
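Pengfei Li's peeling/versioning work above relies on a runtime alignment guard (vect_create_cond_for_align_checks): the vectorized loop is entered only when the checked pointers satisfy an alignment mask, which may now be a poly_int for VLA modes. Below is a fixed-width illustration of such a guard; the 16-byte mask, the function and both loop bodies are assumptions for the sketch, not GCC output.

#include <stdint.h>

void
saxpy (float *restrict x, const float *restrict y, long n, float a)
{
  const uintptr_t align_mask = 16 - 1;	/* Assumed target alignment.  */
  if ((((uintptr_t) x | (uintptr_t) y) & align_mask) == 0)
    {
      /* Aligned: the versioned vector loop would run here.  */
      for (long i = 0; i < n; i++)
	x[i] += a * y[i];
    }
  else
    {
      /* Misaligned: scalar fallback loop.  */
      for (long i = 0; i < n; i++)
	x[i] += a * y[i];
    }
}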
+ +2025-08-06 Gerald Pfeifer + + PR target/69374 + * doc/install.texi (Prerequisites): Replace bzip2 by xz. + +2025-08-06 Yangyu Chen + + * config/i386/i386.h (PTA_BDVER1): + Add missing PTA_POPCNT and PTA_LZCNT with PTA_ABM. + (PTA_ZNVER1): Ditto. + (PTA_BTVER1): Ditto. + (PTA_LUJIAZUI): Ditto. + (PTA_YONGFENG): Do not include extra PTA_LZCNT. + +2025-08-06 Sam James + + PR libstdc++/29286 + * Makefile.in (ALIASING_FLAGS): Drop. + * configure: Regenerate. + * configure.ac: Drop -fno-strict-aliasing workaround for < GCC 4.3. + +2025-08-06 Richard Biener + + * tree-vect-data-refs.cc (vect_supportable_dr_alignment): + Prune dead code. + +2025-08-06 Patrick Palka + + PR c++/121231 + PR c++/119688 + PR c++/94511 + * common.opt: Document additional ABI version 21 change. + * doc/invoke.texi: Likewise. + +2025-08-06 Richard Biener + + * tree-vectorizer.h (_slp_tree::gs_scale): New. + (_slp_tree::gs_base): Likewise. + (SLP_TREE_GS_SCALE): Likewise. + (SLP_TREE_GS_BASE): Likewise. + (vect_describe_gather_scatter_call): Declare. + * tree-vect-slp.cc (_slp_tree::_slp_tree): Initialize + new members. + (vect_build_slp_tree_2): Record gather/scatter base and scale. + (vect_get_and_check_slp_defs): For gather/scatter IFNs + describe the call to first_gs_info. + * tree-vect-data-refs.cc (vect_gather_scatter_fn_p): Add + mode of operation with fixed offset vector type. + (vect_describe_gather_scatter_call): Export. + * tree-vect-stmts.cc (get_load_store_type): Do not call + vect_check_gather_scatter to fill gs_info, instead populate + from the SLP tree. Check which of IFN, decl or fallback + is supported and record that decision. + +2025-08-06 Richard Biener + + * tree-vect-stmts.cc (vectorizable_store): Build proper + alias + align pointer value for gather/scatter and SLP + and use it. + (vectorizable_load): Likewise. + +2025-08-06 Richard Biener + + * tree-vect-stmts.cc (check_load_store_for_partial_vectors): + Remove redundant gather/scatter target support check, instead + check the recorded ifns. Also allow legacy gather/scatter + with loop masking. + +2025-08-06 Jakub Jelinek + + PR tree-optimization/121413 + * gimple-lower-bitint.cc (gimple_lower_bitint): Fix up last + commit, cast limb_prec to unsigned before comparison. + +2025-08-06 Yang Yujie + + * match.pd: Preserve conversion to _BitInt before a VCE + if the _BitInt is extended. + +2025-08-06 Yang Yujie + + * gimple-lower-bitint.cc (bitint_large_huge::lower_mergeable_stmt): + Zero-extend the partial limb of any unsigned _BitInt LHS assigned + with a widening sign-extension. + +2025-08-06 Yang Yujie + + * gimple-lower-bitint.cc (bitint_large_huge::limb_access): + Add a parameter abi_load_p. If set, load a limb directly + in its actual precision without casting from m_limb_type. + (struct bitint_large_huge): Same. + (bitint_large_huge::handle_load): Use. + +2025-08-06 Jakub Jelinek + + PR tree-optimization/121413 + * gimple-lower-bitint.cc (abi_limb_prec): New variable. + (bitint_precision_kind): Initialize it. + (gimple_lower_bitint): Clear it at the start. For + min_prec > limb_prec decreased precision vars for + INTEGER_CST PHI arguments ensure min_prec is either + prec or multiple of abi_limb_prec. + +2025-08-06 Jakub Jelinek + + PR tree-optimization/121127 + * gimple-lower-bitint.cc (bitint_large_huge::handle_operand_addr): For + uninitialized SSA_NAME, set *prec_stored to 0 rather than *prec. + Handle that case in narrowing casts. If prec_stored is non-NULL, + set *prec_stored to prec_stored_val.
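The Yang Yujie _BitInt entries above revolve around keeping the partial top limb of an unsigned _BitInt zero-extended after a widening sign-extension. A hedged model of that masking step, assuming a 64-bit limb and a plain array representation (the gimple-lower-bitint.cc implementation works on limb accesses in GIMPLE, not C arrays):

#include <stdint.h>

typedef uint64_t limb_t;
#define LIMB_BITS 64

/* Zero out the bits above PREC in the top limb of an unsigned value
   stored as LIMBS.  Model only.  */
static void
zext_partial_limb (limb_t *limbs, unsigned int prec)
{
  unsigned int rem = prec % LIMB_BITS;
  if (rem != 0)
    limbs[prec / LIMB_BITS] &= ((limb_t) 1 << rem) - 1;
}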
+ +2025-08-06 Jakub Jelinek + + PR bootstrap/121386 + * Makefile.in (gengtype-lex.cc): Append #define FLEX_SCANNER, + #include "system.h" and #undef FLEX_SCANNER to the prepended lines. + * gengtype-lex.l: Remove inclusion of config.h or bconfig.h, system.h + and definition of malloc/realloc from %{} section. + +2025-08-06 Kito Cheng + + * config/riscv/arch-canonicalize: Read extension data from + riscv-ext*.def and adding unittest. + +2025-08-06 Kito Cheng + + * common/config/riscv/riscv-common.cc (riscv_expand_arch): + Ignore `unset`. + * config/riscv/riscv.h (OPTION_DEFAULT_SPECS): Handle + `-march=unset`. + (ARCH_UNSET_CLEANUP_SPECS): New. + (DRIVER_SELF_SPECS): Handle -march=unset. + * doc/invoke.texi (RISC-V Options): Update documentation for + `-march=unset`. + +2025-08-06 Kwok Cheung Yeung + + * gimplify.cc (remove_unused_omp_iterator_vars): Display unused + variable warning for 'to' and 'from' clauses. + (gimplify_scan_omp_clauses): Add argument for iterator loop sequence. + Gimplify the clause decl and size into the iterator loop if iterators + are used. + (gimplify_omp_workshare): Add argument for iterator loops sequence + in call to gimplify_scan_omp_clauses. + (gimplify_omp_target_update): Call remove_unused_omp_iterator_vars and + build_omp_iterators_loops. Add loop sequence as argument when calling + gimplify_scan_omp_clauses, gimplify_adjust_omp_clauses and building + the Gimple statement. + * tree-pretty-print.cc (dump_omp_clause): Call dump_omp_iterators + for to/from clauses with iterators. + * tree.cc (omp_clause_num_ops): Add extra operand for OMP_CLAUSE_FROM + and OMP_CLAUSE_TO. + * tree.h (OMP_CLAUSE_HAS_ITERATORS): Add check for OMP_CLAUSE_TO and + OMP_CLAUSE_FROM. + (OMP_CLAUSE_ITERATORS): Likewise. + +2025-08-06 Kwok Cheung Yeung + Andrew Stubbs + + * gimple-pretty-print.cc (dump_gimple_omp_target): Print expanded + iterator loops. + * gimple.cc (gimple_build_omp_target): Add argument for iterator + loops sequence. Initialize iterator loops field. + * gimple.def (GIMPLE_OMP_TARGET): Set GSS symbol to GSS_OMP_TARGET. + * gimple.h (gomp_target): Set GSS symbol to GSS_OMP_TARGET. Add extra + field for iterator loops. + (gimple_build_omp_target): Add argument for iterator loops sequence. + (gimple_omp_target_iterator_loops): New. + (gimple_omp_target_iterator_loops_ptr): New. + (gimple_omp_target_set_iterator_loops): New. + * gimplify.cc (find_var_decl): New. + (copy_omp_iterator): New. + (remap_omp_iterator_var_1): New. + (remap_omp_iterator_var): New. + (remove_unused_omp_iterator_vars): New. + (struct iterator_loop_info_t): New type. + (iterator_loop_info_map_t): New type. + (build_omp_iterators_loops): New. + (enter_omp_iterator_loop_context_1): New. + (enter_omp_iterator_loop_context): New. + (enter_omp_iterator_loop_context): New. + (exit_omp_iterator_loop_context): New. + (gimplify_adjust_omp_clauses): Add argument for iterator loop + sequence. Gimplify the clause decl and size into the iterator + loop if iterators are used. + (gimplify_omp_workshare): Call remove_unused_omp_iterator_vars and + build_omp_iterators_loops for OpenMP target expressions. Add + loop sequence as argument when calling gimplify_adjust_omp_clauses + and building the Gimple statement. + * gimplify.h (enter_omp_iterator_loop_context): New prototype. + (exit_omp_iterator_loop_context): New prototype. + * gsstruct.def (GSS_OMP_TARGET): New. + * omp-low.cc (lower_omp_map_iterator_expr): New. + (lower_omp_map_iterator_size): New. + (finish_omp_map_iterators): New. 
(lower_omp_target): Add sorry if iterators used with deep mapping. + Call lower_omp_map_iterator_expr before assigning to sender ref. + Call lower_omp_map_iterator_size before setting the size. Insert + iterator loop sequence before the statements for the target clause. + * tree-nested.cc (convert_nonlocal_reference_stmt): Walk the iterator + loop sequence of OpenMP target statements. + (convert_local_reference_stmt): Likewise. + (convert_tramp_reference_stmt): Likewise. + * tree-pretty-print.cc (dump_omp_iterators): Dump extra iterator + information if present. + (dump_omp_clause): Call dump_omp_iterators for iterators in map + clauses. + * tree.cc (omp_clause_num_ops): Add operand for OMP_CLAUSE_MAP. + (walk_tree_1): Do not walk last operand of OMP_CLAUSE_MAP. + * tree.h (OMP_CLAUSE_HAS_ITERATORS): New. + (OMP_CLAUSE_ITERATORS): New. + +2025-08-05 H.J. Lu + + PR target/121410 + * config/i386/i386-expand.cc (ix86_expand_set_or_cpymem): Use + STORE_MAX_PIECES to get the widest vector mode in vector loop + for memset. + +2025-08-05 Georg-Johann Lay + + * config/avr/avr.cc (avr_rtx_costs_1) [SIGN_EXTEND]: Adjust cost. + * config/avr/avr.md (*sext.ashift2): New + insn and a cc split. + +2025-08-05 Richard Sandiford + + PR target/121306 + * config/i386/predicates.md (extract_operator): Replace with... + (extract_high_operator): ...this new predicate. + * config/i386/i386.md (*cmpqi_ext_1, *cmpqi_ext_2) + (*cmpqi_ext_3, *cmpqi_ext_4, *movstrictqi_ext_1) + (*extzv, *insvqi_2, *extendqi_ext_1) + (*addqi_ext_1_slp, *addqi_ext_1_slp, *addqi_ext_0) + (*addqi_ext2_0, *addqi_ext_1, *qi_ext_2) + (*subqi_ext_1_slp, *subqi_ext_2_slp, *subqi_ext_0) + (*subqi_ext2_0, *subqi_ext_1, *testqi_ext_1) + (*testqi_ext_2, *qi_ext_1_slp) + (*qi_ext_2_slp, *qi_ext_0) + (*qi_ext2_0, *qi_ext_1) + (*qi_ext_1_cc, *qi_ext_1_cc) + (*qi_ext_2, *qi_ext_3, *negqi_ext_1) + (*one_cmplqi_ext_1, *ashlqi_ext_1, *qi_ext_1) + (define_peephole2): Replace uses of extract_operator with + extract_high_operator, matching only the first operand. + Use zero_extract rather than match_op_dup when splitting. + +2025-08-05 Richard Biener + + * tree-vectorizer.h (vect_relevant::hybrid): Remove. + * tree-vect-loop.cc (vect_analyze_loop_2): Do not call + vect_detect_hybrid_slp. + * tree-vect-slp.cc (maybe_push_to_hybrid_worklist): Remove. + (vect_detect_hybrid_slp): Likewise. + +2025-08-05 Georg-Johann Lay + + PR target/121359 + * config/avr/avr.h: Remove -mlra and remains of reload. + * config/avr/avr.cc: Same. + * config/avr/avr.md: Same. + * config/avr/avr-log.cc: Same. + * config/avr/avr-protos.h: Same. + * config/avr/avr.opt: Same. + * config/avr/avr.opt.urls: Same. + +2025-08-05 H.J. Lu + + PR target/121306 + * config/i386/i386.md (*one_cmplqi_ext_1): Updated to + support the new pattern. + +2025-08-05 Richard Biener + + PR tree-optimization/121395 + * tree-vectorizer.h (_loop_vec_info::alternate_defs): New member. + (LOOP_VINFO_ALTERNATE_DEFS): New. + * tree-vect-stmts.cc (vect_stmt_relevant_p): Populate it. + (vectorizable_simd_clone_call): Do not register a SLP def + when there is none. + * tree-vect-slp.cc (vect_build_slp_tree_1): Allow a NULL + vectype when there's no LHS. Allow all calls w/o LHS. + (vect_analyze_slp): Process LOOP_VINFO_ALTERNATE_DEFS as + SLP graph entries. + (vect_make_slp_decision): Handle a NULL SLP_TREE_VECTYPE. + (vect_slp_analyze_node_operations_1): Likewise. + (vect_schedule_slp_node): Likewise.
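On the i386 extract_high_operator change above: the predicate matches a zero_extract of the high byte of a 16-bit register half, i.e. the %ah/%bh/%ch/%dh subregisters. In scalar terms the matched operation is simply the following (a model, not GCC code):

#include <stdint.h>

/* Model of (zero_extract reg 8 8): the high byte of a 16-bit value,
   as accessed through an %ah-style subregister.  */
static uint8_t
extract_high_byte (uint16_t r)
{
  return (uint8_t) (r >> 8);
}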
+ +2025-08-05 Richard Biener + + * tree-vectorizer.h (enum slp_vect_type): Rename loop_vect + to not_vect, clarify docs. + (HYBRID_SLP_STMT): Remove. + * tree-vectorizer.cc (vec_info::new_stmt_vec_info): Adjust. + * tree-vect-loop.cc (vect_analyze_loop_2): Likewise. + +2025-08-05 Richard Biener + + * tree-vect-data-refs.cc (vect_get_data_access_cost): Use + ncopies == 1. + * tree-vect-slp.cc (vect_remove_slp_scalar_calls): Remove + hybrid/loop SLP skip. + * tree-vect-stmts.cc (vectorizable_store): Remove pure SLP assert. + +2025-08-05 Richard Biener + + PR tree-optimization/121382 + * tree-ssa-loop-ivopts.cc (create_new_iv): Rewrite the IV + step to defined form. + +2025-08-05 Richard Biener + + PR tree-optimization/121370 + * tree-scalar-evolution.cc (scev_dfs::add_to_evolution_1): + Avoid UB integer overflow in accumulating CHREC_RIGHT. + +2025-08-05 Yang Yujie + + * expr.cc (expand_expr_real_1): Do not call + reduce_to_bit_field_precision if the target assumes the _BitInt + results to be already extended. + (EXTEND_BITINT): Same. + * expr.h (bitint_extended): Declare the cache variable. + * function.cc (prepare_function_start): Initialize it. + +2025-08-05 Yang Yujie + + * explow.cc (promote_function_mode): Add a case for + small/medium _BitInts. + (promote_mode): Same. + +2025-08-05 Gerald Pfeifer + + PR target/69374 + * doc/install.texi (Configuration): Mark up atexit as code. + +2025-08-05 Pan Li + + * config/riscv/riscv.cc (riscv_expand_xmode_usmul): Take + umulhu for high bits mul result. + 2025-08-04 Hans-Peter Nilsson * defaults.h (MAX_FIXED_MODE_SIZE): Default to 2 * BITS_PER_WORD diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index 3724f1581ebd..815d6fb4b74f 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20250805 +20250827 diff --git a/gcc/Makefile.in b/gcc/Makefile.in index d7d5cbe72770..d2744db843d7 100644 --- a/gcc/Makefile.in +++ b/gcc/Makefile.in @@ -187,8 +187,6 @@ C_STRICT_WARN = @c_strict_warn@ NOEXCEPTION_FLAGS = @noexception_flags@ -ALIASING_FLAGS = @aliasing_flags@ - # This is set by --disable-maintainer-mode (default) to "#" # FIXME: 'MAINT' will always be set to an empty string, no matter if # --disable-maintainer-mode is used or not. This is because the @@ -1622,6 +1620,7 @@ OBJS = \ mcf.o \ mode-switching.o \ modulo-sched.o \ + diagnostic-context-rich-location.o \ multiple_target.o \ omp-offload.o \ omp-expand.o \ @@ -1858,6 +1857,7 @@ OBJS-libcommon = \ diagnostics/color.o \ diagnostics/context.o \ diagnostics/digraphs.o \ + diagnostics/dumping.o \ diagnostics/file-cache.o \ diagnostics/output-spec.o \ diagnostics/html-sink.o \ @@ -3402,6 +3402,9 @@ gengtype-lex.cc : gengtype-lex.l echo '#else' >> $@.tmp; \ echo '#include "bconfig.h"' >> $@.tmp; \ echo '#endif' >> $@.tmp; \ + echo '#define FLEX_SCANNER' >> $@.tmp; \ + echo '#include "system.h"' >> $@.tmp; \ + echo '#undef FLEX_SCANNER' >> $@.tmp; \ cat $@ >> $@.tmp; \ mv $@.tmp $@; \ } diff --git a/gcc/asan.h b/gcc/asan.h index 273d6745c58d..a24562f67a29 100644 --- a/gcc/asan.h +++ b/gcc/asan.h @@ -103,7 +103,7 @@ extern hash_set *asan_used_labels; independently here. */ /* How many bits are used to store a tag in a pointer. The default version uses the entire top byte of a pointer (i.e. 8 bits). */ -#define HWASAN_TAG_SIZE targetm.memtag.tag_size () +#define HWASAN_TAG_SIZE targetm.memtag.tag_bitsize () /* Tag Granule of HWASAN shadow stack. This is the size in real memory that each byte in the shadow memory refers to. I.e. 
if a variable is X bytes long in memory then its tag in shadow @@ -242,9 +242,10 @@ asan_protect_stack_decl (tree decl) remove all flags mentioned in "no_sanitize" of DECL_ATTRIBUTES. */ inline bool -sanitize_flags_p (unsigned int flag, const_tree fn = current_function_decl) +sanitize_flags_p (sanitize_code_type flag, + const_tree fn = current_function_decl) { - unsigned int result_flags = flag_sanitize & flag; + sanitize_code_type result_flags = flag_sanitize & flag; if (result_flags == 0) return false; diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index c9ab153acd8f..3c3814bf2ef1 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,84 @@ +2025-08-18 Indu Bhagat + + * c-attribs.cc (add_no_sanitize_value): Use 'sanitize_code_type' + instead of 'unsigned int'. + (handle_no_sanitize_attribute): Likewise. + (handle_no_sanitize_address_attribute): Likewise. + (handle_no_sanitize_thread_attribute): Likewise. + (handle_no_address_safety_analysis_attribute): Likewise. + * c-common.h (add_no_sanitize_value): Likewise. + +2025-08-15 Jakub Jelinek + + PR c++/121552 + * c.opt (Wnon-c-typedef-for-linkage): New option. + * c.opt.urls: Regenerate. + +2025-08-15 Jakub Jelinek + + PR preprocessor/120778 + PR target/121520 + * c-cppbuiltin.cc (c_cpp_builtins): Implement C++26 DR 2581. Add + cpp_define_warn lambda and use it as well as cpp_warn where needed. + In the if (c_dialect_cxx ()) block with __cpp_* predefinitions add + cpp_define lambda. Formatting fixes. + +2025-08-15 Qing Zhao + + * c-gimplify.cc (is_address_with_access_with_size): New function. + (ubsan_walk_array_refs_r): Instrument an INDIRECT_REF whose base + address is .ACCESS_WITH_SIZE or an address computation whose base + address is .ACCESS_WITH_SIZE. + * c-ubsan.cc (ubsan_instrument_bounds_pointer_address): New function. + (struct factor_t): New structure. + (get_factors_from_mul_expr): New function. + (get_index_from_offset): New function. + (get_index_from_pointer_addr_expr): New function. + (is_instrumentable_pointer_array_address): New function. + (ubsan_array_ref_instrumented_p): Change prototype. + Handle MEM_REF in addition to ARRAY_REF. + (ubsan_maybe_instrument_array_ref): Handle MEM_REF in addition + to ARRAY_REF. + +2025-08-15 Qing Zhao + + * c-attribs.cc (handle_counted_by_attribute): Accept counted_by + attribute for pointer fields. + +2025-08-13 Jakub Jelinek + + PR c++/120776 + * c-cppbuiltin.cc (c_cpp_builtins): Predefine + __cpp_expansion_statements=202506L for C++26. + +2025-08-08 David Malcolm + + * c-indentation.cc (should_warn_for_misleading_indentation): + Update for moving diagnostics::context::m_tabstop into + diagnostics::column_options. + * c-opts.cc (c_common_post_options): Likewise. + +2025-08-07 Jakub Jelinek + + PR c++/117783 + * c-cppbuiltin.cc (c_cpp_builtins): Change __cpp_structured_bindings + predefined value for C++26 from 202403L to 202411L. + +2025-08-07 Jakub Jelinek + + PR preprocessor/120778 + * c.opt (Wkeyword-macro): New option. + * c.opt.urls: Regenerate. + * c-common.h (cxx_dialect): Comment formatting fix. + * c-opts.cc (c_common_post_options): Default to + -Wkeyword-macro for C++26 if pedantic. + +2025-08-06 Alexandre Oliva + + * c-attribs.cc (handle_hardbool_attribute): Create distinct + enumeration types, with structural equality. Handle + base type qualifiers. + 2025-08-02 Martin Uecker + + * c-attribs.cc (handle_argspec_attribute): Update.
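The sanitize_code_type change visible in the asan.h hunk above (and in the Indu Bhagat c-family entries) widens the sanitizer flag mask beyond unsigned int so that more than 32 sanitizer bits fit. A minimal sketch of the widened bit test, assuming sanitize_code_type is a 64-bit integer typedef (the width and the parameter names here are assumptions):

#include <stdbool.h>
#include <stdint.h>

typedef uint64_t sanitize_code_type;	/* Assumed widened mask type.  */

/* Sketch of a sanitize_flags_p-style query against a 64-bit mask.
   ENABLED models the global flag_sanitize value.  */
static bool
flags_enabled_p (sanitize_code_type enabled, sanitize_code_type flag)
{
  return (enabled & flag) != 0;
}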
diff --git a/gcc/c-family/c-attribs.cc b/gcc/c-family/c-attribs.cc index a0d832b5e056..1e3a94ed9493 100644 --- a/gcc/c-family/c-attribs.cc +++ b/gcc/c-family/c-attribs.cc @@ -1128,11 +1128,16 @@ handle_hardbool_attribute (tree *node, tree name, tree args, } tree orig = *node; - *node = build_duplicate_type (orig); + /* Drop qualifiers from the base type. Keep attributes, so that, in the odd + chance attributes are applicable and relevant to the base type, if they + are specified first, or through a typedef, they wouldn't be dropped on the + floor here. */ + tree unqual = build_qualified_type (orig, TYPE_UNQUALIFIED); + *node = build_distinct_type_copy (unqual); TREE_SET_CODE (*node, ENUMERAL_TYPE); - ENUM_UNDERLYING_TYPE (*node) = orig; - TYPE_CANONICAL (*node) = TYPE_CANONICAL (orig); + ENUM_UNDERLYING_TYPE (*node) = unqual; + SET_TYPE_STRUCTURAL_EQUALITY (*node); tree false_value; if (args) @@ -1191,7 +1196,13 @@ handle_hardbool_attribute (tree *node, tree name, tree args, gcc_checking_assert (!TYPE_CACHED_VALUES_P (*node)); TYPE_VALUES (*node) = values; - TYPE_NAME (*node) = orig; + TYPE_NAME (*node) = unqual; + + if (TYPE_QUALS (orig) != TYPE_QUALS (*node)) + { + *node = build_qualified_type (*node, TYPE_QUALS (orig)); + TYPE_NAME (*node) = orig; + } return NULL_TREE; } @@ -1409,23 +1420,24 @@ handle_cold_attribute (tree *node, tree name, tree ARG_UNUSED (args), /* Add FLAGS for a function NODE to no_sanitize_flags in DECL_ATTRIBUTES. */ void -add_no_sanitize_value (tree node, unsigned int flags) +add_no_sanitize_value (tree node, sanitize_code_type flags) { tree attr = lookup_attribute ("no_sanitize", DECL_ATTRIBUTES (node)); if (attr) { - unsigned int old_value = tree_to_uhwi (TREE_VALUE (attr)); + sanitize_code_type old_value = + tree_to_sanitize_code_type (TREE_VALUE (attr)); flags |= old_value; if (flags == old_value) return; - TREE_VALUE (attr) = build_int_cst (unsigned_type_node, flags); + TREE_VALUE (attr) = build_int_cst (uint64_type_node, flags); } else DECL_ATTRIBUTES (node) = tree_cons (get_identifier ("no_sanitize"), - build_int_cst (unsigned_type_node, flags), + build_int_cst (uint64_type_node, flags), DECL_ATTRIBUTES (node)); } @@ -1436,7 +1448,7 @@ static tree handle_no_sanitize_attribute (tree *node, tree name, tree args, int, bool *no_add_attrs) { - unsigned int flags = 0; + sanitize_code_type flags = 0; *no_add_attrs = true; if (TREE_CODE (*node) != FUNCTION_DECL) { @@ -1473,7 +1485,7 @@ handle_no_sanitize_address_attribute (tree *node, tree name, tree, int, if (TREE_CODE (*node) != FUNCTION_DECL) warning (OPT_Wattributes, "%qE attribute ignored", name); else - add_no_sanitize_value (*node, SANITIZE_ADDRESS); + add_no_sanitize_value (*node, (sanitize_code_type) SANITIZE_ADDRESS); return NULL_TREE; } @@ -1489,7 +1501,7 @@ handle_no_sanitize_thread_attribute (tree *node, tree name, tree, int, if (TREE_CODE (*node) != FUNCTION_DECL) warning (OPT_Wattributes, "%qE attribute ignored", name); else - add_no_sanitize_value (*node, SANITIZE_THREAD); + add_no_sanitize_value (*node, (sanitize_code_type) SANITIZE_THREAD); return NULL_TREE; } @@ -1506,7 +1518,7 @@ handle_no_address_safety_analysis_attribute (tree *node, tree name, tree, int, if (TREE_CODE (*node) != FUNCTION_DECL) warning (OPT_Wattributes, "%qE attribute ignored", name); else - add_no_sanitize_value (*node, SANITIZE_ADDRESS); + add_no_sanitize_value (*node, (sanitize_code_type) SANITIZE_ADDRESS); return NULL_TREE; } @@ -2906,22 +2918,53 @@ handle_counted_by_attribute (tree *node, tree name, " declaration %q+D", 
name, decl); *no_add_attrs = true; } - /* This attribute only applies to field with array type. */ - else if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE) + /* This attribute only applies to a field with array type or pointer type. */ + else if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE + && TREE_CODE (TREE_TYPE (decl)) != POINTER_TYPE) { error_at (DECL_SOURCE_LOCATION (decl), - "%qE attribute is not allowed for a non-array field", - name); + "%qE attribute is not allowed for a non-array" + " or non-pointer field", name); *no_add_attrs = true; } /* This attribute only applies to a C99 flexible array member type. */ - else if (! c_flexible_array_member_type_p (TREE_TYPE (decl))) + else if (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE + && !c_flexible_array_member_type_p (TREE_TYPE (decl))) { error_at (DECL_SOURCE_LOCATION (decl), "%qE attribute is not allowed for a non-flexible" " array member field", name); *no_add_attrs = true; } + /* This attribute cannot be applied to a pointer to void type. */ + else if (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE + && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == VOID_TYPE) + { + error_at (DECL_SOURCE_LOCATION (decl), + "%qE attribute is not allowed for a pointer to void", + name); + *no_add_attrs = true; + } + /* This attribute cannot be applied to a pointer to function type. */ + else if (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE + && TREE_CODE (TREE_TYPE (TREE_TYPE (decl))) == FUNCTION_TYPE) + { + error_at (DECL_SOURCE_LOCATION (decl), + "%qE attribute is not allowed for a pointer to" + " function", name); + *no_add_attrs = true; + } + /* This attribute cannot be applied to a pointer to structure or union + with flexible array member. */ + else if (TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE + && RECORD_OR_UNION_TYPE_P (TREE_TYPE (TREE_TYPE (decl))) + && TYPE_INCLUDES_FLEXARRAY (TREE_TYPE (TREE_TYPE (decl)))) + { + error_at (DECL_SOURCE_LOCATION (decl), + "%qE attribute is not allowed for a pointer to" + " structure or union with flexible array member", name); + *no_add_attrs = true; + } /* The argument should be an identifier. */ else if (TREE_CODE (argval) != IDENTIFIER_NODE) { @@ -2930,7 +2973,8 @@ handle_counted_by_attribute (tree *node, tree name, *no_add_attrs = true; } /* Issue error when there is a counted_by attribute with a different - field as the argument for the same flexible array member field. */ + field as the argument for the same flexible array member or + pointer field. */ else if (old_counted_by != NULL_TREE) { tree old_fieldname = TREE_VALUE (TREE_VALUE (old_counted_by)); diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h index 7c7e21d2d0eb..b6021d241731 100644 --- a/gcc/c-family/c-common.h +++ b/gcc/c-family/c-common.h @@ -747,7 +747,7 @@ enum cxx_dialect { cxx26 }; -/* The C++ dialect being used. C++98 is the default. */ +/* The C++ dialect being used. C++17 is the default. */ extern enum cxx_dialect cxx_dialect; /* Maximum template instantiation depth. 
This limit is rather @@ -1713,7 +1713,7 @@ extern enum flt_eval_method excess_precision_mode_join (enum flt_eval_method, enum flt_eval_method); extern int c_flt_eval_method (bool ts18661_p); -extern void add_no_sanitize_value (tree node, unsigned int flags); +extern void add_no_sanitize_value (tree node, sanitize_code_type flags); extern void maybe_add_include_fixit (rich_location *, const char *, bool); extern void maybe_suggest_missing_token_insertion (rich_location *richloc, diff --git a/gcc/c-family/c-cppbuiltin.cc b/gcc/c-family/c-cppbuiltin.cc index 4aea90288631..6b22f9e60b14 100644 --- a/gcc/c-family/c-cppbuiltin.cc +++ b/gcc/c-family/c-cppbuiltin.cc @@ -913,23 +913,42 @@ c_cpp_builtins (cpp_reader *pfile) /* encoding definitions used by users and libraries */ builtin_define_with_value ("__GNUC_EXECUTION_CHARSET_NAME", - cpp_get_narrow_charset_name (pfile), 1); + cpp_get_narrow_charset_name (pfile), 1); builtin_define_with_value ("__GNUC_WIDE_EXECUTION_CHARSET_NAME", - cpp_get_wide_charset_name (pfile), 1); - + cpp_get_wide_charset_name (pfile), 1); if (c_dialect_cxx ()) - { - int major; - parse_basever (&major, NULL, NULL); - cpp_define_formatted (pfile, "__GNUG__=%d", major); - } + { + int major; + parse_basever (&major, NULL, NULL); + cpp_define_formatted (pfile, "__GNUG__=%d", major); + } /* For stddef.h. They require macros defined in c-common.cc. */ c_stddef_cpp_builtins (); + /* Variant of cpp_define which arranges for diagnostics on user #define + or #undef of the macros. */ + auto cpp_define_warn = [] (cpp_reader *pfile, const char *def) + { + const char *end = strchr (def, '='); + cpp_define (pfile, def); + cpp_warn (pfile, def, end ? end - def : strlen (def)); + }; + if (c_dialect_cxx ()) { + /* Treat all cpp_define calls in this block for macros starting + with __cpp_ (for C++20 and later) or __STDCPP_ as cpp_define_warn. */ + auto cpp_define = [=] (cpp_reader *pfile, const char *def) + { + if ((cxx_dialect >= cxx20 && startswith (def, "__cpp_")) + || startswith (def, "__STDCPP_")) + cpp_define_warn (pfile, def); + else + ::cpp_define (pfile, def); + }; + if (flag_weak && SUPPORTS_ONE_ONLY) cpp_define (pfile, "__GXX_WEAK__=1"); else @@ -1090,52 +1109,76 @@ c_cpp_builtins (cpp_reader *pfile) cpp_define (pfile, "__cpp_constexpr_exceptions=202411L"); cpp_define (pfile, "__cpp_static_assert=202306L"); cpp_define (pfile, "__cpp_placeholder_variables=202306L"); - cpp_define (pfile, "__cpp_structured_bindings=202403L"); + cpp_define (pfile, "__cpp_structured_bindings=202411L"); cpp_define (pfile, "__cpp_deleted_function=202403L"); cpp_define (pfile, "__cpp_variadic_friend=202403L"); cpp_define (pfile, "__cpp_pack_indexing=202311L"); cpp_define (pfile, "__cpp_pp_embed=202502L"); cpp_define (pfile, "__cpp_constexpr_virtual_inheritance=202506L"); cpp_define (pfile, "__cpp_trivial_relocatability=202502L"); + cpp_define (pfile, "__cpp_expansion_statements=202506L"); } if (flag_concepts && cxx_dialect > cxx14) cpp_define (pfile, "__cpp_concepts=202002L"); + else if (cxx_dialect >= cxx20) + cpp_warn (pfile, "__cpp_concepts"); if (flag_contracts) { cpp_define (pfile, "__cpp_contracts=201906L"); cpp_define (pfile, "__cpp_contracts_literal_semantics=201906L"); cpp_define (pfile, "__cpp_contracts_roles=201906L"); } + else if (cxx_dialect >= cxx26) + cpp_warn (pfile, "__cpp_contracts"); if (flag_modules) /* The std-defined value is 201907L, but I don't think we can claim victory yet. 201810 is the p1103 date. 
*/ cpp_define (pfile, "__cpp_modules=201810L"); + else if (cxx_dialect >= cxx20) + cpp_warn (pfile, "__cpp_modules"); if (flag_coroutines) cpp_define (pfile, "__cpp_impl_coroutine=201902L"); /* n4861, DIS */ + else if (cxx_dialect >= cxx20) + cpp_warn (pfile, "__cpp_impl_coroutine"); if (flag_tm) /* Use a value smaller than the 201505 specified in the TS, since we don't yet support atomic_cancel. */ cpp_define (pfile, "__cpp_transactional_memory=201500L"); if (flag_sized_deallocation) cpp_define (pfile, "__cpp_sized_deallocation=201309L"); + else if (cxx_dialect >= cxx20) + cpp_warn (pfile, "__cpp_sized_deallocation"); if (aligned_new_threshold) { cpp_define (pfile, "__cpp_aligned_new=201606L"); cpp_define_formatted (pfile, "__STDCPP_DEFAULT_NEW_ALIGNMENT__=%d", aligned_new_threshold); } + else if (cxx_dialect >= cxx20) + cpp_warn (pfile, "__cpp_aligned_new"); + if (cxx_dialect >= cxx17) + cpp_warn (pfile, "__STDCPP_DEFAULT_NEW_ALIGNMENT__"); if (flag_new_ttp) cpp_define (pfile, "__cpp_template_template_args=201611L"); + else if (cxx_dialect >= cxx20) + cpp_warn (pfile, "__cpp_template_template_args"); if (flag_threadsafe_statics) cpp_define (pfile, "__cpp_threadsafe_static_init=200806L"); + else if (cxx_dialect >= cxx20) + cpp_warn (pfile, "__cpp_threadsafe_static_init"); if (flag_char8_t) cpp_define (pfile, "__cpp_char8_t=202207L"); + else if (cxx_dialect >= cxx20) + cpp_warn (pfile, "__cpp_char8_t"); #ifndef THREAD_MODEL_SPEC /* Targets that define THREAD_MODEL_SPEC need to define __STDCPP_THREADS__ in their config/XXX/XXX-c.c themselves. */ if (cxx_dialect >= cxx11 && strcmp (thread_model, "single") != 0) cpp_define (pfile, "__STDCPP_THREADS__=1"); + else #endif + if (cxx_dialect >= cxx11) + cpp_warn (pfile, "__STDCPP_THREADS__"); if (flag_implicit_constexpr) cpp_define (pfile, "__cpp_implicit_constexpr=20211111L"); } @@ -1284,16 +1327,22 @@ c_cpp_builtins (cpp_reader *pfile) for (int i = 0; i < NUM_FLOATN_NX_TYPES; i++) { - if (FLOATN_NX_TYPE_NODE (i) == NULL_TREE) - continue; if (c_dialect_cxx () && cxx_dialect > cxx20 && !floatn_nx_types[i].extended) { char name[sizeof ("__STDCPP_FLOAT128_T__=1")]; + if (FLOATN_NX_TYPE_NODE (i) == NULL_TREE) + { + sprintf (name, "__STDCPP_FLOAT%d_T__", floatn_nx_types[i].n); + cpp_warn (pfile, name); + continue; + } sprintf (name, "__STDCPP_FLOAT%d_T__=1", floatn_nx_types[i].n); - cpp_define (pfile, name); + cpp_define_warn (pfile, name); } + else if (FLOATN_NX_TYPE_NODE (i) == NULL_TREE) + continue; char prefix[20], csuffix[20]; sprintf (prefix, "FLT%d%s", floatn_nx_types[i].n, floatn_nx_types[i].extended ? "X" : ""); @@ -1305,10 +1354,12 @@ c_cpp_builtins (cpp_reader *pfile) if (bfloat16_type_node) { if (c_dialect_cxx () && cxx_dialect > cxx20) - cpp_define (pfile, "__STDCPP_BFLOAT16_T__=1"); + cpp_define_warn (pfile, "__STDCPP_BFLOAT16_T__=1"); builtin_define_float_constants ("BFLT16", "BF16", "%s", "BF16", bfloat16_type_node); } + else if (cxx_dialect >= cxx23) + cpp_warn (pfile, "__STDCPP_BFLOAT16_T__"); /* For float.h. */ if (targetm.decimal_float_supported_p ()) diff --git a/gcc/c-family/c-gimplify.cc b/gcc/c-family/c-gimplify.cc index c6fb7646567e..131eca8297f8 100644 --- a/gcc/c-family/c-gimplify.cc +++ b/gcc/c-family/c-gimplify.cc @@ -66,6 +66,19 @@ along with GCC; see the file COPYING3. If not see walk back up, we check that they fit our constraints, and copy them into temporaries if not. */ + +/* Check whether TP is an address computation whose base is a call to + .ACCESS_WITH_SIZE. 
*/ + +static bool +is_address_with_access_with_size (tree tp) +{ + if (TREE_CODE (tp) == POINTER_PLUS_EXPR + && is_access_with_size_p (TREE_OPERAND (tp, 0))) + return true; + return false; +} + /* Callback for c_genericize. */ static tree @@ -121,6 +134,20 @@ ubsan_walk_array_refs_r (tree *tp, int *walk_subtrees, void *data) walk_tree (&TREE_OPERAND (*tp, 1), ubsan_walk_array_refs_r, pset, pset); walk_tree (&TREE_OPERAND (*tp, 0), ubsan_walk_array_refs_r, pset, pset); } + else if (TREE_CODE (*tp) == INDIRECT_REF + && is_address_with_access_with_size (TREE_OPERAND (*tp, 0))) + { + ubsan_maybe_instrument_array_ref (&TREE_OPERAND (*tp, 0), false); + /* Make sure ubsan_maybe_instrument_array_ref is not called again on + the POINTER_PLUS_EXPR, so ensure it is not walked again and walk + its subtrees manually. */ + tree aref = TREE_OPERAND (*tp, 0); + pset->add (aref); + *walk_subtrees = 0; + walk_tree (&TREE_OPERAND (aref, 0), ubsan_walk_array_refs_r, pset, pset); + } + else if (is_address_with_access_with_size (*tp)) + ubsan_maybe_instrument_array_ref (tp, true); return NULL_TREE; } diff --git a/gcc/c-family/c-indentation.cc b/gcc/c-family/c-indentation.cc index bb214fc259b7..d378464be150 100644 --- a/gcc/c-family/c-indentation.cc +++ b/gcc/c-family/c-indentation.cc @@ -330,7 +330,7 @@ should_warn_for_misleading_indentation (const token_indent_info &guard_tinfo, if (guard_loc == body_loc || body_loc == next_stmt_loc) return false; - const unsigned int tab_width = global_dc->m_tabstop; + const unsigned int tab_width = global_dc->get_column_options ().m_tabstop; /* They must be in the same file. */ if (next_stmt_exploc.file != body_exploc.file) diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc index c652e82a8c7a..0ec30e852158 100644 --- a/gcc/c-family/c-opts.cc +++ b/gcc/c-family/c-opts.cc @@ -959,6 +959,15 @@ c_common_post_options (const char **pfilename) if (warn_enum_compare == -1) warn_enum_compare = c_dialect_cxx () ? 1 : 0; + /* For C++26 default to -Wkeyword-macro if -Wpedantic. */ + if (cxx_dialect >= cxx26 && pedantic) + { + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + warn_keyword_macro, 1); + if (warn_keyword_macro) + cpp_opts->cpp_warn_keyword_macro = warn_keyword_macro; + } + /* -Wpacked-bitfield-compat is on by default for the C languages. The warning is issued in stor-layout.cc which is not part of the front-end so we need to selectively turn it on here. */ @@ -1191,7 +1200,7 @@ c_common_post_options (const char **pfilename) flag_char8_t = (cxx_dialect >= cxx20) || flag_isoc23; cpp_opts->unsigned_utf8char = flag_char8_t ? 1 : cpp_opts->unsigned_char; - cpp_opts->cpp_tabstop = global_dc->m_tabstop; + cpp_opts->cpp_tabstop = global_dc->get_column_options ().m_tabstop; if (flag_extern_tls_init) { diff --git a/gcc/c-family/c-ubsan.cc b/gcc/c-family/c-ubsan.cc index a4dc31066afb..a64f74e5b1c9 100644 --- a/gcc/c-family/c-ubsan.cc +++ b/gcc/c-family/c-ubsan.cc @@ -548,38 +548,317 @@ ubsan_instrument_bounds (location_t loc, tree array, tree *index, *index, bound); } -/* Return true iff T is an array that was instrumented by SANITIZE_BOUNDS. */ + +/* Instrument array bounds for the pointer array address which is + a call to .ACCESS_WITH_SIZE. We create a special + builtin that gets expanded in the sanopt pass, and make an array + dimension of it. POINTER_ADDR is the pointer array's base address. + *INDEX is an index to the array. + IGNORE_OFF_BY_ONE is true if the POINTER_ADDR is not inside an + INDIRECT_REF. + Return NULL_TREE if no instrumentation is emitted.
*/ + +tree +ubsan_instrument_bounds_pointer_address (location_t loc, tree pointer_addr, + tree *index, + bool ignore_off_by_one) +{ + tree call = pointer_addr; + if (!is_access_with_size_p (call)) + return NULL_TREE; + tree bound = get_bound_from_access_with_size (call); + + if (ignore_off_by_one) + bound = fold_build2 (PLUS_EXPR, TREE_TYPE (bound), bound, + build_int_cst (TREE_TYPE (bound), + 1)); + + /* Don't emit instrumentation in the most common cases. */ + tree idx = NULL_TREE; + if (TREE_CODE (*index) == INTEGER_CST) + idx = *index; + else if (TREE_CODE (*index) == BIT_AND_EXPR + && TREE_CODE (TREE_OPERAND (*index, 1)) == INTEGER_CST) + idx = TREE_OPERAND (*index, 1); + if (idx + && TREE_CODE (bound) == INTEGER_CST + && tree_int_cst_sgn (idx) >= 0 + && tree_int_cst_lt (idx, bound)) + return NULL_TREE; + + *index = save_expr (*index); + + /* Create an array_type for the corresponding pointer array. */ + tree itype = build_range_type (sizetype, size_zero_node, NULL_TREE); + /* The array's element type can be obtained from the return type of the + call to .ACCESS_WITH_SIZE. */ + tree element_type = TREE_TYPE (TREE_TYPE (call)); + tree array_type = build_array_type (element_type, itype); + /* Create a "(T *) 0" tree node to describe the array type. */ + tree zero_with_type = build_int_cst (build_pointer_type (array_type), 0); + return build_call_expr_internal_loc (loc, IFN_UBSAN_BOUNDS, + void_type_node, 3, zero_with_type, + *index, bound); +} + +/* This structure combines a factor with its parent and its position + in its parent tree. */ +struct factor_t +{ + tree factor; + tree parent; /* The parent tree of this factor. */ + int pos; /* The position of this factor in its parent tree. */ +}; + +/* For a multiply expression like: + ((long unsigned int) m * (long unsigned int) SAVE_EXPR ) * 4 + + locate all the factors, the parent of each factor and the position of + the factor in its parent, and put them into VEC_FACTORS. */ + +static void +get_factors_from_mul_expr (tree mult_expr, tree parent, + int pos, auto_vec *vec_factors) +{ + struct factor_t mult_factor = {0, 0, -1}; + mult_factor.factor = mult_expr; + mult_factor.parent = parent; + mult_factor.pos = pos; + + while (CONVERT_EXPR_CODE_P (TREE_CODE (mult_expr))) + { + mult_factor.parent = mult_expr; + mult_factor.pos = 0; + mult_expr = TREE_OPERAND (mult_expr, 0); + mult_factor.factor = mult_expr; + } + if (TREE_CODE (mult_expr) != MULT_EXPR) + vec_factors->safe_push (mult_factor); + else + { + get_factors_from_mul_expr (TREE_OPERAND (mult_expr, 0), mult_expr, + 0, vec_factors); + get_factors_from_mul_expr (TREE_OPERAND (mult_expr, 1), mult_expr, + 1, vec_factors); + } +} + +/* Given an OFFSET expression and the ELEMENT_SIZE, + get the index expression from OFFSET and return it. + For example: + OFFSET: + ((long unsigned int) m * (long unsigned int) SAVE_EXPR ) * 4 + ELEMENT_SIZE: + (sizetype) SAVE_EXPR * 4 + get the index as (long unsigned int) m, and return it. + INDEX_P holds the pointer to the parent tree of the index, and + INDEX_N holds the position of the index in its parent.
*/ + +static tree +get_index_from_offset (tree offset, tree *index_p, + int *index_n, tree element_size) +{ + if (TREE_CODE (offset) != MULT_EXPR) + return NULL_TREE; + + auto_vec e_factors, o_factors; + get_factors_from_mul_expr (element_size, NULL, -1, &e_factors); + get_factors_from_mul_expr (offset, *index_p, *index_n, &o_factors); + + if (e_factors.is_empty () || o_factors.is_empty ()) + return NULL_TREE; + + bool all_found = true; + for (unsigned i = 0; i < e_factors.length (); i++) + { + factor_t e_size_factor = e_factors[i]; + bool found = false; + for (unsigned j = 0; j < o_factors.length ();) + { + factor_t o_exp_factor = o_factors[j]; + if (operand_equal_p (e_size_factor.factor, o_exp_factor.factor)) + { + o_factors.unordered_remove (j); + found = true; + break; + } + else + j++; + } + if (!found) + all_found = false; + } + + if (!all_found) + return NULL_TREE; + + if (o_factors.length () != 1) + return NULL_TREE; + + *index_p = o_factors[0].parent; + *index_n = o_factors[0].pos; + return o_factors[0].factor; +} + +/* For a pointer + offset computation expression, such as + .ACCESS_WITH_SIZE (p->c, &p->b, 1, 0, -1, 0B) + + (sizetype) ((long unsigned int) index * 4) + Return the index of this pointer array reference, + set the parent tree of INDEX to *INDEX_P, and + set the operand position of the INDEX in the parent tree to *INDEX_N. + On failure, return NULL_TREE. */ + +static tree +get_index_from_pointer_addr_expr (tree pointer, tree *index_p, int *index_n) +{ + *index_p = NULL_TREE; + *index_n = -1; + tree call = TREE_OPERAND (pointer, 0); + if (!is_access_with_size_p (call)) + return NULL_TREE; + + /* Get the pointee type of the call to .ACCESS_WITH_SIZE. + This should be the element type of the pointer array. */ + tree pointee_type = TREE_TYPE (TREE_TYPE (call)); + tree pointee_size = TYPE_SIZE_UNIT (pointee_type); + + tree index_exp = TREE_OPERAND (pointer, 1); + *index_p = pointer; + *index_n = 1; + + if (!(TREE_CODE (index_exp) != MULT_EXPR + && tree_int_cst_equal (pointee_size, integer_one_node))) + { + while (CONVERT_EXPR_CODE_P (TREE_CODE (index_exp))) + { + *index_p = index_exp; + *index_n = 0; + index_exp = TREE_OPERAND (index_exp, 0); + } + index_exp = get_index_from_offset (index_exp, index_p, + index_n, pointee_size); + + if (!index_exp) + return NULL_TREE; + } + + while (CONVERT_EXPR_CODE_P (TREE_CODE (index_exp))) + { + *index_p = index_exp; + *index_n = 0; + index_exp = TREE_OPERAND (index_exp, 0); + } + + return index_exp; +} + +/* Return TRUE when the EXPR is a pointer array address that could be + instrumented. + We only instrument an address computation similar to the following: + .ACCESS_WITH_SIZE (p->c, &p->b, 1, 0, -1, 0B) + + (sizetype) ((long unsigned int) index * 4) + If the EXPR is instrumentable, return TRUE and + set the index to *INDEX, + set the call to .ACCESS_WITH_SIZE to *BASE, + set the parent tree of INDEX to *INDEX_P, and + set the operand position of the INDEX in the parent tree to *INDEX_N. */ + +static bool +is_instrumentable_pointer_array_address (tree expr, tree *base, + tree *index, tree *index_p, + int *index_n) +{ + /* For a pointer array address as: + .ACCESS_WITH_SIZE (p->c, &p->b, 1, 0, -1, 0B) + + (sizetype) ((long unsigned int) index * 4) + op0 is the call to .ACCESS_WITH_SIZE; + op1 is the index.
*/ + if (TREE_CODE (expr) != POINTER_PLUS_EXPR) + return false; + + tree op0 = TREE_OPERAND (expr, 0); + if (!is_access_with_size_p (op0)) + return false; + tree op1 = get_index_from_pointer_addr_expr (expr, index_p, index_n); + if (op1 != NULL_TREE) + { + *base = op0; + *index = op1; + return true; + } + return false; +} + +/* Return true iff T is an array or an indirect reference that was + instrumented by SANITIZE_BOUNDS. */ bool -ubsan_array_ref_instrumented_p (const_tree t) +ubsan_array_ref_instrumented_p (tree t) { - if (TREE_CODE (t) != ARRAY_REF) + if (TREE_CODE (t) != ARRAY_REF + && TREE_CODE (t) != MEM_REF) return false; - tree op1 = TREE_OPERAND (t, 1); - return TREE_CODE (op1) == COMPOUND_EXPR - && TREE_CODE (TREE_OPERAND (op1, 0)) == CALL_EXPR - && CALL_EXPR_FN (TREE_OPERAND (op1, 0)) == NULL_TREE - && CALL_EXPR_IFN (TREE_OPERAND (op1, 0)) == IFN_UBSAN_BOUNDS; + bool is_array = (TREE_CODE (t) == ARRAY_REF); + tree op0 = NULL_TREE; + tree op1 = NULL_TREE; + tree index_p = NULL_TREE; + int index_n = 0; + if (is_array) + { + op1 = TREE_OPERAND (t, 1); + return TREE_CODE (op1) == COMPOUND_EXPR + && TREE_CODE (TREE_OPERAND (op1, 0)) == CALL_EXPR + && CALL_EXPR_FN (TREE_OPERAND (op1, 0)) == NULL_TREE + && CALL_EXPR_IFN (TREE_OPERAND (op1, 0)) == IFN_UBSAN_BOUNDS; + } + else if (is_instrumentable_pointer_array_address (t, &op0, &op1, + &index_p, &index_n)) + return TREE_CODE (op1) == COMPOUND_EXPR + && TREE_CODE (TREE_OPERAND (op1, 0)) == CALL_EXPR + && CALL_EXPR_FN (TREE_OPERAND (op1, 0)) == NULL_TREE + && CALL_EXPR_IFN (TREE_OPERAND (op1, 0)) == IFN_UBSAN_BOUNDS; + + return false; } -/* Instrument an ARRAY_REF, if it hasn't already been instrumented. - IGNORE_OFF_BY_ONE is true if the ARRAY_REF is inside a ADDR_EXPR. */ +/* Instrument an ARRAY_REF or an address computation whose base address is + a call to .ACCESS_WITH_SIZE, if it hasn't already been instrumented. + IGNORE_OFF_BY_ONE is true if the ARRAY_REF is inside an ADDR_EXPR, or the + address computation is not inside an INDIRECT_REF. */ void ubsan_maybe_instrument_array_ref (tree *expr_p, bool ignore_off_by_one) { + tree e = NULL_TREE; + tree op0 = NULL_TREE; + tree op1 = NULL_TREE; + tree index_p = NULL_TREE; /* The parent tree of INDEX. */ + int index_n = 0; /* The operand position of INDEX in the parent tree. */ + if (!ubsan_array_ref_instrumented_p (*expr_p) && sanitize_flags_p (SANITIZE_BOUNDS | SANITIZE_BOUNDS_STRICT) && current_function_decl != NULL_TREE) { - tree op0 = TREE_OPERAND (*expr_p, 0); - tree op1 = TREE_OPERAND (*expr_p, 1); - tree e = ubsan_instrument_bounds (EXPR_LOCATION (*expr_p), op0, &op1, - ignore_off_by_one); + if (TREE_CODE (*expr_p) == ARRAY_REF) + { + op0 = TREE_OPERAND (*expr_p, 0); + op1 = TREE_OPERAND (*expr_p, 1); + index_p = *expr_p; + index_n = 1; + e = ubsan_instrument_bounds (EXPR_LOCATION (*expr_p), op0, + &op1, ignore_off_by_one); + } + else if (is_instrumentable_pointer_array_address (*expr_p, &op0, &op1, + &index_p, &index_n)) + e = ubsan_instrument_bounds_pointer_address (EXPR_LOCATION (*expr_p), + op0, &op1, + ignore_off_by_one); + + /* Replace the original INDEX with the instrumented INDEX.
*/ if (e != NULL_TREE) - TREE_OPERAND (*expr_p, 1) = build2 (COMPOUND_EXPR, TREE_TYPE (op1), - e, op1); + TREE_OPERAND (index_p, index_n) + = build2 (COMPOUND_EXPR, TREE_TYPE (op1), e, op1); } } diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt index 12877eb0e175..3f5e2f0874d9 100644 --- a/gcc/c-family/c.opt +++ b/gcc/c-family/c.opt @@ -964,6 +964,10 @@ Enum(warn_leading_whitespace_kind) String(tabs) Value(2) EnumValue Enum(warn_leading_whitespace_kind) String(blanks) Value(3) +Wkeyword-macro +C ObjC C++ ObjC++ CPP(cpp_warn_keyword_macro) CppReason(CPP_W_KEYWORD_MACRO) Var(warn_keyword_macro) Init(0) Warning +Warn about defining or undefining macros with identifiers equal to keywords (or for C++ conditional keywords or standard attribute names). + Wleading-whitespace= C ObjC C++ ObjC++ CPP(cpp_warn_leading_whitespace) CppReason(CPP_W_LEADING_WHITESPACE) Enum(warn_leading_whitespace_kind) Joined RejectNegative Var(warn_leading_whitespace) Init(0) Warning Warn about leading whitespace style issues on lines except when in raw string literals. @@ -1106,6 +1110,10 @@ Wnoexcept-type C++ ObjC++ Warning Var(warn_noexcept_type) LangEnabledBy(C++ ObjC++,Wabi || Wc++17-compat) Warn if C++17 noexcept function type will change the mangled name of a symbol. +Wnon-c-typedef-for-linkage +C++ ObjC++ Var(warn_non_c_typedef_for_linkage) Init(1) Warning +Warn for non-C compatible unnamed classes with a typedef name for linkage purposes. + Wnon-template-friend C++ ObjC++ Var(warn_nontemplate_friend) Init(1) Warning Warn when non-templatized friend functions are declared within a template. diff --git a/gcc/c-family/c.opt.urls b/gcc/c-family/c.opt.urls index 5c97593d703b..e09d51d8afb9 100644 --- a/gcc/c-family/c.opt.urls +++ b/gcc/c-family/c.opt.urls @@ -508,6 +508,9 @@ UrlSuffix(gcc/Warning-Options.html#index-Winvalid-utf8) Wjump-misses-init UrlSuffix(gcc/Warning-Options.html#index-Wjump-misses-init) +Wkeyword-macro +UrlSuffix(gcc/Warning-Options.html#index-Wkeyword-macro) + Wleading-whitespace= UrlSuffix(gcc/Warning-Options.html#index-Wleading-whitespace_003d) @@ -610,6 +613,9 @@ UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wno-noexcept) Wnoexcept-type UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wno-noexcept-type) +Wnon-c-typedef-for-linkage +UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wno-non-c-typedef-for-linkage) + Wnon-template-friend UrlSuffix(gcc/C_002b_002b-Dialect-Options.html#index-Wno-non-template-friend) diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 464e5a1883af..bb0b8a9b541f 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,123 @@ +2025-08-26 Sandra Loosemore + + PR middle-end/118839 + * c-parser.cc (c_finish_omp_declare_variant): Error if variant + is the same as base. + +2025-08-26 Sandra Loosemore + + * c-parser.cc (c_finish_omp_declare_variant): Rework diagnostic + code. Do not record variant if there are errors. Make check for + a missing "match" clause unconditional. + +2025-08-21 Andrew Pinski + + PR c/121478 + * c-fold.cc (c_fully_fold_internal): Fold nullptr_t ==/!= nullptr_t. + * c-typeck.cc (convert_arguments): Handle conversion from nullptr_t + for varargs. + (convert_for_assignment): Handle conversions from nullptr_t to + pointer type specially. + +2025-08-18 Indu Bhagat + + * c-parser.cc (c_parser_declaration_or_fndef): Use + 'sanitize_code_type' instead of 'unsigned int'. 
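A minimal sketch of what the new -Wkeyword-macro option added to c.opt above is meant to diagnose; the macro definitions below are illustrative, not taken from this patch:

/* Compiled with: gcc -Wkeyword-macro -c sketch.c (file name hypothetical).  */
#define for while     /* warned: defines a macro whose name is a keyword */
#undef int            /* warned: undefines a keyword */
#define __inline x    /* not warned: keywords starting with "__" are skipped */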
+ +2025-08-15 Jakub Jelinek + + PR preprocessor/120778 + PR target/121520 + * c-decl.cc (c_init_decl_processing): Use cpp_warn instead of + cpp_lookup and NODE_WARN bit setting. + +2025-08-15 Qing Zhao + + * c-tree.h (handle_counted_by_for_component_ref): New prototypes of + build_component_ref and handle_counted_by_for_component_ref. + * c-parser.cc (c_parser_postfix_expression): Call the new prototypes + of build_component_ref and handle_counted_by_for_component_ref, + update comments. + * c-typeck.cc (default_function_array_read_conversion): Likewise. + (convert_lvalue_to_rvalue): Likewise. + (default_conversion): Likewise. + (handle_counted_by_p): Update comments. + (handle_counted_by_for_component_ref): Delete one argument. + (build_component_ref): Delete one argument. Delete the call to + handle_counted_by_for_component_ref completely. + (build_array_ref): Generate call to .ACCESS_WITH_SIZE for array. + +2025-08-15 Qing Zhao + + * c-decl.cc (verify_counted_by_attribute): Change the 2nd argument + to a vector of fields with counted_by attribute. Verify all fields + in this vector. + (finish_struct): Collect all the fields with counted_by attribute + to a vector and pass this vector to verify_counted_by_attribute. + * c-tree.h (handle_counted_by_for_component_ref): New prototype of + handle_counted_by_for_component_ref. + * c-parser.cc (c_parser_postfix_expression): Call the new prototype + of handle_counted_by_for_component_ref. + * c-typeck.cc (default_function_array_read_conversion): Only generate + call to .ACCESS_WITH_SIZE for a pointer field when it's a read. + (convert_lvalue_to_rvalue): Likewise. + (default_conversion): Likewise. + (handle_counted_by_p): New routine. + (check_counted_by_attribute): New routine. + (build_counted_by_ref): Handle pointers with counted_by. + (build_access_with_size_for_counted_by): Handle pointers with counted_by. + (handle_counted_by_for_component_ref): Add one more argument. + (build_component_ref): Call the new prototype of + handle_counted_by_for_component_ref. + +2025-08-07 Jakub Jelinek + + PR preprocessor/120778 + * c-decl.cc (c_init_decl_processing): Mark cpp nodes corresponding + to keywords as NODE_WARN if warn_keyword_macro. + +2025-08-06 Alexandre Oliva + + * c-tree.h (C_BOOLEAN_TYPE_P): Cover hardbools as well. + * c-typeck.cc (convert_lvalue_to_rvalue): New overload and + wrapper. + (build_atomic_assign, build_modify_expr): Use it. + (build_asm_expr, handle_omp_array_sections_1): Simplify with + it. + (build_unary_op): Handle hardbools. + +2025-08-06 Martin Uecker + + PR c/108931 + * c-typeck.cc (composite_type_cond): Renamed from + composite_type, with an argument for the condition. + (composite_type): New function. + (composite_type_internal): Implement new logic. + (build_conditional_expr): Pass condition. + (common_pointer_type): Adapt. + (pointer_diff): Adapt. + (build_binary_op): Adapt. + +2025-08-06 Martin Uecker + + PR c/121217 + * c-typeck.cc (tagged_types_tu_compatible_p): Add check. + +2025-08-06 Kwok Cheung Yeung + + * c-parser.cc (c_parser_omp_clause_from_to): Parse 'iterator' modifier. + * c-typeck.cc (c_finish_omp_clauses): Finish iterators for to/from + clauses. + +2025-08-06 Kwok Cheung Yeung + Andrew Stubbs + + * c-parser.cc (c_parser_omp_variable_list): Use location of the + map expression as the clause location. + (c_parser_omp_clause_map): Parse 'iterator' modifier. + * c-typeck.cc (c_finish_omp_clauses): Finish iterators. Apply + iterators to generated clauses.
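The counted_by entries above extend the attribute from flexible array members to pointer fields; a minimal sketch of the source-level pattern they target (struct and field names are hypothetical, not taken from this patch):

/* With -fsanitize=bounds, the front end rewrites the access below into a
   call to the internal function .ACCESS_WITH_SIZE so that the index can
   be checked against p->count at run time.  */
struct vec
{
  int count;                                       /* element count */
  int *data __attribute__ ((counted_by (count)));  /* bounded pointer */
};

int
vec_get (struct vec *p, int i)
{
  return p->data[i];  /* index checked against p->count */
}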
+ 2025-08-02 Martin Uecker * c-decl.cc (get_parm_array_spec): Remove. diff --git a/gcc/c/c-decl.cc b/gcc/c/c-decl.cc index 7850365f35c6..77006cacdb7e 100644 --- a/gcc/c/c-decl.cc +++ b/gcc/c/c-decl.cc @@ -4825,6 +4825,28 @@ c_init_decl_processing (void) make_fname_decl = c_make_fname_decl; start_fname_decls (); + + if (warn_keyword_macro) + { + for (unsigned int i = 0; i < num_c_common_reswords; ++i) + /* Warn only for regular C keywords which don't start with an + underscore or start with just a single underscore. Don't + complain about ObjC or Transactional Memory keywords. */ + if (c_common_reswords[i].word[0] == '_' + && c_common_reswords[i].word[1] == '_') + continue; + else if (c_common_reswords[i].disable + & (D_TRANSMEM | D_OBJC | D_CXX_OBJC)) + continue; + else + { + tree id = get_identifier (c_common_reswords[i].word); + if (C_IS_RESERVED_WORD (id) + && C_RID_CODE (id) != RID_CXX_COMPAT_WARN) + cpp_warn (parse_in, IDENTIFIER_POINTER (id), + IDENTIFIER_LENGTH (id)); + } + } } /* Create the VAR_DECL at LOC for __FUNCTION__ etc. ID is the name to @@ -9281,56 +9303,62 @@ c_update_type_canonical (tree t) } } -/* Verify the argument of the counted_by attribute of the flexible array - member FIELD_DECL is a valid field of the containing structure, - STRUCT_TYPE, Report error and remove this attribute when it's not. */ +/* Verify the argument of the counted_by attribute of each of the + FIELDS_WITH_COUNTED_BY is a valid field of the containing structure, + STRUCT_TYPE. Report an error and remove the corresponding attribute + when it's not. */ static void -verify_counted_by_attribute (tree struct_type, tree field_decl) +verify_counted_by_attribute (tree struct_type, + auto_vec *fields_with_counted_by) { - tree attr_counted_by = lookup_attribute ("counted_by", - DECL_ATTRIBUTES (field_decl)); + for (tree field_decl : *fields_with_counted_by) + { + tree attr_counted_by = lookup_attribute ("counted_by", + DECL_ATTRIBUTES (field_decl)); - if (!attr_counted_by) - return; + if (!attr_counted_by) + continue; - /* If there is an counted_by attribute attached to the field, - verify it. */ + /* If there is a counted_by attribute attached to the field, + verify it. */ - tree fieldname = TREE_VALUE (TREE_VALUE (attr_counted_by)); + tree fieldname = TREE_VALUE (TREE_VALUE (attr_counted_by)); - /* Verify the argument of the attrbute is a valid field of the - containing structure. */ + /* Verify the argument of the attribute is a valid field of the + containing structure. */ - tree counted_by_field = lookup_field (struct_type, fieldname); + tree counted_by_field = lookup_field (struct_type, fieldname); - /* Error when the field is not found in the containing structure and - remove the corresponding counted_by attribute from the field_decl. */ - if (!counted_by_field) - { - error_at (DECL_SOURCE_LOCATION (field_decl), - "argument %qE to the %<counted_by%> attribute" - " is not a field declaration in the same structure" - " as %qD", fieldname, field_decl); - DECL_ATTRIBUTES (field_decl) - = remove_attribute ("counted_by", DECL_ATTRIBUTES (field_decl)); - } - else - /* Error when the field is not with an integer type. */ - { - while (TREE_CHAIN (counted_by_field)) - counted_by_field = TREE_CHAIN (counted_by_field); - tree real_field = TREE_VALUE (counted_by_field); - - if (!INTEGRAL_TYPE_P (TREE_TYPE (real_field))) + /* Error when the field is not found in the containing structure and + remove the corresponding counted_by attribute from the field_decl.
*/ + if (!counted_by_field) + { + error_at (DECL_SOURCE_LOCATION (field_decl), + "argument %qE to the %<counted_by%> attribute" + " is not a field declaration in the same structure" + " as %qD", fieldname, field_decl); + DECL_ATTRIBUTES (field_decl) + = remove_attribute ("counted_by", DECL_ATTRIBUTES (field_decl)); + } + else + /* Error when the field does not have an integer type. */ + { + while (TREE_CHAIN (counted_by_field)) + counted_by_field = TREE_CHAIN (counted_by_field); + tree real_field = TREE_VALUE (counted_by_field); + + if (!INTEGRAL_TYPE_P (TREE_TYPE (real_field))) { error_at (DECL_SOURCE_LOCATION (field_decl), "argument %qE to the %<counted_by%> attribute" - " is not a field declaration with an integer type", - fieldname); + " is not a field declaration with an integer type", + fieldname); DECL_ATTRIBUTES (field_decl) = remove_attribute ("counted_by", DECL_ATTRIBUTES (field_decl)); + } } } @@ -9405,7 +9433,7 @@ finish_struct (location_t loc, tree t, tree fieldlist, tree attributes, until now.) */ bool saw_named_field = false; - tree counted_by_fam_field = NULL_TREE; + auto_vec fields_with_counted_by; for (x = fieldlist; x; x = DECL_CHAIN (x)) { /* Whether this field is the last field of the structure or union. @@ -9486,9 +9514,16 @@ finish_struct (location_t loc, tree t, tree fieldlist, tree attributes, record it here and do more verification later after the whole structure is complete. */ if (lookup_attribute ("counted_by", DECL_ATTRIBUTES (x))) - counted_by_fam_field = x; + fields_with_counted_by.safe_push (x); } + if (TREE_CODE (TREE_TYPE (x)) == POINTER_TYPE) + /* If there is a counted_by attribute attached to this field, + record it here and do more verification later after the + whole structure is complete. */ + if (lookup_attribute ("counted_by", DECL_ATTRIBUTES (x))) + fields_with_counted_by.safe_push (x); + if (pedantic && TREE_CODE (t) == RECORD_TYPE && flexible_array_type_p (TREE_TYPE (x))) pedwarn (DECL_SOURCE_LOCATION (x), OPT_Wpedantic, @@ -9787,8 +9822,8 @@ finish_struct (location_t loc, tree t, tree fieldlist, tree attributes, struct_parse_info->struct_types.safe_push (t); } - if (counted_by_fam_field) - verify_counted_by_attribute (t, counted_by_fam_field); + if (fields_with_counted_by.length () > 0) + verify_counted_by_attribute (t, &fields_with_counted_by); return t; } diff --git a/gcc/c/c-fold.cc b/gcc/c/c-fold.cc index d54ab3cf4793..3f6e4b469b9d 100644 --- a/gcc/c/c-fold.cc +++ b/gcc/c/c-fold.cc @@ -369,7 +369,25 @@ c_fully_fold_internal (tree expr, bool in_init, bool *maybe_const_operands, || TREE_CODE (op1) != INTEGER_CST)) goto out; - if (op0 != orig_op0 || op1 != orig_op1 || in_init) + if (TREE_CODE_CLASS (code) == tcc_comparison + && TREE_CODE (TREE_TYPE (op0)) == NULLPTR_TYPE + && TREE_CODE (TREE_TYPE (op1)) == NULLPTR_TYPE) + { + switch (code) + { + case EQ_EXPR: + ret = constant_boolean_node (true, TREE_TYPE (expr)); + break; + case NE_EXPR: + ret = constant_boolean_node (false, TREE_TYPE (expr)); + break; + default: + gcc_unreachable (); + } + ret = omit_two_operands_loc (loc, TREE_TYPE (expr), ret, + op0, op1); + } + else if (op0 != orig_op0 || op1 != orig_op1 || in_init) ret = in_init ?
fold_build2_initializer_loc (loc, code, TREE_TYPE (expr), op0, op1) : fold_build2_loc (loc, code, TREE_TYPE (expr), op0, op1); diff --git a/gcc/c/c-parser.cc b/gcc/c/c-parser.cc index 4a13fc0d3842..566ab1baebed 100644 --- a/gcc/c/c-parser.cc +++ b/gcc/c/c-parser.cc @@ -2822,7 +2822,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, specs->constexpr_p, &richloc); /* A parameter is initialized, which is invalid. Don't attempt to instrument the initializer. */ - int flag_sanitize_save = flag_sanitize; + sanitize_code_type flag_sanitize_save = flag_sanitize; if (nested && !empty_ok) flag_sanitize = 0; init = c_parser_expr_no_commas (parser, NULL); @@ -2911,7 +2911,7 @@ c_parser_declaration_or_fndef (c_parser *parser, bool fndef_ok, specs->constexpr_p, &richloc); /* A parameter is initialized, which is invalid. Don't attempt to instrument the initializer. */ - int flag_sanitize_save = flag_sanitize; + sanitize_code_type flag_sanitize_save = flag_sanitize; if (TREE_CODE (d) == PARM_DECL) flag_sanitize = 0; init = c_parser_initializer (parser, d); @@ -11876,12 +11876,9 @@ c_parser_postfix_expression (c_parser *parser) if (c_parser_next_token_is (parser, CPP_NAME)) { c_token *comp_tok = c_parser_peek_token (parser); - /* Ignore the counted_by attribute for reference inside - offsetof since the information is not useful at all. */ offsetof_ref = build_component_ref (loc, offsetof_ref, comp_tok->value, - comp_tok->location, UNKNOWN_LOCATION, - false); + comp_tok->location, UNKNOWN_LOCATION); c_parser_consume_token (parser); while (c_parser_next_token_is (parser, CPP_DOT) || c_parser_next_token_is (parser, @@ -11908,14 +11905,11 @@ c_parser_postfix_expression (c_parser *parser) break; } c_token *comp_tok = c_parser_peek_token (parser); - /* Ignore the counted_by attribute for reference inside - offsetof since the information is not useful. */ offsetof_ref = build_component_ref (loc, offsetof_ref, comp_tok->value, comp_tok->location, - UNKNOWN_LOCATION, - false); + UNKNOWN_LOCATION); c_parser_consume_token (parser); } else @@ -12701,7 +12695,7 @@ c_parser_postfix_expression (c_parser *parser) /* If the array ref is inside TYPEOF or ALIGNOF, the call to .ACCESS_WITH_SIZE was not generated by the routine build_component_ref by default, we should generate it here. */ - if ((in_typeof || in_alignof) && TREE_CODE (ref) == COMPONENT_REF) + if (TREE_CODE (ref) == COMPONENT_REF) ref = handle_counted_by_for_component_ref (loc, ref); if (has_counted_by_object (ref)) @@ -16659,7 +16653,7 @@ c_parser_omp_variable_list (c_parser *parser, || CONVERT_EXPR_P (decl)) decl = TREE_OPERAND (decl, 0); - tree u = build_omp_clause (clause_loc, kind); + tree u = build_omp_clause (loc, kind); OMP_CLAUSE_DECL (u) = decl; OMP_CLAUSE_CHAIN (u) = list; list = u; @@ -20072,7 +20066,7 @@ c_parser_omp_clause_doacross (c_parser *parser, tree list) map ( [map-type-modifier[,] ...] 
map-kind: variable-list ) map-type-modifier: - always | close */ + always | close | present | iterator (iterators-definition) */ static tree c_parser_omp_clause_map (c_parser *parser, tree list, bool declare_mapper_p) @@ -20087,15 +20081,35 @@ c_parser_omp_clause_map (c_parser *parser, tree list, bool declare_mapper_p) int pos = 1; int map_kind_pos = 0; - while (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME) + int iterator_length = 0; + for (;;) { - if (c_parser_peek_nth_token_raw (parser, pos + 1)->type == CPP_COLON) + c_token *tok = c_parser_peek_nth_token_raw (parser, pos); + if (tok->type != CPP_NAME) + break; + + const char *p = IDENTIFIER_POINTER (tok->value); + c_token *next_tok = c_parser_peek_nth_token_raw (parser, pos + 1); + if (strcmp (p, "iterator") == 0 && next_tok->type == CPP_OPEN_PAREN) + { + unsigned n = pos + 2; + if (c_parser_check_balanced_raw_token_sequence (parser, &n) + && c_parser_peek_nth_token_raw (parser, n)->type + == CPP_CLOSE_PAREN) + { + iterator_length = n - pos + 1; + pos = n; + next_tok = c_parser_peek_nth_token_raw (parser, pos + 1); + } + } + + if (next_tok->type == CPP_COLON) { map_kind_pos = pos; break; } - if (c_parser_peek_nth_token_raw (parser, pos + 1)->type == CPP_COMMA) + if (next_tok->type == CPP_COMMA) pos++; else if (c_parser_peek_nth_token_raw (parser, pos + 1)->type == CPP_OPEN_PAREN) @@ -20117,6 +20131,7 @@ c_parser_omp_clause_map (c_parser *parser, tree list, bool declare_mapper_p) int present_modifier = 0; int mapper_modifier = 0; tree mapper_name = NULL_TREE; + tree iterators = NULL_TREE; for (int pos = 1; pos < map_kind_pos; ++pos) { c_token *tok = c_parser_peek_token (parser); @@ -20150,6 +20165,17 @@ c_parser_omp_clause_map (c_parser *parser, tree list, bool declare_mapper_p) close_modifier++; c_parser_consume_token (parser); } + else if (strcmp ("iterator", p) == 0) + { + if (iterators) + { + c_parser_error (parser, "too many %<iterator%> modifiers"); + parens.skip_until_found_close (parser); + return list; + } + iterators = c_parser_omp_iterators (parser); + pos += iterator_length - 1; + } else if (strcmp ("mapper", p) == 0) { c_parser_consume_token (parser); @@ -20223,8 +20249,8 @@ c_parser_omp_clause_map (c_parser *parser, tree list, bool declare_mapper_p) else { c_parser_error (parser, "%<map%> clause with map-type modifier other " - "than %<always%>, %<close%>, %<mapper%> or " - "%<present%>"); + "than %<always%>, %<close%>, %<mapper%>, " + "%<present%> or %<iterator%>"); parens.skip_until_found_close (parser); return list; } @@ -20273,9 +20299,19 @@ c_parser_omp_clause_map (c_parser *parser, tree list, bool declare_mapper_p) tree last_new = NULL_TREE; + if (iterators) + { + tree block = pop_scope (); + if (iterators == error_mark_node) + iterators = NULL_TREE; + else + TREE_VEC_ELT (iterators, 5) = block; + } + for (c = nl; c != list; c = OMP_CLAUSE_CHAIN (c)) { OMP_CLAUSE_SET_MAP_KIND (c, kind); + OMP_CLAUSE_ITERATORS (c) = iterators; last_new = c; } @@ -20534,8 +20570,11 @@ c_parser_omp_clause_device_type (c_parser *parser, tree list) to ( variable-list ) OpenMP 5.1: - from ( [present :] variable-list ) - to ( [present :] variable-list ) */ + from ( [motion-modifier[,] [motion-modifier[,]...]:] variable-list ) + to ( [motion-modifier[,] [motion-modifier[,]...]:] variable-list ) + + motion-modifier: + present | iterator (iterators-definition) */ static tree c_parser_omp_clause_from_to (c_parser *parser, enum omp_clause_code kind, @@ -20546,18 +20585,85 @@ c_parser_omp_clause_from_to (c_parser *parser, enum omp_clause_code kind, if (!parens.require_open (parser)) return list; + int pos = 1, colon_pos = 0; + int
iterator_length = 0; + + while (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_NAME) + { + const char *identifier = + IDENTIFIER_POINTER (c_parser_peek_nth_token_raw (parser, pos)->value); + if (c_parser_peek_nth_token_raw (parser, pos + 1)->type + == CPP_OPEN_PAREN) + { + unsigned int npos = pos + 2; + if (c_parser_check_balanced_raw_token_sequence (parser, &npos) + && (c_parser_peek_nth_token_raw (parser, npos)->type + == CPP_CLOSE_PAREN)) + { + if (strcmp (identifier, "iterator") == 0) + iterator_length = npos - pos + 1; + pos = npos; + } + } + if (c_parser_peek_nth_token_raw (parser, pos + 1)->type == CPP_COMMA) + pos += 2; + else + pos++; + if (c_parser_peek_nth_token_raw (parser, pos)->type == CPP_COLON) + { + colon_pos = pos; + break; + } + } + bool present = false; - c_token *token = c_parser_peek_token (parser); + tree iterators = NULL_TREE; - if (token->type == CPP_NAME - && strcmp (IDENTIFIER_POINTER (token->value), "present") == 0 - && c_parser_peek_2nd_token (parser)->type == CPP_COLON) + for (int pos = 1; pos < colon_pos; ++pos) { - present = true; - c_parser_consume_token (parser); - c_parser_consume_token (parser); + c_token *token = c_parser_peek_token (parser); + if (token->type == CPP_COMMA) + { + c_parser_consume_token (parser); + continue; + } + const char *p = IDENTIFIER_POINTER (token->value); + if (strcmp ("present", p) == 0) + { + if (present) + { + c_parser_error (parser, "too many %<present%> modifiers"); + parens.skip_until_found_close (parser); + return list; + } + present = true; + c_parser_consume_token (parser); + } + else if (strcmp ("iterator", p) == 0) + { + if (iterators) + { + c_parser_error (parser, "too many %<iterator%> modifiers"); + parens.skip_until_found_close (parser); + return list; + } + iterators = c_parser_omp_iterators (parser); + pos += iterator_length - 1; + } + else + { + error_at (token->location, + "%qs clause with modifier other than %<present%> or " + "%<iterator%>", + kind == OMP_CLAUSE_TO ?
"to" : "from"); + parens.skip_until_found_close (parser); + return list; + } } + if (colon_pos) + c_parser_require (parser, CPP_COLON, "expected %<:%>"); + tree nl = c_parser_omp_variable_list (parser, loc, kind, list); parens.skip_until_found_close (parser); @@ -20565,6 +20671,19 @@ c_parser_omp_clause_from_to (c_parser *parser, enum omp_clause_code kind, for (tree c = nl; c != list; c = OMP_CLAUSE_CHAIN (c)) OMP_CLAUSE_MOTION_PRESENT (c) = 1; + if (iterators) + { + tree block = pop_scope (); + if (iterators == error_mark_node) + iterators = NULL_TREE; + else + TREE_VEC_ELT (iterators, 5) = block; + } + + if (iterators) + for (tree c = nl; c != list; c = OMP_CLAUSE_CHAIN (c)) + OMP_CLAUSE_ITERATORS (c) = iterators; + return nl; } @@ -27185,6 +27304,30 @@ c_finish_omp_declare_variant (c_parser *parser, tree fndecl, tree parms) undeclared_variable (token->location, token->value); variant = error_mark_node; } + else if (TREE_CODE (variant) != FUNCTION_DECL) + { + error_at (token->location, "variant %qD is not a function", + variant); + variant = error_mark_node; + } + else if (fndecl_built_in_p (variant) + && (strncmp (IDENTIFIER_POINTER (DECL_NAME (variant)), + "__builtin_", strlen ("__builtin_")) == 0 + || strncmp (IDENTIFIER_POINTER (DECL_NAME (variant)), + "__sync_", strlen ("__sync_")) == 0 + || strncmp (IDENTIFIER_POINTER (DECL_NAME (variant)), + "__atomic_", strlen ("__atomic_")) == 0)) + { + error_at (token->location, "variant %qD is a built-in", + variant); + variant = error_mark_node; + } + else if (variant == fndecl) + { + error_at (token->location, "variant %qD is the same as base function", + variant); + variant = error_mark_node; + } c_parser_consume_token (parser); @@ -27258,30 +27401,6 @@ c_finish_omp_declare_variant (c_parser *parser, tree fndecl, tree parms) goto fail; ctx = omp_check_context_selector (match_loc, ctx, OMP_CTX_DECLARE_VARIANT); - if (ctx != error_mark_node && variant != error_mark_node) - { - if (TREE_CODE (variant) != FUNCTION_DECL) - { - error_at (token->location, "variant %qD is not a function", - variant); - variant = error_mark_node; - } - else if (fndecl_built_in_p (variant) - && (strncmp (IDENTIFIER_POINTER (DECL_NAME (variant)), - "__builtin_", strlen ("__builtin_")) - == 0 - || strncmp (IDENTIFIER_POINTER (DECL_NAME (variant)), - "__sync_", strlen ("__sync_")) - == 0 - || strncmp (IDENTIFIER_POINTER (DECL_NAME (variant)), - "__atomic_", strlen ("__atomic_")) - == 0)) - { - error_at (token->location, "variant %qD is a built-in", - variant); - variant = error_mark_node; - } - } } else if (ccode == adjust_args) { @@ -27423,18 +27542,64 @@ c_finish_omp_declare_variant (c_parser *parser, tree fndecl, tree parms) parens.require_close (parser); } while (c_parser_next_token_is_not (parser, CPP_PRAGMA_EOL)); + if (variant != error_mark_node && !has_match) + { + c_parser_error (parser, "expected % clause"); + variant = error_mark_node; + } c_parser_skip_to_pragma_eol (parser); - if ((ctx != error_mark_node && variant != error_mark_node) + /* At this point, we have completed parsing of the pragma, now it's + on to error checking. */ + if (variant == error_mark_node || ctx == error_mark_node) + /* Previously diagnosed error. */ + return; + + if ((has_adjust_args || append_args_tree) && !omp_get_context_selector (ctx, OMP_TRAIT_SET_CONSTRUCT, - OMP_TRAIT_CONSTRUCT_SIMD)) + OMP_TRAIT_CONSTRUCT_DISPATCH)) { - bool fail = false; - if (append_args_tree) + error_at (has_adjust_args ? 
adjust_args_loc : append_args_loc, + "an %qs clause can only be specified if the " + "%<dispatch%> selector of the %<construct%> selector " + "set appears in the %<match%> clause", + has_adjust_args ? "adjust_args" : "append_args"); + return; + } + + if (!omp_get_context_selector (ctx, OMP_TRAIT_SET_CONSTRUCT, + OMP_TRAIT_CONSTRUCT_SIMD)) + /* Check that the base and variant have compatible types. */ + { + tree base_type = TREE_TYPE (fndecl); + tree variant_type = TREE_TYPE (variant); + bool unprototyped_variant + = (TYPE_ARG_TYPES (variant_type) == NULL_TREE + && !TYPE_NO_NAMED_ARGS_STDARG_P (variant_type)); + + if (append_args_tree + && TYPE_ARG_TYPES (base_type) == NULL_TREE + && !TYPE_NO_NAMED_ARGS_STDARG_P (base_type)) + { + /* The base function is a pre-C23 unprototyped function. Without + a prototype, we don't know the offset where the append_args go. + That offset needs to be stored with the append_args in the + variant function attributes, so we cannot presently handle + this case. */ + sorry_at (append_args_loc, + "%<append_args%> with unprototyped base function " + "is not supported yet"); + inform (DECL_SOURCE_LOCATION (fndecl), + "base function %qD declared here", fndecl); + return; + } + else if (append_args_tree) { + /* Find nbase_args, the number of fixed arguments in the base + function. */ int nappend_args = 0; int nbase_args = 0; - for (tree t = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); + for (tree t = TYPE_ARG_TYPES (base_type); t && TREE_VALUE (t) != void_type_node; t = TREE_CHAIN (t)) nbase_args++; for (tree t = append_args_tree; t; t = TREE_CHAIN (t)) @@ -27445,135 +27610,117 @@ c_finish_omp_declare_variant (c_parser *parser, tree fndecl, tree parms) append_args_tree = build_tree_list (build_int_cst (integer_type_node, nbase_args), append_args_tree); - tree args, arg; - args = arg = TYPE_ARG_TYPES (TREE_TYPE (variant)); - for (int j = 0; j < nbase_args && arg; j++, arg = TREE_CHAIN (arg)) - args = arg; - for (int i = 0; i < nappend_args && arg; i++) - arg = TREE_CHAIN (arg); - tree saved_args; - if (nbase_args && args) - { - saved_args = TREE_CHAIN (args); - TREE_CHAIN (args) = arg; + + /* Give a specific diagnostic if the append_args parameters + of the variant are of the wrong type, or missing. The + compatible types test below could fail to detect this if + the variant is a varargs function. */ + if (!unprototyped_variant) + { + tree args = TYPE_ARG_TYPES (variant_type); + for (int i = 0; args && i < nbase_args; + i++, args = TREE_CHAIN (args)) + ; + for (int i = 0; i < nappend_args; i++, args = TREE_CHAIN (args)) + if (!args || !c_omp_interop_t_p (TREE_VALUE (args))) + { + error_at (DECL_SOURCE_LOCATION (variant), + "argument %d of %qD must be of " + "%<omp_interop_t%>", + nbase_args + i + 1, variant); + inform (append_args_loc, + "%<append_args%> specified here"); + return; + } } - else + + /* Perform the "implementation defined transformation" on the type + of the base function to add the append_args before checking it + for compatibility with the function variant's type.
*/ + tree args = TYPE_ARG_TYPES (base_type); + tree newargs = NULL_TREE; + tree lastarg = NULL_TREE; + for (int j = 0; j < nbase_args; j++, args = TREE_CHAIN (args)) { - saved_args = args; - TYPE_ARG_TYPES (TREE_TYPE (variant)) = arg; - TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (variant)) = 1; + tree t = tree_cons (TREE_PURPOSE (args), + TREE_VALUE (args), NULL_TREE); + if (lastarg) + TREE_CHAIN (lastarg) = t; + else + newargs = t; + lastarg = t; } - if (!comptypes (TREE_TYPE (fndecl), TREE_TYPE (variant))) - fail = true; - if (nbase_args && args) - TREE_CHAIN (args) = saved_args; - else + tree type = lookup_name (get_identifier ("omp_interop_t")); + type = type ? TREE_TYPE (type) : pointer_sized_int_node; + for (int j = 0; j < nappend_args; j++) { - TYPE_ARG_TYPES (TREE_TYPE (variant)) = saved_args; - TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (variant)) = 0; + tree t = tree_cons (NULL_TREE, type, NULL_TREE); + if (lastarg) + TREE_CHAIN (lastarg) = t; + else + newargs = t; + lastarg = t; } - arg = saved_args; - if (!fail) - for (int i = 0; i < nappend_args; i++, arg = TREE_CHAIN (arg)) - if (!arg || !c_omp_interop_t_p (TREE_VALUE (arg))) - { - error_at (DECL_SOURCE_LOCATION (variant), - "argument %d of %qD must be of %<omp_interop_t%>", - nbase_args + i + 1, variant); - inform (append_args_loc, "%<append_args%> specified here"); - break; - } - } - else - { - if (comptypes (TREE_TYPE (fndecl), TREE_TYPE (variant))) + TREE_CHAIN (lastarg) = args; + + /* Temporarily stuff newargs into the original base_type. */ + tree saveargs = TYPE_ARG_TYPES (base_type); + TYPE_ARG_TYPES (base_type) = newargs; + bool fail = !comptypes (base_type, variant_type); + TYPE_ARG_TYPES (base_type) = saveargs; + + if (fail) { - if (TYPE_ARG_TYPES (TREE_TYPE (variant)) == NULL_TREE - && TYPE_ARG_TYPES (TREE_TYPE (fndecl)) != NULL_TREE) - { - if (!append_args_tree) - TYPE_ARG_TYPES (TREE_TYPE (variant)) - = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); - else - { - tree new_args = NULL_TREE; - tree arg, last_arg = NULL_TREE; - for (arg = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); - arg && arg != void_type_node; arg = TREE_CHAIN (arg)) - { - if (new_args == NULL_TREE) - new_args = last_arg = copy_node (arg); - else - { - TREE_CHAIN (last_arg) = copy_node (arg); - last_arg = TREE_CHAIN (last_arg); - } - } - for (tree t3 = append_args_tree; t3; t3 = TREE_CHAIN (t3)) - { - tree type = lookup_name (get_identifier ("omp_interop_t")); - type = type ? TREE_TYPE (type) : ptr_type_node; - last_arg = tree_cons (NULL_TREE, type, last_arg); - } - TREE_CHAIN (last_arg) = arg; - TYPE_ARG_TYPES (TREE_TYPE (variant)) = new_args; - } - } + error_at (token->location, + "variant %qD and base %qD have incompatible types " + "after %<append_args%> adjustment", + variant, fndecl); + inform (DECL_SOURCE_LOCATION (variant), + "%<declare variant%> candidate %qD declared here", + variant); + return; } - else - fail = true; + else if (unprototyped_variant) + /* If we've got an unprototyped variant, copy the transformed + base arg types to the variant. This is needed later by + modify_call_for_omp_dispatch.
*/ + TYPE_ARG_TYPES (variant_type) = newargs; } - if (fail) - { - error_at (token->location, - "variant %qD and base %qD have incompatible types", - variant, fndecl); - variant = error_mark_node; - } - } - if (ctx != error_mark_node && variant != error_mark_node) - { - C_DECL_USED (variant) = 1; - tree construct = omp_get_context_selector_list (ctx, - OMP_TRAIT_SET_CONSTRUCT); - omp_mark_declare_variant (match_loc, variant, construct); - if (omp_context_selector_matches (ctx, NULL_TREE, false)) + else /* No append_args present. */ { - tree attr = tree_cons (get_identifier ("omp declare variant base"), - build_tree_list (variant, ctx), - DECL_ATTRIBUTES (fndecl)); - DECL_ATTRIBUTES (fndecl) = attr; + if (!comptypes (base_type, variant_type)) + { + error_at (token->location, + "variant %qD and base %qD have incompatible types", + variant, fndecl); + inform (DECL_SOURCE_LOCATION (variant), + "%<declare variant%> candidate %qD declared here", + variant); + return; + } + else if (TYPE_ARG_TYPES (variant_type) == NULL_TREE + && !TYPE_NO_NAMED_ARGS_STDARG_P (variant_type) + && TYPE_ARG_TYPES (base_type) != NULL_TREE) + /* If we've got an unprototyped variant but the base has + a prototype, copy the base arg types to the variant. */ + TYPE_ARG_TYPES (variant_type) = TYPE_ARG_TYPES (base_type); } } - if (has_adjust_args || append_args_tree) + /* If we made it here, store the parsed information. */ + C_DECL_USED (variant) = 1; + tree construct = omp_get_context_selector_list (ctx, + OMP_TRAIT_SET_CONSTRUCT); + omp_mark_declare_variant (match_loc, variant, construct); + if (omp_context_selector_matches (ctx, NULL_TREE, false)) { - if (!has_match) - { - error_at (has_adjust_args ? adjust_args_loc : append_args_loc, - "an %qs clause requires a %<match%> clause", - has_adjust_args ? "adjust_args" : "append_args"); - } - else if (ctx != error_mark_node && variant != error_mark_node) - { - tree attr = lookup_attribute ("omp declare variant base", - DECL_ATTRIBUTES (fndecl)); - if (attr != NULL_TREE) - { - tree ctx = TREE_VALUE (TREE_VALUE (attr)); - if (!omp_get_context_selector (ctx, OMP_TRAIT_SET_CONSTRUCT, - OMP_TRAIT_CONSTRUCT_DISPATCH)) - error_at (has_adjust_args ? adjust_args_loc : append_args_loc, - "an %qs clause can only be specified if the " - "%<dispatch%> selector of the %<construct%> selector " - "set appears in the %<match%> clause", - has_adjust_args ? "adjust_args" : "append_args"); - } - } + tree attr = tree_cons (get_identifier ("omp declare variant base"), + build_tree_list (variant, ctx), + DECL_ATTRIBUTES (fndecl)); + DECL_ATTRIBUTES (fndecl) = attr; } - if ((ctx != error_mark_node && variant != error_mark_node) - && (need_device_ptr_list || append_args_tree)) + if (need_device_ptr_list || append_args_tree) { tree variant_decl = tree_strip_nop_conversions (variant); tree t = build_tree_list (need_device_ptr_list, diff --git a/gcc/c/c-tree.h b/gcc/c/c-tree.h index bb0b113754e5..afec03e25ebc 100644 --- a/gcc/c/c-tree.h +++ b/gcc/c/c-tree.h @@ -163,7 +163,8 @@ along with GCC; see the file COPYING3. If not see (TREE_CODE (TYPE) == BOOLEAN_TYPE \ || (TREE_CODE (TYPE) == ENUMERAL_TYPE \ && ENUM_UNDERLYING_TYPE (TYPE) != NULL_TREE \ - && TREE_CODE (ENUM_UNDERLYING_TYPE (TYPE)) == BOOLEAN_TYPE)) + && (TREE_CODE (ENUM_UNDERLYING_TYPE (TYPE)) == BOOLEAN_TYPE \ + || c_hardbool_type_attr (TYPE)))) /* Record parser information about an expression that is irrelevant for code generation alongside a tree representing its value.
*/ @@ -820,7 +821,7 @@ extern void mark_exp_read (tree); extern tree composite_type (tree, tree); extern tree lookup_field (const_tree, tree); extern tree build_component_ref (location_t, tree, tree, location_t, - location_t, bool = true); + location_t); extern tree handle_counted_by_for_component_ref (location_t, tree); extern tree build_array_ref (location_t, tree, tree); extern tree build_omp_array_section (location_t, tree, tree, tree); diff --git a/gcc/c/c-typeck.cc b/gcc/c/c-typeck.cc index ed6e56e72792..6a08fcd396a4 100644 --- a/gcc/c/c-typeck.cc +++ b/gcc/c/c-typeck.cc @@ -137,6 +137,7 @@ static void record_maybe_used_decl (tree); static bool comptypes_internal (const_tree, const_tree, struct comptypes_data *data); static bool comptypes_check_for_composite (tree t1, tree t2); +static bool handle_counted_by_p (tree); /* Return true if EXP is a null pointer constant, false otherwise. */ @@ -641,7 +642,8 @@ struct composite_cache { }; tree -composite_type_internal (tree t1, tree t2, struct composite_cache* cache) +composite_type_internal (tree t1, tree t2, tree cond, + struct composite_cache* cache) { enum tree_code code1; enum tree_code code2; @@ -686,8 +688,8 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) { tree pointed_to_1 = TREE_TYPE (t1); tree pointed_to_2 = TREE_TYPE (t2); - tree target = composite_type_internal (pointed_to_1, - pointed_to_2, cache); + tree target = composite_type_internal (pointed_to_1, pointed_to_2, + cond, cache); t1 = c_build_pointer_type_for_mode (target, TYPE_MODE (t1), false); t1 = c_build_type_attribute_variant (t1, attributes); return qualify_type (t1, t2); @@ -695,25 +697,20 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) case ARRAY_TYPE: { - tree elt = composite_type_internal (TREE_TYPE (t1), TREE_TYPE (t2), - cache); - int quals; - tree unqual_elt; tree d1 = TYPE_DOMAIN (t1); tree d2 = TYPE_DOMAIN (t2); - bool d1_variable, d2_variable; - bool d1_zero, d2_zero; - bool t1_complete, t2_complete; /* We should not have any type quals on arrays at all. */ gcc_assert (!TYPE_QUALS_NO_ADDR_SPACE (t1) && !TYPE_QUALS_NO_ADDR_SPACE (t2)); - t1_complete = COMPLETE_TYPE_P (t1); - t2_complete = COMPLETE_TYPE_P (t2); + bool t1_complete = COMPLETE_TYPE_P (t1); + bool t2_complete = COMPLETE_TYPE_P (t2); - d1_zero = d1 == NULL_TREE || !TYPE_MAX_VALUE (d1); - d2_zero = d2 == NULL_TREE || !TYPE_MAX_VALUE (d2); + bool d1_zero = d1 == NULL_TREE || !TYPE_MAX_VALUE (d1); + bool d2_zero = d2 == NULL_TREE || !TYPE_MAX_VALUE (d2); + + bool d1_variable, d2_variable; d1_variable = (!d1_zero && (TREE_CODE (TYPE_MIN_VALUE (d1)) != INTEGER_CST @@ -722,10 +719,8 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) && (TREE_CODE (TYPE_MIN_VALUE (d2)) != INTEGER_CST || TREE_CODE (TYPE_MAX_VALUE (d2)) != INTEGER_CST)); - bool use1 = TYPE_DOMAIN (t1) - && (d2_variable || d2_zero || !d1_variable); - bool use2 = TYPE_DOMAIN (t2) - && (d1_variable || d1_zero || !d2_variable); + bool use1 = d1 && (d2_variable || d2_zero || !d1_variable); + bool use2 = d2 && (d1_variable || d1_zero || !d2_variable); /* If the first is an unspecified size pick the other one. */ if (d2_variable && c_type_unspecified_p (t1)) @@ -734,25 +729,53 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) use1 = false; } - /* Save space: see if the result is identical to one of the args. 
*/ - if (elt == TREE_TYPE (t1) && use1) - return c_build_type_attribute_variant (t1, attributes); - if (elt == TREE_TYPE (t2) && use2) - return c_build_type_attribute_variant (t2, attributes); + /* If both are VLAs but not unspecified and we are in the + conditional operator, we create a conditional to select + the size of the active branch. */ + bool use0 = cond && d1_variable && !c_type_unspecified_p (t1) + && d2_variable && !c_type_unspecified_p (t2); + + tree td; + tree elt = composite_type_internal (TREE_TYPE (t1), TREE_TYPE (t2), + cond, cache); + + if (!use0) + { + /* Save space: see if the result is identical to one of the args. */ + if (elt == TREE_TYPE (t1) && use1) + return c_build_type_attribute_variant (t1, attributes); + if (elt == TREE_TYPE (t2) && use2) + return c_build_type_attribute_variant (t2, attributes); + + if (elt == TREE_TYPE (t1) && !TYPE_DOMAIN (t2) && !TYPE_DOMAIN (t1)) + return c_build_type_attribute_variant (t1, attributes); + if (elt == TREE_TYPE (t2) && !TYPE_DOMAIN (t2) && !TYPE_DOMAIN (t1)) + return c_build_type_attribute_variant (t2, attributes); + + td = TYPE_DOMAIN (use1 ? t1 : t2); + } + else + { + /* Not used in C. */ + gcc_assert (size_zero_node == TYPE_MIN_VALUE (d1)); + gcc_assert (size_zero_node == TYPE_MIN_VALUE (d2)); - if (elt == TREE_TYPE (t1) && !TYPE_DOMAIN (t2) && !TYPE_DOMAIN (t1)) - return c_build_type_attribute_variant (t1, attributes); - if (elt == TREE_TYPE (t2) && !TYPE_DOMAIN (t2) && !TYPE_DOMAIN (t1)) - return c_build_type_attribute_variant (t2, attributes); + tree d = fold_build3_loc (UNKNOWN_LOCATION, COND_EXPR, sizetype, + cond, TYPE_MAX_VALUE (d1), + TYPE_MAX_VALUE (d2)); + + td = build_index_type (d); + } /* Merge the element types, and have a size if either arg has one. We may have qualifiers on the element types. To set up TYPE_MAIN_VARIANT correctly, we need to form the composite of the unqualified types and add the qualifiers back at the end. */ - quals = TYPE_QUALS (strip_array_types (elt)); - unqual_elt = c_build_qualified_type (elt, TYPE_UNQUALIFIED); - t1 = c_build_array_type (unqual_elt, TYPE_DOMAIN (use1 ? t1 : t2)); + int quals = TYPE_QUALS (strip_array_types (elt)); + tree unqual_elt = c_build_qualified_type (elt, TYPE_UNQUALIFIED); + + t1 = c_build_array_type (unqual_elt, td); /* Check that a type which has a varying outermost dimension got marked has having a variable size. */ @@ -819,7 +842,7 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) gcc_assert (DECL_NAME (a) == DECL_NAME (b)); gcc_checking_assert (!DECL_NAME (a) || comptypes (ta, tb)); - tree t = composite_type_internal (ta, tb, cache); + tree t = composite_type_internal (ta, tb, cond, cache); tree f = build_decl (input_location, FIELD_DECL, DECL_NAME (a), t); DECL_PACKED (f) = DECL_PACKED (a); @@ -876,8 +899,8 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) /* Function types: prefer the one that specified arg types. If both do, merge the arg types. Also merge the return types. 
*/ { - tree valtype = composite_type_internal (TREE_TYPE (t1), - TREE_TYPE (t2), cache); + tree valtype = composite_type_internal (TREE_TYPE (t1), TREE_TYPE (t2), + cond, cache); tree p1 = TYPE_ARG_TYPES (t1); tree p2 = TYPE_ARG_TYPES (t2); int len; @@ -956,7 +979,7 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) { TREE_VALUE (n) = composite_type_internal (TREE_TYPE (memb), TREE_VALUE (p2), - cache); + cond, cache); pedwarn (input_location, OPT_Wpedantic, "function types not truly compatible in ISO C"); goto parm_done; @@ -979,14 +1002,14 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) TREE_VALUE (n) = composite_type_internal (TREE_TYPE (memb), TREE_VALUE (p1), - cache); + cond, cache); pedwarn (input_location, OPT_Wpedantic, "function types not truly compatible in ISO C"); goto parm_done; } } } - TREE_VALUE (n) = composite_type_internal (mv1, mv2, cache); + TREE_VALUE (n) = composite_type_internal (mv1, mv2, cond, cache); parm_done: ; } @@ -1001,18 +1024,25 @@ composite_type_internal (tree t1, tree t2, struct composite_cache* cache) } tree -composite_type (tree t1, tree t2) +composite_type_cond (tree t1, tree t2, tree cond) { gcc_checking_assert (comptypes_check_for_composite (t1, t2)); struct composite_cache cache = { }; - tree n = composite_type_internal (t1, t2, &cache); + tree n = composite_type_internal (t1, t2, cond, &cache); gcc_checking_assert (comptypes_check_for_composite (n, t1)); gcc_checking_assert (comptypes_check_for_composite (n, t2)); return n; } + +tree +composite_type (tree t1, tree t2) +{ + return composite_type_cond (t1, t2, NULL_TREE); +} + /* Return the type of a conditional expression between pointers to possibly differently qualified versions of compatible types. @@ -1020,7 +1050,7 @@ composite_type (tree t1, tree t2) true; if that isn't so, this may crash. */ static tree -common_pointer_type (tree t1, tree t2) +common_pointer_type (tree t1, tree t2, tree cond) { tree attributes; unsigned target_quals; @@ -1047,8 +1077,8 @@ common_pointer_type (tree t1, tree t2) qualifiers of the two types' targets. */ tree pointed_to_1 = TREE_TYPE (t1); tree pointed_to_2 = TREE_TYPE (t2); - tree target = composite_type (TYPE_MAIN_VARIANT (pointed_to_1), - TYPE_MAIN_VARIANT (pointed_to_2)); + tree target = composite_type_cond (TYPE_MAIN_VARIANT (pointed_to_1), + TYPE_MAIN_VARIANT (pointed_to_2), cond); /* Strip array types to get correct qualifier for pointers to arrays */ quals1 = TYPE_QUALS_NO_ADDR_SPACE (strip_array_types (pointed_to_1)); @@ -1970,6 +2000,9 @@ tagged_types_tu_compatible_p (const_tree t1, const_tree t2, ft2 = DECL_BIT_FIELD_TYPE (s2); } + if (!ft1 || !ft2) + return false; + if (TREE_CODE (ft1) == ERROR_MARK || TREE_CODE (ft2) == ERROR_MARK) return false; @@ -2454,6 +2487,10 @@ struct c_expr default_function_array_read_conversion (location_t loc, struct c_expr exp) { mark_exp_read (exp.value); + /* We only generate a call to .ACCESS_WITH_SIZE when it is a read. */ + if (TREE_CODE (exp.value) == COMPONENT_REF + && handle_counted_by_p (exp.value)) + exp.value = handle_counted_by_for_component_ref (loc, exp.value); return default_function_array_conversion (loc, exp); } @@ -2555,6 +2592,11 @@ convert_lvalue_to_rvalue (location_t loc, struct c_expr exp, bool force_non_npc = false; if (read_p) mark_exp_read (exp.value); + /* We only generate a call to .ACCESS_WITH_SIZE when it is a read. 
*/ + if (read_p && TREE_CODE (exp.value) == COMPONENT_REF + && handle_counted_by_p (exp.value)) + exp.value = handle_counted_by_for_component_ref (loc, exp.value); + if (convert_p) exp = default_function_array_conversion (loc, exp); if (!VOID_TYPE_P (TREE_TYPE (exp.value))) @@ -2648,6 +2690,20 @@ convert_lvalue_to_rvalue (location_t loc, struct c_expr exp, return exp; } +/* Wrapper for the overload above, same arguments but for tree rather than + c_expr. This is important for hardbools to decay to bools. */ + +static inline tree +convert_lvalue_to_rvalue (location_t loc, tree val, + bool convert_p, bool read_p, bool for_init = false) +{ + struct c_expr expr; + memset (&expr, 0, sizeof (expr)); + expr.value = val; + expr = convert_lvalue_to_rvalue (loc, expr, convert_p, read_p, for_init); + return expr.value; +} + /* EXP is an expression of integer type. Apply the integer promotions to it and return the promoted value. */ @@ -2718,6 +2774,10 @@ default_conversion (tree exp) tree promoted_type; mark_exp_read (exp); + /* We only generate a call to .ACCESS_WITH_SIZE when it is a read. */ + if (TREE_CODE (exp) == COMPONENT_REF + && handle_counted_by_p (exp)) + exp = handle_counted_by_for_component_ref (EXPR_LOCATION (exp), exp); /* Functions and arrays have been converted during parsing. */ gcc_assert (code != FUNCTION_TYPE); @@ -2936,10 +2996,63 @@ should_suggest_deref_p (tree datum_type) return false; } -/* For a SUBDATUM field of a structure or union DATUM, generate a REF to - the object that represents its counted_by per the attribute counted_by - attached to this field if it's a flexible array member field, otherwise - return NULL_TREE. +/* Given a component ref REF, decide whether we should handle its counted_by + attribute based on its context: + do not handle counted_by when inside the offsetof, typeof or alignof + operators. */ + +static bool +handle_counted_by_p (tree ref) +{ + gcc_assert (TREE_CODE (ref) == COMPONENT_REF); + tree datum = TREE_OPERAND (ref, 0); + /* If the component_ref is built for an offsetof, i.e., the datum + of the component_ref is an indirect_ref of null_pointer_node, + we should not generate a call to .ACCESS_WITH_SIZE. */ + if (TREE_CODE (datum) == INDIRECT_REF + && TREE_OPERAND (datum, 0) == null_pointer_node) + return false; + if (in_typeof || in_alignof) + return false; + return true; +} + +/* Given a component ref REF, if there is a counted_by attribute attached, + issue an error when the element_type is a structure or union including a + flexible array member. */ + +static void +check_counted_by_attribute (location_t loc, tree ref) +{ + tree subdatum = TREE_OPERAND (ref, 1); + tree sub_type = TREE_TYPE (subdatum); + + if (!c_flexible_array_member_type_p (sub_type) + && TREE_CODE (sub_type) != POINTER_TYPE) + return; + + tree element_type = TREE_TYPE (sub_type); + + tree attr_counted_by = lookup_attribute ("counted_by", + DECL_ATTRIBUTES (subdatum)); + if (attr_counted_by) + { + /* Issue an error when the element_type is a structure or + union including a flexible array member. */ + if (RECORD_OR_UNION_TYPE_P (element_type) + && TYPE_INCLUDES_FLEXARRAY (element_type)) + { + error_at (loc, + "%<counted_by%> attribute is not allowed for a pointer to" + " structure or union with flexible array member"); + return; + } + } +} + +/* For a SUBDATUM field of a structure or union DATUM, generate a REF + to the object that represents its counted_by per the attribute + counted_by attached to this field if it's a flexible array member + or a pointer field, otherwise return NULL_TREE.
Set COUNTED_BY_TYPE to the TYPE of the counted_by field. For example, if: @@ -2954,13 +3067,16 @@ should_suggest_deref_p (tree datum_type) the ref to the object that represents its element count will be: &(p->k) - */ + static tree -build_counted_by_ref (tree datum, tree subdatum, tree *counted_by_type) +build_counted_by_ref (tree datum, tree subdatum, + tree *counted_by_type) { tree type = TREE_TYPE (datum); - if (!c_flexible_array_member_type_p (TREE_TYPE (subdatum))) + tree sub_type = TREE_TYPE (subdatum); + if (!c_flexible_array_member_type_p (sub_type) + && TREE_CODE (sub_type) != POINTER_TYPE) return NULL_TREE; tree attr_counted_by = lookup_attribute ("counted_by", @@ -2991,8 +3107,11 @@ build_counted_by_ref (tree datum, tree subdatum, tree *counted_by_type) } /* Given a COMPONENT_REF REF with the location LOC, the corresponding - COUNTED_BY_REF, and the COUNTED_BY_TYPE, generate an INDIRECT_REF - to a call to the internal function .ACCESS_WITH_SIZE. + COUNTED_BY_REF, and the COUNTED_BY_TYPE, generate the corresponding + call to the internal function .ACCESS_WITH_SIZE. + + A: For a flexible array member, generate an INDIRECT_REF to a call to + the internal function .ACCESS_WITH_SIZE. REF @@ -3002,12 +3121,25 @@ build_counted_by_ref (tree datum, tree subdatum, tree *counted_by_type) TYPE_SIZE_UNIT for element) NOTE: The return type of this function is the POINTER type pointing - to the original flexible array type. - Then the type of the INDIRECT_REF is the original flexible array type. - + to the original flexible array type. Then the type of the INDIRECT_REF + is the original flexible array type. The type of the first argument of this function is a POINTER type to the original flexible array type. + B: For pointers with counted_by, generate a call to the internal function + .ACCESS_WITH_SIZE. + + REF + + to: + + .ACCESS_WITH_SIZE (REF, COUNTED_BY_REF, (* TYPE_OF_SIZE)0, + TYPE_SIZE_UNIT for element) + + NOTE: The return type of this function is the original pointer type. + The type of the first argument of this function is the original + pointer type. + The 3rd argument of the call is a constant 0 with the pointer TYPE whose pointee type is the TYPE of the object pointed by COUNTED_BY_REF. @@ -3020,16 +3152,25 @@ build_access_with_size_for_counted_by (location_t loc, tree ref, tree counted_by_ref, tree counted_by_type) { - gcc_assert (c_flexible_array_member_type_p (TREE_TYPE (ref))); - /* The result type of the call is a pointer to the flexible array type. */ - tree result_type = c_build_pointer_type (TREE_TYPE (ref)); + gcc_assert (c_flexible_array_member_type_p (TREE_TYPE (ref)) + || TREE_CODE (TREE_TYPE (ref)) == POINTER_TYPE); + + bool is_fam = c_flexible_array_member_type_p (TREE_TYPE (ref)); + + /* The result type of the call is a pointer to the flexible array type; + or the original pointer type of the pointer field with counted_by. */ + tree result_type = is_fam ? c_build_pointer_type (TREE_TYPE (ref)) + : TREE_TYPE (ref); + tree element_size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (ref))); - tree first_param - = c_fully_fold (array_to_pointer_conversion (loc, ref), false, NULL); + tree first_param = is_fam + ?
c_fully_fold (array_to_pointer_conversion (loc, ref), + false, NULL) + : ref; tree second_param = c_fully_fold (counted_by_ref, false, NULL); - tree third_param = build_int_cst (build_pointer_type (counted_by_type), 0); + tree third_param = build_int_cst (c_build_pointer_type (counted_by_type), 0); tree call = build_call_expr_internal_loc (loc, IFN_ACCESS_WITH_SIZE, @@ -3040,7 +3181,8 @@ build_access_with_size_for_counted_by (location_t loc, tree ref, element_size); /* Wrap the call with an INDIRECT_REF with the flexible array type. */ - call = build1 (INDIRECT_REF, TREE_TYPE (ref), call); + if (is_fam) + call = build1 (INDIRECT_REF, TREE_TYPE (ref), call); SET_EXPR_LOCATION (call, loc); return call; } @@ -3057,6 +3199,11 @@ handle_counted_by_for_component_ref (location_t loc, tree ref) tree datum = TREE_OPERAND (ref, 0); tree subdatum = TREE_OPERAND (ref, 1); tree counted_by_type = NULL_TREE; + + if (!(c_flexible_array_member_type_p (TREE_TYPE (ref)) + || TREE_CODE (TREE_TYPE (ref)) == POINTER_TYPE)) + return ref; + tree counted_by_ref = build_counted_by_ref (datum, subdatum, &counted_by_type); if (counted_by_ref) @@ -3076,8 +3223,7 @@ handle_counted_by_for_component_ref (location_t loc, tree ref) tree build_component_ref (location_t loc, tree datum, tree component, - location_t component_loc, location_t arrow_loc, - bool handle_counted_by) + location_t component_loc, location_t arrow_loc) { tree type = TREE_TYPE (datum); enum tree_code code = TREE_CODE (type); @@ -3149,8 +3295,6 @@ build_component_ref (location_t loc, tree datum, tree component, int quals; tree subtype; bool use_datum_quals; - /* Do not handle counted_by when in typeof and alignof operator. */ - handle_counted_by = handle_counted_by && !in_typeof && !in_alignof; if (TREE_TYPE (subdatum) == error_mark_node) return error_mark_node; @@ -3171,8 +3315,7 @@ build_component_ref (location_t loc, tree datum, tree component, NULL_TREE); SET_EXPR_LOCATION (ref, loc); - if (handle_counted_by) - ref = handle_counted_by_for_component_ref (loc, ref); + check_counted_by_attribute (loc, ref); if (TREE_READONLY (subdatum) || (use_datum_quals && TREE_READONLY (datum))) @@ -3359,6 +3502,11 @@ build_array_ref (location_t loc, tree array, tree index) bool was_vector = VECTOR_TYPE_P (TREE_TYPE (array)); bool non_lvalue = convert_vector_to_array_for_subscript (loc, &array, index); + /* We only generate a call to .ACCESS_WITH_SIZE when it is a read. */ + if (TREE_CODE (array) == COMPONENT_REF + && handle_counted_by_p (array)) + array = handle_counted_by_for_component_ref (loc, array); + if (TREE_CODE (TREE_TYPE (array)) == ARRAY_TYPE) { tree rval, type; @@ -4531,6 +4679,9 @@ convert_arguments (location_t loc, vec arg_loc, tree fntype, val, valtype, npc, rname, parmnum, argnum, excess_precision, 0); } + /* A NULLPTR type is just a nullptr always. 
*/ + else if (TREE_CODE (TREE_TYPE (val)) == NULLPTR_TYPE) + parmval = omit_one_operand_loc (ploc, TREE_TYPE (val), nullptr_node, val); else if (promote_float_arg) { if (type_generic) @@ -4926,7 +5077,7 @@ pointer_diff (location_t loc, tree op0, tree op1, tree *instrument_expr) if (!addr_space_superset (as0, as1, &as_common)) gcc_unreachable (); - common_type = common_pointer_type (TREE_TYPE (op0), TREE_TYPE (op1)); + common_type = common_pointer_type (TREE_TYPE (op0), TREE_TYPE (op1), NULL_TREE); op0 = convert (common_type, op0); op1 = convert (common_type, op1); } @@ -5271,7 +5422,9 @@ build_atomic_assign (location_t loc, tree lhs, enum tree_code modifycode, /* newval = old + val; */ if (rhs_type != rhs_semantic_type) val = build1 (EXCESS_PRECISION_EXPR, nonatomic_rhs_semantic_type, val); - rhs = build_binary_op (loc, modifycode, old, val, true); + rhs = build_binary_op (loc, modifycode, + convert_lvalue_to_rvalue (loc, old, true, true), + val, true); if (TREE_CODE (rhs) == EXCESS_PRECISION_EXPR) { tree eptype = TREE_TYPE (rhs); @@ -5727,7 +5880,48 @@ build_unary_op (location_t location, enum tree_code code, tree xarg, goto return_build_unary_op; } - if (C_BOOLEAN_TYPE_P (TREE_TYPE (arg))) + tree true_res; + if (c_hardbool_type_attr (TREE_TYPE (arg), NULL, &true_res)) + { + tree larg = stabilize_reference (arg); + tree sarg = save_expr (larg); + switch (code) + { + case PREINCREMENT_EXPR: + val = build2 (MODIFY_EXPR, TREE_TYPE (larg), larg, true_res); + val = build2 (COMPOUND_EXPR, TREE_TYPE (larg), sarg, val); + break; + case POSTINCREMENT_EXPR: + val = build2 (MODIFY_EXPR, TREE_TYPE (larg), larg, true_res); + val = build2 (COMPOUND_EXPR, TREE_TYPE (larg), val, sarg); + val = build2 (COMPOUND_EXPR, TREE_TYPE (larg), sarg, val); + break; + case PREDECREMENT_EXPR: + { + tree rarg = convert_lvalue_to_rvalue (location, sarg, + true, true); + rarg = invert_truthvalue_loc (location, rarg); + rarg = convert (TREE_TYPE (sarg), rarg); + val = build2 (MODIFY_EXPR, TREE_TYPE (larg), larg, rarg); + } + break; + case POSTDECREMENT_EXPR: + { + tree rarg = convert_lvalue_to_rvalue (location, sarg, + true, true); + rarg = invert_truthvalue_loc (location, rarg); + tree iarg = convert (TREE_TYPE (larg), rarg); + val = build2 (MODIFY_EXPR, TREE_TYPE (larg), larg, iarg); + val = build2 (COMPOUND_EXPR, TREE_TYPE (larg), val, sarg); + val = build2 (COMPOUND_EXPR, TREE_TYPE (larg), sarg, val); + } + break; + default: + gcc_unreachable (); + } + TREE_SIDE_EFFECTS (val) = 1; + } + else if (C_BOOLEAN_TYPE_P (TREE_TYPE (arg))) val = boolean_increment (code, arg); else val = build2 (code, TREE_TYPE (arg), arg, inc); @@ -6380,7 +6574,10 @@ build_conditional_expr (location_t colon_loc, tree ifexp, bool ifexp_bcp, addr_space_t as_common; if (comp_target_types (colon_loc, type1, type2)) - result_type = common_pointer_type (type1, type2); + { + ifexp = save_expr (ifexp); + result_type = common_pointer_type (type1, type2, ifexp); + } else if (null_pointer_constant_p (orig_op1)) result_type = type2; else if (null_pointer_constant_p (orig_op2)) @@ -7337,8 +7534,10 @@ build_modify_expr (location_t location, tree lhs, tree lhs_origtype, clear_decl_read = true; } - newrhs = build_binary_op (location, - modifycode, lhs, newrhs, true); + newrhs = build_binary_op (location, modifycode, + convert_lvalue_to_rvalue (location, lhs, + true, true), + newrhs, true); if (clear_decl_read) DECL_READ_P (lhs) = 0; @@ -8787,11 +8986,13 @@ convert_for_assignment (location_t location, location_t expr_loc, tree type, && coder == INTEGER_TYPE) 
warning_at (location, OPT_Wzero_as_null_pointer_constant, "zero as null pointer constant"); - + /* A NULLPTR type is just a nullptr always. */ + if (coder == NULLPTR_TYPE) + return omit_one_operand_loc (expr_loc, type, nullptr_node, rhs); /* An explicit constant 0 or type nullptr_t can convert to a pointer, or one that results from arithmetic, even including a cast to integer type. */ - if (!null_pointer_constant && coder != NULLPTR_TYPE) + else if (!null_pointer_constant) switch (errtype) { case ic_argpass: @@ -12730,11 +12931,9 @@ build_asm_expr (location_t loc, tree string, tree outputs, tree inputs, } else { - struct c_expr expr; - memset (&expr, 0, sizeof (expr)); - expr.value = input; - expr = convert_lvalue_to_rvalue (loc, expr, true, false); - input = c_fully_fold (expr.value, false, NULL); + input = c_fully_fold (convert_lvalue_to_rvalue (loc, input, + true, false), + false, NULL); if (input != error_mark_node && VOID_TYPE_P (TREE_TYPE (input))) { @@ -14390,7 +14589,7 @@ build_binary_op (location_t location, enum tree_code code, Otherwise, the targets must be compatible and both must be object or both incomplete. */ if (comp_target_types (location, type0, type1)) - result_type = common_pointer_type (type0, type1); + result_type = common_pointer_type (type0, type1, NULL_TREE); else if (!addr_space_superset (as0, as1, &as_common)) { error_at (location, "comparison of pointers to " @@ -14529,7 +14728,7 @@ build_binary_op (location_t location, enum tree_code code, if (comp_target_types (location, type0, type1)) { - result_type = common_pointer_type (type0, type1); + result_type = common_pointer_type (type0, type1, NULL_TREE); if (!COMPLETE_TYPE_P (TREE_TYPE (type0)) != !COMPLETE_TYPE_P (TREE_TYPE (type1))) pedwarn_c99 (location, OPT_Wpedantic, @@ -15356,12 +15555,8 @@ handle_omp_array_sections_1 (tree c, tree t, vec &types, /* If the array section is pointer based and the pointer itself is _Atomic qualified, we need to atomically load the pointer. */ - c_expr expr; - memset (&expr, 0, sizeof (expr)); - expr.value = ret; - expr = convert_lvalue_to_rvalue (OMP_CLAUSE_LOCATION (c), - expr, false, false); - ret = expr.value; + ret = convert_lvalue_to_rvalue (OMP_CLAUSE_LOCATION (c), + ret, false, false); } return ret; } @@ -16204,7 +16399,14 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort) /* We've reached the end of a list of expanded nodes. Reset the group start pointer. 
*/
	  if (c == grp_sentinel)
-	    grp_start_p = NULL;
+	    {
+	      if (grp_start_p
+		  && OMP_CLAUSE_HAS_ITERATORS (*grp_start_p))
+		for (tree gc = *grp_start_p; gc != grp_sentinel;
+		     gc = OMP_CLAUSE_CHAIN (gc))
+		  OMP_CLAUSE_ITERATORS (gc) = OMP_CLAUSE_ITERATORS (*grp_start_p);
+	      grp_start_p = NULL;
+	    }
	  switch (OMP_CLAUSE_CODE (c))
	    {
@@ -16962,6 +17164,13 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
	  /* FALLTHRU */
	case OMP_CLAUSE_TO:
	case OMP_CLAUSE_FROM:
+	  if (OMP_CLAUSE_ITERATORS (c)
+	      && c_omp_finish_iterators (OMP_CLAUSE_ITERATORS (c)))
+	    {
+	      t = error_mark_node;
+	      break;
+	    }
+	  /* FALLTHRU */
	case OMP_CLAUSE__CACHE_:
	  {
	    using namespace omp_addr_tokenizer;
@@ -17690,6 +17899,11 @@ c_finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
	pc = &OMP_CLAUSE_CHAIN (c);
     }
+  if (grp_start_p
+      && OMP_CLAUSE_HAS_ITERATORS (*grp_start_p))
+    for (tree gc = *grp_start_p; gc; gc = OMP_CLAUSE_CHAIN (gc))
+      OMP_CLAUSE_ITERATORS (gc) = OMP_CLAUSE_ITERATORS (*grp_start_p);
+
   if (simdlen
       && safelen
       && tree_int_cst_lt (OMP_CLAUSE_SAFELEN_EXPR (safelen),
diff --git a/gcc/calls.cc b/gcc/calls.cc index 2711c4ebe325..bb8a6d09f825 100644 --- a/gcc/calls.cc +++ b/gcc/calls.cc @@ -3235,11 +3235,6 @@ expand_call (tree exp, rtx target, int ignore)
       if (pass)
	precompute_arguments (num_actuals, args);
-      /* Now we are about to start emitting insns that can be deleted
-	 if a libcall is deleted.  */
-      if (pass && (flags & ECF_MALLOC))
-	start_sequence ();
-
       /* Check the canary value for sibcall or function which doesn't return and could throw.  */
       if ((pass == 0
@@ -3771,25 +3766,23 @@ expand_call (tree exp, rtx target, int ignore)
	    valreg = gen_rtx_REG (TYPE_MODE (rettype), REGNO (valreg));
	}
-      if (pass && (flags & ECF_MALLOC))
+      /* If the return register exists, for malloc-like
+	 function calls, mark the return register with the
+	 alignment and noalias reg note.  */
+      if (pass && (flags & ECF_MALLOC) && valreg)
	{
	  rtx temp = gen_reg_rtx (GET_MODE (valreg));
-	  rtx_insn *last, *insns;
+	  rtx_insn *last;
	  /* The return value from a malloc-like function is a pointer.  */
	  if (TREE_CODE (rettype) == POINTER_TYPE)
	    mark_reg_pointer (temp, MALLOC_ABI_ALIGNMENT);
-	  emit_move_insn (temp, valreg);
+	  last = emit_move_insn (temp, valreg);
	  /* The return value from a malloc-like function cannot alias anything else.  */
-	  last = get_last_insn ();
	  add_reg_note (last, REG_NOALIAS, temp);
-
-	  /* Write out the sequence.  */
-	  insns = end_sequence ();
-	  emit_insn (insns);
	  valreg = temp;
	}
diff --git a/gcc/cfganal.cc b/gcc/cfganal.cc index 790357507714..3537e7927912 100644 --- a/gcc/cfganal.cc +++ b/gcc/cfganal.cc @@ -1679,30 +1679,17 @@ compute_dominance_frontiers (bitmap_head *frontiers)
 bitmap
 compute_idf (bitmap def_blocks, bitmap_head *dfs)
 {
-  bitmap_iterator bi;
+  bitmap_iterator bi, bi2;
   unsigned bb_index, i;
   bitmap phi_insertion_points;
   phi_insertion_points = BITMAP_ALLOC (NULL);
-  /* Seed the work set with all the blocks in DEF_BLOCKS.  */
-  auto_bitmap work_set;
-  bitmap_copy (work_set, def_blocks);
-  bitmap_tree_view (work_set);
-
-  /* Pop a block off the workset, add every block that appears in
-     the original block's DF that we have not already processed to
-     the workset.  Iterate until the workset is empty.  Blocks
-     which are added to the workset are potential sites for
-     PHI nodes.  */
-  while (!bitmap_empty_p (work_set))
+  /* The initial work set is DEF_BLOCKS; process it first, seeding
+     phi_insertion_points, which is both the start of the worklist for
+     the following iteration and its visited set.  */
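/* Editorial aside, not part of the patch: the same two-phase worklist
   scheme in self-contained C, with plain arrays standing in for GCC's
   bitmap and vec APIs.  Here dfs[b] lists the dominance frontier of
   block b, and visited[] plays the role of phi_insertion_points; all
   names are hypothetical.

     #include <stdbool.h>
     #include <stdlib.h>

     void
     compute_idf_sketch (int **dfs, const int *dfs_len, int n_blocks,
                         const int *def_blocks, int n_defs, bool *visited)
     {
       int *work = (int *) malloc (n_blocks * sizeof (int));
       int top = 0;
       // Phase 1: union the DFs of the definition blocks, seeding
       // both the visited set and the worklist.
       for (int d = 0; d < n_defs; d++)
         for (int j = 0; j < dfs_len[def_blocks[d]]; j++)
           {
             int b = dfs[def_blocks[d]][j];
             if (!visited[b])
               {
                 visited[b] = true;
                 work[top++] = b;
               }
           }
       // Phase 2: pop a block, push every not-yet-visited block in its
       // DF; iterate until the worklist is empty.
       while (top > 0)
         {
           int b = work[--top];
           for (int j = 0; j < dfs_len[b]; j++)
             if (!visited[dfs[b][j]])
               {
                 visited[dfs[b][j]] = true;
                 work[top++] = dfs[b][j];
               }
         }
       // The visited blocks are now the iterated dominance frontier,
       // i.e. the candidate PHI insertion points.
       free (work);
     }
*/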
+  EXECUTE_IF_SET_IN_BITMAP (def_blocks, 0, bb_index, bi2)
     {
-      /* The dominance frontier of a block is blocks after it so iterating
-	 on earlier blocks first is better.
-	 ???  Basic blocks are by no means guaranteed to be ordered in
-	 optimal order for this iteration.  */
-      bb_index = bitmap_clear_first_set_bit (work_set);
-
      /* Since the registration of NEW -> OLD name mappings is done separately from the call to update_ssa, when updating the SSA form, the basic blocks where new and/or old names are defined @@ -1716,9 +1703,28 @@ compute_idf (bitmap def_blocks, bitmap_head *dfs) the IDF and of work_set which is at most that of the IDF as well.  That makes iterating over the DFS bitmap preferential to whole bitmap operations involving also phi_insertion_points.  */
+      EXECUTE_IF_SET_IN_BITMAP (&dfs[bb_index], 0, i, bi)
+	bitmap_set_bit (phi_insertion_points, i);
+    }
+
+  /* Seed the work set with the initial phi_insertion_points.  */
+  auto_vec<unsigned> work_set (n_basic_blocks_for_fn (cfun));
+  EXECUTE_IF_SET_IN_BITMAP (phi_insertion_points, 0, i, bi)
+    work_set.quick_push (i);
+
+  /* Pop a block off the workset, add every block that appears in
+     the original block's DF that we have not already processed to
+     the workset.  Iterate until the workset is empty.  Blocks
+     which are added to the workset are potential sites for
+     PHI nodes.  */
+  while (!work_set.is_empty ())
+    {
+      bb_index = work_set.pop ();
+      gcc_checking_assert (bb_index
+			   < (unsigned) last_basic_block_for_fn (cfun));
      EXECUTE_IF_SET_IN_BITMAP (&dfs[bb_index], 0, i, bi)
	if (bitmap_set_bit (phi_insertion_points, i))
-	  bitmap_set_bit (work_set, i);
+	  work_set.quick_push (i);
     }
   return phi_insertion_points;
diff --git a/gcc/cobol/ChangeLog b/gcc/cobol/ChangeLog index 35d645c26327..256ee70242a9 100644 --- a/gcc/cobol/ChangeLog +++ b/gcc/cobol/ChangeLog @@ -1,3 +1,46 @@
+2025-08-20  Robert Dubner
+
+	* genutil.cc (get_binary_value): Fix a comment.
+	* parse.y: udf_args_valid(): Fix loc calculation.
+	* symbols.cc (assert): extend_66_capacity(): Avoid assert(e < e2) in
+	-O0 build until symbol_table expansion is fixed.
+
+2025-08-15  Robert Dubner
+
+	* genapi.h (parser_call_exception_end): Remove obsolete comment.
+	* structs.cc (create_cbl_enabled_exception_t):
+	Remove cbl_enabled_exception_type_node;
+	remove create_cbl_enabled_exception_t().
+	(create_our_type_nodes): Likewise.
+	* structs.h (GTY): Likewise.
+
+2025-08-13  Robert Dubner
+
+	* genutil.cc (get_binary_value): Use the new routine.
+
+2025-08-13  Robert Dubner
+
+	* genutil.cc (get_binary_value): Use the new routine.
+
+2025-08-12  Robert Dubner
+
+	* genapi.cc (compare_binary_binary): Formatting.
+	(cobol_compare): Formatting.
+	(mh_numeric_display): Rewrite "move ND to ND" algorithm.
+	(initial_from_initial): Proper initialization of EBCDIC ND variables.
+	* genmath.cc (fast_add): Delete comment.
+	* genutil.cc (get_binary_value): Modify for updated EBCDIC.
+
+2025-08-07  Robert Dubner
+
+	* cbldiag.h (location_dump): Source code formatting.
+	* parse.y: error_msg formatting.
+	* scan.l: Remove UTF-8 character from regex pattern.
+	* scan_ante.h (numstr_of): error_msg formatting.
+	* show_parse.h (class ANALYZE): Suppress cppcheck error.
+	* util.cc (cbl_field_t::report_invalid_initial_value):
+	error_msg formatting.
+ 2025-08-02  Jakub Jelinek
	* parse.y (intrinsic): Use %td format specifier with no cast on
diff --git a/gcc/cobol/cbldiag.h b/gcc/cobol/cbldiag.h index dd16190fbd87..2554deb96447 100644 --- a/gcc/cobol/cbldiag.h +++ b/gcc/cobol/cbldiag.h @@ -122,8 +122,8 @@ static void location_dump( const char func[], int line, const char tag[], const LOC& loc) {
  extern int yy_flex_debug; // cppcheck-suppress shadowVariable
  if( yy_flex_debug ) {
-    const char *detail = gcobol_getenv("update_location"); // cppcheck-suppress knownConditionTrueFalse
-    if( detail ) {
+    const char *detail = gcobol_getenv("update_location");
+    if( detail ) { // cppcheck-suppress knownConditionTrueFalse
      fprintf(stderr, "%s:%d: %s location (%d,%d) to (%d,%d)\n", func, line, tag, loc.first_line, loc.first_column, loc.last_line, loc.last_column);
diff --git a/gcc/cobol/genapi.cc b/gcc/cobol/genapi.cc index c9d2da481ab9..40b79ba5ce6c 100644 --- a/gcc/cobol/genapi.cc +++ b/gcc/cobol/genapi.cc @@ -2102,6 +2102,7 @@ compare_binary_binary(tree return_int,
                   right_side_ref->field,
                   refer_offset(*right_side_ref),
                   hilo_right);
+
  IF( hilo_left, eq_op, integer_one_node )
    {
    // left side is hi-value
@@ -2358,8 +2359,6 @@ cobol_compare(  tree return_int,
                             NULL_TREE));
    // compared = true; // Commented out to quiet cppcheck
    }
-
-//  gg_printf("    result is %d\n", return_int, NULL_TREE);
  }
static void @@ -14852,7 +14851,7 @@ static bool
mh_numeric_display( const cbl_refer_t &destref,
                    const cbl_refer_t &sourceref,
                    const TREEPLET    &tsource,
-                    tree size_error)
+                    tree size_error)
  {
  bool moved = false;
@@ -14862,98 +14861,106 @@ mh_numeric_display( const cbl_refer_t &destref,
      && !(sourceref.field->attr & scaled_e) )
    {
    Analyze();
-    // I believe that there are 225 pathways through the following code.  That's
-    // because there are five different valid combination of signable_e,
+    // I believe that there are 450 pathways through the following code.
+    // That's because there are five different valid combinations of signable_e,
    // separate_e, and leading_e.  There are three possibilities for
-    // sender/receiver rdigits (too many, too few, and just right), and the same
-    // for ldigits.  5 * 5 * 3 * 3 = 225.
+    // sender/receiver rdigits (too many, too few, and just right), and the
+    // same for ldigits.  5 * 5 * 3 * 3 * 2 = 450.
    // Fasten your seat belts.
-    // In order to simplify processing of a signable internal sender, we are
-    // going to pick up the sign byte and temporarily turn off the sign bit in
-    // the source data.  At the end, we will restore that value.  This
-    // reflexively makes me a bit nervous (it isn't, for example, thread-safe),
-    // but it makes life easier.
-
-    static tree source_sign_loc  = gg_define_variable(UCHAR_P, "..mhnd_sign_loc", vs_file_static);
-    static tree source_sign_byte = gg_define_variable(UCHAR, "..mhnd_sign_byte", vs_file_static);
-    static tree dest_p    = gg_define_variable(UCHAR_P, "..mhnd_dest", vs_file_static);  // The destination data pointer
-    static tree source_p  = gg_define_variable(UCHAR_P, "..mhnd_source", vs_file_static); // The source data pointer
-    static tree source_ep = gg_define_variable(UCHAR_P, "..mhnd_source_e", vs_file_static); // When we need an end pointer
+    // This routine is complicated by the fact that although I had several
+    // false starts of putting this into libgcobol, I keep coming back to the
+    // fact that assignment of zoned values is common.  And, so, there are all
+    // kinds of things that are known at compile time that would turn into
+    // execution-time decisions if I moved them to the library.
So, complex
+    or not, I am doing all this code here at compile time because it will
+    minimize the code at execution time.
+
+    // One thing to keep in mind is the problem caused by a source value being
+    // internally signed.  That turns an ASCII "123" into "12t", and we
+    // very probably don't want that "t" to find its way into the destination
+    // value.  The internal sign characteristic of ASCII is that the high
+    // nybble of the sign location is 0x30 or 0x70.  For EBCDIC, the high
+    // nybble is 0xC0 for positive values, and 0xD0 for negative; all other
+    // digits are 0xF0.
+
+    static tree source_sign_loc  = gg_define_variable(UCHAR_P,
+                                                      "..mhnd_sign_loc",
+                                                      vs_file_static);
+    static tree source_sign_byte = gg_define_variable(UCHAR,
+                                                      "..mhnd_sign_byte",
+                                                      vs_file_static);
+    // The destination data pointer
+    static tree dest_p    = gg_define_variable( UCHAR_P,
+                                                "..mhnd_dest",
+                                                vs_file_static);
+    // The source data pointer
+    static tree source_p  = gg_define_variable( UCHAR_P,
+                                                "..mhnd_source",
+                                                vs_file_static);
+    // When we need an end pointer
+    static tree source_ep = gg_define_variable( UCHAR_P,
+                                                "..mhnd_source_e",
+                                                vs_file_static);
    gg_assign(dest_p, qualified_data_location(destref));
    gg_assign(source_p, gg_add(member(sourceref.field, "data"), tsource.offset));
-    if( sourceref.field->attr & signable_e )
+    bool source_is_signable = sourceref.field->attr & signable_e;
+    bool source_is_leading  = sourceref.field->attr & leading_e;
+    bool source_is_separate = sourceref.field->attr & separate_e;
+
+    bool dest_is_signable = destref.field->attr & signable_e;
+    bool dest_is_leading  = destref.field->attr & leading_e;
+    bool dest_is_separate = destref.field->attr & separate_e;
+
+    if( source_is_signable )
      {
-      // The source is signable
+      // The source is signable, so we are going to calculate the location of
+      // the source sign information.
+
+      gg_assign(source_sign_loc,
+                gg_add(member(sourceref.field->var_decl_node, "data"),
+                       tsource.offset));
-      if( !(sourceref.field->attr & leading_e) )
+      if( source_is_leading )
        {
-        // The sign location is trailing.  Whether separate or not, the location
-        // is the final byte of the data:
-        gg_assign(source_sign_loc, gg_add(member( sourceref.field->var_decl_node, "data"),
-                                          tsource.offset)),
-        gg_assign(source_sign_loc,
-                  gg_add(source_sign_loc,
-                         build_int_cst_type(SIZE_T,
-                                            sourceref.field->data.capacity-1)));
-        if( (sourceref.field->attr & separate_e) )
-          {
-          // We have trailing separate
-          }
-        else
+        // The source sign location is in the leading position.
+        if( source_is_separate )
          {
-          // We have trailing internal
+          // We have LEADING SEPARATE, so the first actual digit is at
+          // source_p+1.
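/* Editorial aside, not part of the patch: the zone trick described in
   the comment above, as plain C.  Assuming the ASCII internal codeset,
   where ZONED_ZERO is '0' (0x30) and the digit value lives in the low
   nybble, forcing a possibly internally-signed byte to read as plain
   vanilla positive is:

     unsigned char
     zoned_force_positive (unsigned char sign_byte)
     {
       // Keep the digit nybble, replace the zone with the '0' zone.
       return (unsigned char) ('0' | (sign_byte & 0x0F));
     }

   The function name is hypothetical; it mirrors the
   gg_bitwise_or (ZONED_ZERO, gg_bitwise_and (source_sign_byte, 0x0F))
   sequence emitted just below.  */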
- gg_increment(source_p); - } - else - { - // We have leading internal - } + gg_add(source_sign_loc, + build_int_cst_type(SIZE_T, + sourceref.field->data.capacity-1))); } // Pick up the byte that contains the sign data, whether internal or // external: gg_assign(source_sign_byte, gg_indirect(source_sign_loc)); - if( !(sourceref.field->attr & separate_e) ) + if( !source_is_separate ) { - // This is signable and internal, so we want to turn off the sign bit - // in the original source data - if( internal_codeset_is_ebcdic() ) - { - gg_assign(gg_indirect(source_sign_loc), - gg_bitwise_or(source_sign_byte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT))); - } - else - { - gg_assign(gg_indirect(source_sign_loc), - gg_bitwise_and( source_sign_byte, - build_int_cst_type( UCHAR, - ~NUMERIC_DISPLAY_SIGN_BIT))); - } + // The source is signable and internal. We will modify the zone of + // the source sign byte to force it to be plain vanilla positive. + + // When the move is done, we will replace that byte with the original + // value. + gg_assign(gg_indirect(source_sign_loc), + gg_bitwise_or(build_int_cst_type(UCHAR, ZONED_ZERO), + gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, 0x0F)))); } } - else - { - // The number is unsigned, so do nothing. - } // Let the shenanigans begin. @@ -14961,83 +14968,49 @@ mh_numeric_display( const cbl_refer_t &destref, // The first thing to do is see if we need to output a leading sign // character - if( (destref.field->attr & signable_e) - && (destref.field->attr & leading_e) - && (destref.field->attr & separate_e) ) + if( dest_is_signable + && dest_is_leading + && dest_is_separate ) { // The output is signed, separate, and leading, so the first character // needs to be either '+' or '-' - if( (sourceref.field->attr & separate_e) ) + if( source_is_separate ) { - // The source is signable/separate - // Oooh. Shiny. We already have that character. + // The source and dest are both signable/separate. + // Oooh. Shiny. We already have the sign character from the source, + // so we assign that to the destination. gg_assign(gg_indirect(dest_p), source_sign_byte); } else { - // The source is internal. Not that up above we set source_sign_byte - // even for source values that aren't signable - if( internal_codeset_is_ebcdic() ) + // The source is internal. 
+ if( source_is_signable ) { - // We are working in EBCDIC - if( sourceref.field->attr & signable_e ) + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type( UCHAR, 0) ) { - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - eq_op, - build_int_cst_type( UCHAR, 0) ) - { - // The source was negative - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, EBCDIC_MINUS)); + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, SEPARATE_MINUS)); - } - ELSE - { - // The source was positive - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, EBCDIC_PLUS)); - } - ENDIF } - else + ELSE { - // The source is not signable, so the result is positive + // The source was positive gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, EBCDIC_PLUS)); + build_int_cst_type( UCHAR, SEPARATE_PLUS)); } + ENDIF } else { - // We are working in ASCII - if( sourceref.field->attr & signable_e ) - { - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - ne_op, - build_int_cst_type( UCHAR, 0) ) - { - // The source was negative - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, '-')); - - } - ELSE - { - // The source was positive - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, '+')); - } - ENDIF - } - else - { - // The source is not signable, so the result is positive - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, '+')); - } + // The source is not signable, so the signed becomes positive no + // matter what the sign of the source. + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, SEPARATE_PLUS)); } } gg_increment(dest_p); @@ -15058,8 +15031,7 @@ mh_numeric_display( const cbl_refer_t &destref, // The destination has more ldigits than the source, and needs some // leading zeroes: picky_memset( dest_p, - internal_codeset_is_ebcdic() ? - EBCDIC_ZERO : '0' , + ZONED_ZERO , dest_ldigits - source_ldigits); // With the leading zeros set, copy over the ldigits: digit_count = source_ldigits; @@ -15085,8 +15057,7 @@ mh_numeric_display( const cbl_refer_t &destref, IF( gg_indirect(source_p), ne_op, build_int_cst_type( UCHAR, - internal_codeset_is_ebcdic() ? - EBCDIC_ZERO : '0') ) + ZONED_ZERO) ) { set_exception_code(ec_size_truncation_e); gg_assign(size_error, integer_one_node); @@ -15132,25 +15103,23 @@ mh_numeric_display( const cbl_refer_t &destref, // over only the necessary rdigits, discarding the ones to the right. digit_count += dest_rdigits; } - picky_memcpy(dest_p, source_p, digit_count); picky_memset( dest_p, - internal_codeset_is_ebcdic() ? 
- EBCDIC_ZERO : '0' , + ZONED_ZERO , trailing_zeros); // With the digits in place, we need to sort out what to do if the target // is signable: - if( destref.field->attr & signable_e ) + if( dest_is_signable ) { - if( (destref.field->attr & separate_e) - && !(destref.field->attr & leading_e) ) + if( dest_is_separate + && !dest_is_leading ) { // The target is separate/trailing, so we need to tack a '+' // or '-' character - if( sourceref.field->attr & separate_e ) + if( source_is_separate ) { - // The source was separate, so we already have what we need in t + // The source was separate, so we already have what we need in the // source_sign_byte: gg_assign(gg_indirect(dest_p), source_sign_byte); gg_increment(dest_p); @@ -15158,68 +15127,43 @@ mh_numeric_display( const cbl_refer_t &destref, else { // The source is either internal, or unsigned - if( sourceref.field->attr & signable_e ) + if( source_is_signable ) { // The source is signable/internal, so we need to extract the // sign bit from source_sign_byte - if( internal_codeset_is_ebcdic() ) + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type( UCHAR, 0) ) { - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - eq_op, - build_int_cst_type( UCHAR, 0) ) - { - // The source was negative - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, EBCDIC_MINUS)); + // The source was negative + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, SEPARATE_MINUS)); - } - ELSE - { - // The source was positive - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, EBCDIC_PLUS)); - } - ENDIF } - else + ELSE { - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - ne_op, - build_int_cst_type( UCHAR, 0) ) - { - // The source was negative - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, '-')); - - } - ELSE - { - // The source was positive - gg_assign(gg_indirect(dest_p), - build_int_cst_type( UCHAR, '+')); - } - ENDIF + // The source was positive + gg_assign(gg_indirect(dest_p), + build_int_cst_type( UCHAR, SEPARATE_PLUS)); } + ENDIF } else { // The source is unsigned, so dest is positive gg_assign(gg_indirect(dest_p), build_int_cst_type( UCHAR, - internal_codeset_is_ebcdic() ? - EBCDIC_PLUS : '+' )); + SEPARATE_PLUS)); } } gg_increment(dest_p); } - else if( !(destref.field->attr & separate_e) ) + else if( !dest_is_separate ) { // The destination is signed/internal - if( destref.field->attr & leading_e ) + if( dest_is_leading ) { // The sign bit goes into the first byte: gg_assign(dest_p, qualified_data_location(destref)); @@ -15229,104 +15173,62 @@ mh_numeric_display( const cbl_refer_t &destref, // The sign bit goes into the last byte: gg_decrement(dest_p); } - if( sourceref.field->attr & signable_e ) + // dest_p now points to the internal sign location + if( internal_codeset_is_ebcdic() ) { - if( sourceref.field->attr & separate_e ) + // For EBCDIC, the zone is going to end up being 0xC0 or 0xD0 + gg_assign(gg_indirect(dest_p), + gg_bitwise_and(gg_indirect(dest_p), + build_int_cst_type(UCHAR, + ZONE_SIGNED_EBCDIC+0x0F))); + } + + if( source_is_signable ) + { + if( source_is_separate ) { // The source is separate, so source_sign_byte is '+' or '-' IF( source_sign_byte, eq_op, - build_int_cst_type(UCHAR, - internal_codeset_is_ebcdic() ? 
- EBCDIC_MINUS : '-') ) + build_int_cst_type(UCHAR, SEPARATE_MINUS) ) { - // The source is negative, so turn the ASCII bit on - if( !internal_codeset_is_ebcdic() ) - { - gg_assign(gg_indirect(dest_p), - gg_bitwise_or(gg_indirect(dest_p), - build_int_cst_type( - UCHAR, - NUMERIC_DISPLAY_SIGN_BIT))); - - } - else - { - // It's ebcdic, so turn the sign bit OFF - gg_assign(gg_indirect(dest_p), - gg_bitwise_and(gg_indirect(dest_p), - build_int_cst_type( - UCHAR, - ~NUMERIC_DISPLAY_SIGN_BIT))); - } + // The source is negative, so turn on the internal "is minus" bit + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); } ELSE - { - // The source is positive, so turn the EBCDIC bit ON: - if( internal_codeset_is_ebcdic() ) - { - gg_assign(gg_indirect(dest_p), - gg_bitwise_or(gg_indirect(dest_p), - build_int_cst_type( - UCHAR, - NUMERIC_DISPLAY_SIGN_BIT))); - } - } ENDIF } else { // The source is signable/internal, so the sign bit is in // source_sign_byte. Whatever it is, it has to go into dest_p: - if( internal_codeset_is_ebcdic() ) - { - // This is EBCDIC, so if the source_sign_byte bit is LOW, we - // clear that bit in dest_p high. - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( - UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - eq_op, - build_int_cst_type(UCHAR, 0) ) - { - // The source was negative, so make the dest negative - gg_assign(gg_indirect(dest_p), - gg_bitwise_and(gg_indirect(dest_p), - build_int_cst_type( - UCHAR, - ~NUMERIC_DISPLAY_SIGN_BIT))); - } - ELSE - ENDIF - } - else + IF( gg_bitwise_and( source_sign_byte, + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT)), + ne_op, + build_int_cst_type(UCHAR, 0) ) { - // This is ASCII, so if the source_sign_byte bit is high, we - // set that bit in dest_p high. - IF( gg_bitwise_and( source_sign_byte, - build_int_cst_type( - UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), - ne_op, - build_int_cst_type(UCHAR, 0) ) - { - // The source was negative, so make the dest negative - gg_assign(gg_indirect(dest_p), - gg_bitwise_or(gg_indirect(dest_p), - build_int_cst_type( - UCHAR, - NUMERIC_DISPLAY_SIGN_BIT))); - } - ELSE - ENDIF + // The source was negative, so make the dest negative + gg_assign(gg_indirect(dest_p), + gg_bitwise_or(gg_indirect(dest_p), + build_int_cst_type( + UCHAR, + NUMERIC_DISPLAY_SIGN_BIT))); } + ELSE + ENDIF } } } } - if( (sourceref.field->attr & signable_e) - && !(sourceref.field->attr & separate_e)) + if( source_is_signable + && !source_is_separate) { // The source is signable internal, so we need to restore the original // sign byte in the original source data: @@ -15335,7 +15237,7 @@ mh_numeric_display( const cbl_refer_t &destref, moved = true; } return moved; - } + } //NUMERIC_DISPLAY_SIGN static bool mh_little_endian( const cbl_refer_t &destref, @@ -16068,12 +15970,12 @@ initial_from_initial(cbl_field_t *field) bool negative; if( real_isneg (&value) ) { - negative = true; - value = real_value_negate (&value); + negative = true; + value = real_value_negate (&value); } else { - negative = false; + negative = false; } digits_from_float128(ach, field, field->data.digits, rdigits, value); @@ -16083,6 +15985,7 @@ initial_from_initial(cbl_field_t *field) && (field->attr & separate_e) && (field->attr & leading_e ) ) { + // This zoned decimal value is signable, separate, and leading. 
if( negative )
        {
        *pretval++ = internal_minus; @@ -16094,12 +15997,14 @@ initial_from_initial(cbl_field_t *field) }
      for(size_t i=0; i<field->data.digits; i++)
        {
+        // Start by assuming it's a value that can't be signed
        *pretval++ = internal_zero + ((*digits++) & 0x0F);
        }
      if(   (field->attr & signable_e)
         &&  (field->attr & separate_e)
         && !(field->attr & leading_e ) )
        {
+        // The value is signable, separate, and trailing
        if( negative )
          {
          *pretval++ = internal_minus;
          } @@ -16110,30 +16015,21 @@ initial_from_initial(cbl_field_t *field) } }
      if(   (field->attr & signable_e)
-         && !(field->attr & separate_e)
-         && negative)
+         && !(field->attr & separate_e) )
        {
-        if( field->attr & leading_e )
+        // This value is signable, and not separate.  So, the sign information
+        // goes into the first or last byte:
+        char *sign_location = field->attr & leading_e ?
+                                retval : retval + field->data.digits - 1 ;
+        if( internal_codeset_is_ebcdic() )
          {
-          if( internal_is_ebcdic )
-            {
-            retval[0] &= ~NUMERIC_DISPLAY_SIGN_BIT;
-            }
-          else
-            {
-            retval[0] |= NUMERIC_DISPLAY_SIGN_BIT;
-            }
+          // Change the zone from 0xF0 to 0xC0
+          *sign_location &= (ZONE_SIGNED_EBCDIC + 0x0F);
          }
-        else
+        if( negative )
          {
-          if( internal_is_ebcdic )
-            {
-            pretval[-1] &= ~NUMERIC_DISPLAY_SIGN_BIT;
-            }
-          else
-            {
-            pretval[-1] |= NUMERIC_DISPLAY_SIGN_BIT;
-            }
+          // Turn on the sign bit:
+          *sign_location |= NUMERIC_DISPLAY_SIGN_BIT;
          }
        }
      break;
diff --git a/gcc/cobol/genapi.h b/gcc/cobol/genapi.h index b41b906aa697..b86be8e97909 100644 --- a/gcc/cobol/genapi.h +++ b/gcc/cobol/genapi.h @@ -536,8 +536,6 @@ void parser_exception_raise(ec_type_t ec);
void parser_call_exception( cbl_label_t *name );
void parser_call_exception_end( cbl_label_t *name );
-//void parser_stash_exceptions(const cbl_enabled_exceptions_array_t *enabled);
-
void parser_match_exception(cbl_field_t *index);
void parser_check_fatal_exception();
void parser_clear_exception();
diff --git a/gcc/cobol/genmath.cc b/gcc/cobol/genmath.cc index e7eb971d1acb..27d5c1ee65fc 100644 --- a/gcc/cobol/genmath.cc +++ b/gcc/cobol/genmath.cc @@ -394,7 +394,6 @@ fast_add( size_t nC, cbl_num_result_t *C,
  {
  Analyze();
  // All targets are non-PICTURE binaries:
-  //gg_insert_into_assembler("# DUBNER addition START");
  tree term_type = largest_binary_term(nA, A);
  if( term_type )
    {
diff --git a/gcc/cobol/genutil.cc b/gcc/cobol/genutil.cc index a5f69a09eec9..4b296e46e877 100644 --- a/gcc/cobol/genutil.cc +++ b/gcc/cobol/genutil.cc @@ -752,9 +752,9 @@ get_binary_value( tree value, return; }
-  static tree pointer = gg_define_variable(UCHAR_P, "..gbv_pointer", vs_file_static);
-  static tree pend    = gg_define_variable(UCHAR_P, "..gbv_pend", vs_file_static);
-
+  static tree pointer = gg_define_variable( UCHAR_P,
+                                            "..gbv_pointer",
+                                            vs_file_static);
  switch(field->type)
    {
    case FldLiteralN:
@@ -791,8 +791,9 @@ get_binary_value( tree value,
      // We need to check early on for HIGH-VALUE and LOW-VALUE
      // Pick up the byte
      tree digit = gg_get_indirect_reference(source_address, NULL_TREE);
-      IF( digit, eq_op, build_int_cst(UCHAR, 0xFF) )
+      IF( digit, eq_op, build_int_cst(UCHAR, DEGENERATE_HIGH_VALUE) )
        {
+        // We are dealing with HIGH-VALUE
        if( hilo )
          {
          gg_assign(hilo, integer_one_node);
@@ -803,12 +804,14 @@ get_binary_value( tree value,
                    build_int_cst_type( TREE_TYPE(rdigits),
                                        get_scaled_rdigits(field)));
          }
-        gg_assign(value, build_int_cst_type(TREE_TYPE(value), 0xFFFFFFFFFFFFFFFUL));
+        gg_assign(value, build_int_cst_type(TREE_TYPE(value),
+                                            0x7FFFFFFFFFFFFFFFUL));
        }
      ELSE
        {
-        IF( digit, eq_op, build_int_cst(UCHAR, 0x00) )
+        IF( digit, eq_op,
build_int_cst(UCHAR, DEGENERATE_LOW_VALUE) ) { + // We are dealing with LOW-VALUE if( hilo ) { gg_assign(hilo, integer_minus_one_node); @@ -816,26 +819,25 @@ get_binary_value( tree value, } ELSE { - // Establish rdigits: + // We are dealing with an ordinary NumericDisplay value + gg_assign(pointer, source_address); + if( rdigits ) { gg_assign(rdigits, - build_int_cst_type( TREE_TYPE(rdigits), - get_scaled_rdigits(field))); + build_int_cst_type(TREE_TYPE(rdigits), + get_scaled_rdigits(field))); } - // Zero out the destination - gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); - // Pick up a pointer to the source bytes: - - gg_assign(pointer, source_address); - - // This is the we-are-done pointer - gg_assign(pend, gg_add( pointer, - get_any_capacity(field))); - - static tree signbyte = gg_define_variable(UCHAR, "..gbv_signbyte", vs_file_static); - - // The big decision is whether or not the variable is signed: + // This will be the 128-bit value of the character sequence + static tree val128 = gg_define_variable(INT128, + "..gbv_val128", + vs_file_static); + // This is a pointer to the sign byte + static tree signp = gg_define_variable(UCHAR_P, + "..gbv_signp", + vs_file_static); + // We need to figure out where the sign information, if any is to be + // found: if( field->attr & signable_e ) { // The variable is signed @@ -845,12 +847,17 @@ get_binary_value( tree value, if( field->attr & leading_e) { // The first byte is '+' or '-' + gg_assign(signp, source_address); + // Increment pointer to point to the first actual digit gg_increment(pointer); } else { // The final byte is '+' or '-' - gg_decrement(pend); + gg_assign(signp, + gg_add(source_address, + build_int_cst_type( SIZE_T, + field->data.digits))); } } else @@ -858,219 +865,34 @@ get_binary_value( tree value, // The sign byte is internal if( field->attr & leading_e) { - // The first byte has the sign bit: - gg_assign(signbyte, - gg_get_indirect_reference(source_address, NULL_TREE)); - if( internal_codeset_is_ebcdic() ) - { - // We need to make sure the EBCDIC sign bit is ON, for positive - gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), - gg_bitwise_or(signbyte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT))); - } - else - { - // We need to make sure the ascii sign bit is Off, for positive - gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), - gg_bitwise_and( signbyte, - build_int_cst_type( UCHAR, - ~NUMERIC_DISPLAY_SIGN_BIT))); - } + // The first byte has the sign bit. + gg_assign(signp, source_address); } else { - // The final byte has the sign bit: - gg_assign(signbyte, - gg_get_indirect_reference(source_address, - build_int_cst_type(SIZE_T, - field->data.capacity-1))); - if( internal_codeset_is_ebcdic() ) - { - // We need to make sure the EBCDIC sign bit is ON, for positive - gg_assign(gg_get_indirect_reference(source_address, - build_int_cst_type( SIZE_T, - field->data.capacity-1)), - gg_bitwise_or(signbyte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT))); - } - else - { - // We need to make sure the ASCII sign bit is Off, for positive - gg_assign(gg_get_indirect_reference(source_address, - build_int_cst_type( SIZE_T, - field->data.capacity-1)), - gg_bitwise_and( signbyte, - build_int_cst_type( UCHAR, - ~NUMERIC_DISPLAY_SIGN_BIT))); - } + // The final byte has the sign bit. 
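/* Editorial aside, not part of the patch: the four signable
   NumericDisplay layouts above locate the sign byte like this, with
   data and digits standing in for the field's data pointer and digit
   count (helper name hypothetical):

     unsigned char *
     sign_byte_location (unsigned char *data, int digits,
                         bool leading, bool separate)
     {
       if (leading)
         return data;                  // sign first, digits follow
       return separate
              ? data + digits          // trailing separate: extra byte
              : data + digits - 1;     // trailing internal: last digit
     }

   These offsets match the gg_assign (signp, ...) computations in the
   surrounding code.  */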
+ gg_assign(signp, + gg_add(source_address, + build_int_cst_type( SIZE_T, + field->data.digits-1))); } } } - // We can now set up the byte-by-byte processing loop: - if( internal_codeset_is_ebcdic() ) - { - // We are working in EBCDIC - WHILE( pointer, lt_op, pend ) - { - // Pick up the byte - digit = gg_get_indirect_reference(pointer, NULL_TREE); - IF( digit, lt_op, build_int_cst_type(UCHAR, EBCDIC_ZERO) ) - { - // break on a non-digit - gg_assign(pointer, pend); - } - ELSE - { - IF( digit, gt_op, build_int_cst_type(UCHAR, EBCDIC_NINE) ) - { - // break on a non-digit - gg_assign(pointer, pend); - } - ELSE - { - // Whether ASCII or EBCDIC, the bottom four bits tell the tale: - // Multiply our accumulator by ten: - gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); - // And add in the current digit - gg_assign(value, - gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and( digit, - build_int_cst_type(UCHAR, 0x0F) )))); - gg_increment(pointer); - } - ENDIF - } - ENDIF - } - WEND - } else { - // We are working in ASCII: - WHILE( pointer, lt_op, pend ) - { - // Pick up the byte - digit = gg_get_indirect_reference(pointer, NULL_TREE); - // Whether ASCII or EBCDIC, the bottom four bits tell the tale: - // Multiply our accumulator by ten: - gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); - // And add in the current digit - gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and(digit, build_int_cst_type(UCHAR, 0x0F))))); - gg_increment(pointer); - } - WEND + // This value is unsigned, so just use the first location: + gg_assign(signp, source_address); } - // Value contains the binary value. The last thing is to apply -- and - // undo -- the signable logic: - - if( field->attr & signable_e ) - { - // The variable is signed - if( field->attr & separate_e ) - { - // The sign byte is separate - if( field->attr & leading_e) - { - // The first byte is '+' or '-' - if( internal_codeset_is_ebcdic() ) - { - // We are operating in EBCDIC, so we look for a 96 (is minus sign) - IF( gg_get_indirect_reference(source_address, NULL_TREE), - eq_op, - build_int_cst_type(UCHAR, 96) ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - else - { - // We are operating in ASCII - IF( gg_get_indirect_reference(source_address, NULL_TREE), - eq_op, - build_int_cst_type(UCHAR, '-') ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - } - else - { - // The final byte is '+' or '-' - if( internal_codeset_is_ebcdic() ) - { - // We are operating in EBCDIC, so we look for a 96 (is minus sign) - IF( gg_get_indirect_reference(source_address, build_int_cst_type(SIZE_T, field->data.capacity-1)), - eq_op, - build_int_cst_type(UCHAR, 96) ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - else - { - // We are operating in ASCII - IF( gg_get_indirect_reference(source_address, build_int_cst_type(SIZE_T, field->data.capacity-1)), - eq_op, - build_int_cst_type(UCHAR, '-') ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - } - } - else - { - // The sign byte is internal. 
Check the sign bit - if(internal_codeset_is_ebcdic()) - { - IF( gg_bitwise_and( signbyte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), eq_op, build_int_cst_type(UCHAR, 0) ) - { - // The EBCDIC sign bit was OFF, so negate the result - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - else - { - IF( gg_bitwise_and( signbyte, - build_int_cst_type( UCHAR, - NUMERIC_DISPLAY_SIGN_BIT)), ne_op, build_int_cst_type(UCHAR, 0) ) - { - // The ASCII sign bit was on, so negate the result - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF - } - // It's time to put back the original data: - if( field->attr & leading_e) - { - // The first byte has the sign bit: - gg_assign(gg_get_indirect_reference(source_address, NULL_TREE), - signbyte); - } - else - { - // The final byte has the sign bit: - gg_assign(gg_get_indirect_reference(source_address, - build_int_cst_type(SIZE_T, field->data.capacity-1)), - signbyte); - } - } - } + gg_assign(val128, + gg_call_expr( INT128, + "__gg__numeric_display_to_binary", + signp, + pointer, + build_int_cst_type(INT, field->data.digits), + NULL_TREE)); + // Assign the value we got from the string to our "return" value: + gg_assign(value, gg_cast(TREE_TYPE(value), val128)); } ENDIF } @@ -1119,7 +941,9 @@ get_binary_value( tree value, vs_file_static); if( field->attr & signable_e ) { - IF( gg_array_value(gg_cast(build_pointer_type(SCHAR), source)), lt_op, gg_cast(SCHAR, integer_zero_node) ) + IF( gg_array_value(gg_cast(build_pointer_type(SCHAR), source)), + lt_op, + gg_cast(SCHAR, integer_zero_node) ) { gg_assign(extension, build_int_cst_type(UCHAR, 0xFF)); } @@ -1202,45 +1026,23 @@ get_binary_value( tree value, case FldPacked: { - // Zero out the destination: - gg_assign(value, gg_cast(TREE_TYPE(value), integer_zero_node)); - gg_assign(pointer, get_data_address(field, field_offset)); - gg_assign(pend, - gg_add(pointer, - build_int_cst_type(SIZE_T, field->data.capacity-1))); - - // Convert all but the last byte of the packed decimal sequence - WHILE( pointer, lt_op, pend ) - { - // Convert the first nybble - gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); - gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_rshift(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst(UINT, 4))))); - - // Convert the second nybble - gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); - gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF))))); - gg_increment(pointer); - } - WEND - - // This is the final byte: - gg_assign(value, gg_multiply(value, build_int_cst_type(TREE_TYPE(value), 10))); - gg_assign(value, gg_add(value, gg_cast(TREE_TYPE(value), gg_rshift(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst(UINT, 4))))); - - IF( gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF)), eq_op, build_int_cst_type(UCHAR, 0x0D) ) - { - gg_assign(value, gg_negate(value)); - } - ELSE + if( rdigits ) { - IF( gg_bitwise_and(gg_get_indirect_reference(pointer, NULL_TREE), build_int_cst_type(UCHAR, 0xF)), eq_op, build_int_cst_type(UCHAR, 0x0B) ) - { - gg_assign(value, gg_negate(value)); - } - ELSE - ENDIF + gg_assign(rdigits, + build_int_cst_type( TREE_TYPE(rdigits), + get_scaled_rdigits(field))); } - ENDIF + tree dest_type = TREE_TYPE(value); + + gg_assign(value, + gg_cast(dest_type, + gg_call_expr(INT128, + "__gg__packed_to_binary", + get_data_address( field, 
+ field_offset), + build_int_cst_type(INT, + field->data.capacity), + NULL_TREE))); break; } diff --git a/gcc/cobol/parse.y b/gcc/cobol/parse.y index fae96ed5c718..039cb957de0c 100644 --- a/gcc/cobol/parse.y +++ b/gcc/cobol/parse.y @@ -10336,8 +10336,8 @@ intrinsic: function_udf if( p != NULL ) { auto loc = symbol_field_location(field_index(p->field)); error_msg(loc, "FUNCTION %qs has " - "inconsistent parameter type %td (%qs)", - keyword_str($1), p - args.data(), name_of(p->field) ); + "inconsistent parameter type %ld (%qs)", + keyword_str($1), (long)(p - args.data()), name_of(p->field) ); YYERROR; } $$ = is_numeric(args[0].field)? @@ -11959,7 +11959,10 @@ current_t::udf_args_valid( const cbl_label_t *L, if( arg.field ) { // else omitted auto tgt = cbl_field_of(symbol_at(udf.linkage_fields.at(i).isym)); if( ! valid_move(tgt, arg.field) ) { - auto loc = symbol_field_location(field_index(arg.field)); + auto loc = current_location; + if( ! is_temporary(arg.field) ) { + loc = symbol_field_location(field_index(arg.field)); + } error_msg(loc, "FUNCTION %s argument %zu, '%s' cannot be passed to %s, type %s", L->name, i, arg.field->pretty_name(), tgt->pretty_name(), 3 + cbl_field_type_str(tgt->type) ); diff --git a/gcc/cobol/scan.l b/gcc/cobol/scan.l index ba4c044e15e3..5773f09a9704 100644 --- a/gcc/cobol/scan.l +++ b/gcc/cobol/scan.l @@ -123,7 +123,7 @@ NUMEDCHAR [BPVZ90/,]+{COUNT}? NUMEDCHARS {NUMEDCHAR}([.]?{NUMEDCHAR})* NUMED ([+-]{NUMEDCHARS}+)|({NUMEDCHARS}+[+-]) CURRENCY [A-Zfhijklmoqtuwy\x80-\xFF]{-}[ABCDEGNPRSVXZ] -NUMEDCUR (([.]?[-$0B/Z*+,P9()V+–]|{CURRENCY}+|{COUNT})+([.][$0B/Z*+P9()V+\–])*)+ +NUMEDCUR (([.]?[$0B/Z*+,P9()V+-]|{CURRENCY}+|{COUNT})+([.][$0B/Z*+P9()V+-])*)+ NUMEDITED {NUMED}|{NUMEDCUR} EDITED {ALPHED}|{NUMED}|{NUMEDCUR} diff --git a/gcc/cobol/scan_ante.h b/gcc/cobol/scan_ante.h index 31093a68350f..c00826d652fd 100644 --- a/gcc/cobol/scan_ante.h +++ b/gcc/cobol/scan_ante.h @@ -149,7 +149,7 @@ numstr_of( const char string[], radix_t radix = decimal_e ) { } auto nx = std::count_if(input, p, fisdigit); if( 36 < nx ) { - error_msg(yylloc, "significand of %s has more than 36 digits (%td)", input, nx); + error_msg(yylloc, "significand of %s has more than 36 digits (%ld)", input, (long)nx); return NO_CONDITION; } diff --git a/gcc/cobol/show_parse.h b/gcc/cobol/show_parse.h index bd0e16fe8332..e1a8cb21b6f7 100644 --- a/gcc/cobol/show_parse.h +++ b/gcc/cobol/show_parse.h @@ -500,7 +500,7 @@ class ANALYZE int level; inline static int analyze_level=1; public: - ANALYZE(const char *func_) : func(func_) + ANALYZE(const char *func_) : func(func_) // cppcheck-suppress noExplicitConstructor { level = 0; if( getenv("Analyze") ) diff --git a/gcc/cobol/structs.cc b/gcc/cobol/structs.cc index 7a4db97ea483..2393dfbe5b92 100644 --- a/gcc/cobol/structs.cc +++ b/gcc/cobol/structs.cc @@ -156,7 +156,6 @@ tree cblc_field_p_type_node; tree cblc_field_pp_type_node; tree cblc_file_type_node; tree cblc_file_p_type_node; -tree cbl_enabled_exception_type_node; tree cblc_goto_type_node; // The following functions return type_decl nodes for the various structures @@ -288,29 +287,6 @@ typedef struct cblc_file_t return retval; } -static tree -create_cbl_enabled_exception_t() - { - /* - struct cbl_enabled_exception_t - { - bool enabled, location; - ec_type_t ec; - size_t file; - }; - */ - tree retval = NULL_TREE; - retval = gg_get_filelevel_struct_type_decl( "cbl_enabled_exception_t", - 4, - BOOL, "enabled", - BOOL, "location", - UINT, "ec", - SIZE_T, "file"); - retval = TREE_TYPE(retval); - - return retval; - 
} -
void create_our_type_nodes() { @@ -323,7 +299,6 @@ create_our_type_nodes()
    cblc_field_pp_type_node = build_pointer_type(cblc_field_p_type_node);
    cblc_file_type_node     = create_cblc_file_t();
    cblc_file_p_type_node   = build_pointer_type(cblc_file_type_node);
-    cbl_enabled_exception_type_node = create_cbl_enabled_exception_t();
    }
  }
diff --git a/gcc/cobol/structs.h b/gcc/cobol/structs.h index 1a16523dcb6b..47a78b45e9a8 100644 --- a/gcc/cobol/structs.h +++ b/gcc/cobol/structs.h @@ -54,7 +54,6 @@ extern GTY(()) tree cblc_field_p_type_node;
extern GTY(()) tree cblc_field_pp_type_node;
extern GTY(()) tree cblc_file_type_node;
extern GTY(()) tree cblc_file_p_type_node;
-extern GTY(()) tree cbl_enabled_exception_type_node;
extern GTY(()) tree cblc_goto_type_node;
extern void create_our_type_nodes();
diff --git a/gcc/cobol/symbols.cc b/gcc/cobol/symbols.cc index f2cd1b55f802..bbe99b6801f6 100644 --- a/gcc/cobol/symbols.cc +++ b/gcc/cobol/symbols.cc @@ -1598,7 +1598,17 @@ extend_66_capacity( cbl_field_t *alias ) {
  symbol_elem_t *e = symbol_at(alias->parent);
  symbol_elem_t *e2 = reinterpret_cast<symbol_elem_t *>(const_cast<char *>(alias->data.picture));
+#ifndef __OPTIMIZE__
+#pragma message "The assert(e < e2) needs fixing"
+  // The following assert fails when valgrind is involved.  This is the known
+  // problem of expecting mmap() to put new memory maps after older memory
+  // maps; that assumption fails when valgrind is involved.
+
+  // For now I am defeating the assert when using -O0 so that I can run the
+  // NIST "make valgrind" tests.  But this should be fixed so that the
+  // symbol table index is used, not the entry locations.
  assert(e < e2);
+#endif
  alias->data.picture = NULL;
  capacity_of cap;
diff --git a/gcc/cobol/util.cc b/gcc/cobol/util.cc index aed9483ff7bb..2a7bf2ba86d0 100644 --- a/gcc/cobol/util.cc +++ b/gcc/cobol/util.cc @@ -1049,8 +1049,8 @@ cbl_field_t::report_invalid_initial_value(const YYLTYPE& loc) const {
                                     return TOUPPER(ch) == 'E'; } );
      if( !has_exponent && data.precision() < pend - p ) {
-        error_msg(loc, "%s cannot represent  VALUE %qs exactly (max %c%td)",
-                  name, data.initial, '.', pend - p);
+        error_msg(loc, "%s cannot represent  VALUE %qs exactly (max %c%ld)",
+                  name, data.initial, '.', (long)(pend - p));
      }
    }
  }
diff --git a/gcc/common.opt b/gcc/common.opt index 70659fabebd5..cd6a224c9a9a 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -205,15 +205,15 @@ bool flag_opts_finished
; What the sanitizer should instrument
Variable
-unsigned int flag_sanitize
+sanitize_code_type flag_sanitize
; What sanitizers should recover from errors
Variable
-unsigned int flag_sanitize_recover = (SANITIZE_UNDEFINED | SANITIZE_UNDEFINED_NONDEFAULT | SANITIZE_KERNEL_ADDRESS | SANITIZE_KERNEL_HWADDRESS) & ~(SANITIZE_UNREACHABLE | SANITIZE_RETURN)
+sanitize_code_type flag_sanitize_recover = (SANITIZE_UNDEFINED | SANITIZE_UNDEFINED_NONDEFAULT | SANITIZE_KERNEL_ADDRESS | SANITIZE_KERNEL_HWADDRESS) & ~(SANITIZE_UNREACHABLE | SANITIZE_RETURN)
; What sanitizers should use __builtin_trap () instead of runtime diagnostics
Variable
-unsigned int flag_sanitize_trap
+sanitize_code_type flag_sanitize_trap
; Flag whether a prefix has been added to dump_base_name
Variable
@@ -1067,6 +1067,8 @@ Driver Undocumented
;
; 21: Fix noexcept lambda capture pruning.
;     Fix C++20 layout of base with all explicitly defaulted constructors.
+;     Fix mangling of class and array objects with implicitly
+;     zero-initialized non-trailing subobjects.
;     Default in G++ 16.
; ; Additional positive integers will be assigned as new versions of @@ -1618,6 +1620,25 @@ fdiagnostics-minimum-margin-width= Common Joined UInteger Var(diagnostics_minimum_margin_width) Init(6) Set minimum width of left margin of source code when showing source. +fdiagnostics-show-nesting +Common Var(flag_diagnostics_show_nesting) Init(1) +Use indentation to show nesting of diagnostics in text output. + +fdiagnostics-show-nesting-locations +Common Var(flag_diagnostics_show_nesting_locations) Init(1) +Show location information when showing nested diagnostics. + +fdiagnostics-show-nesting-levels +Common Var(flag_diagnostics_show_nesting_levels) Init(0) +Show nesting levels as numbers when showing nested diagnostics. + +fdiagnostics-show-context +Common Alias(fdiagnostics-show-context=,1,0) + +fdiagnostics-show-context= +Common Joined RejectNegative UInteger Var(flag_diagnostics_show_context) Init(0) +Collect and print more context information for diagnostics. + fdisable- Common Joined RejectNegative Var(common_deferred_options) Defer -fdisable-[tree|rtl|ipa]-=range1+range2 Disable an optimization pass. diff --git a/gcc/common.opt.urls b/gcc/common.opt.urls index 38dd9d317598..3684edd63073 100644 --- a/gcc/common.opt.urls +++ b/gcc/common.opt.urls @@ -640,6 +640,21 @@ UrlSuffix(gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-text fdiagnostics-minimum-margin-width= UrlSuffix(gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-minimum-margin-width) +fdiagnostics-show-nesting +UrlSuffix(gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-show-nesting) + +fdiagnostics-show-nesting-locations +UrlSuffix(gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-show-nesting-locations) + +fdiagnostics-show-nesting-levels +UrlSuffix(gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-show-nesting-levels) + +fdiagnostics-show-context +UrlSuffix(gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-show-context) + +fdiagnostics-show-context= +UrlSuffix(gcc/Diagnostic-Message-Formatting-Options.html#index-fdiagnostics-show-context) + fdisable- UrlSuffix(gcc/Developer-Options.html#index-fdisable-) diff --git a/gcc/common/config/pru/pru-common.cc b/gcc/common/config/pru/pru-common.cc index dcc27b913d40..19e35702530b 100644 --- a/gcc/common/config/pru/pru-common.cc +++ b/gcc/common/config/pru/pru-common.cc @@ -28,7 +28,8 @@ along with GCC; see the file COPYING3. If not see #include "flags.h" #undef TARGET_DEFAULT_TARGET_FLAGS -#define TARGET_DEFAULT_TARGET_FLAGS (MASK_OPT_LOOP) +#define TARGET_DEFAULT_TARGET_FLAGS \ + (MASK_OPT_LOOP | MASK_OPT_MUL | MASK_OPT_FILLZERO) #undef TARGET_EXCEPT_UNWIND_INFO #define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info diff --git a/gcc/common/config/riscv/riscv-common.cc b/gcc/common/config/riscv/riscv-common.cc index da3cb9f788dc..6582c15bae2e 100644 --- a/gcc/common/config/riscv/riscv-common.cc +++ b/gcc/common/config/riscv/riscv-common.cc @@ -378,7 +378,7 @@ riscv_subset_t::riscv_subset_t () { } -riscv_subset_list::riscv_subset_list (const char *arch, location_t loc) +riscv_subset_list::riscv_subset_list (const char *arch, location_t *loc) : m_arch (arch), m_loc (loc), m_head (NULL), m_tail (NULL), m_xlen (0), m_subset_num (0), m_allow_adding_dup (false) { @@ -584,47 +584,49 @@ riscv_subset_list::add (const char *subset, int major_version, else { /* The extension is already in the list. 
*/ - if (!m_allow_adding_dup - || ext->major_version != major_version - || ext->minor_version != minor_version) - error_at ( - m_loc, - "%<-march=%s%>: extension %qs appear more than one time", - m_arch, - subset); + if (m_loc + && (!m_allow_adding_dup + || ext->major_version != major_version + || ext->minor_version != minor_version)) + error_at (*m_loc, "%<-march=%s%>: extension %qs appear more " + "than one time", m_arch, subset); } return; } else if (strlen (subset) == 1 && !standard_extensions_p (subset)) { - error_at (m_loc, - "%<-march=%s%>: extension %qs is unsupported standard single " - "letter extension", - m_arch, subset); + if (m_loc) + error_at (*m_loc, + "%<-march=%s%>: extension %qs is unsupported standard " + "single letter extension", + m_arch, subset); return; } else if (subset[0] == 'z' && !standard_extensions_p (subset)) { - error_at (m_loc, - "%<-march=%s%>: extension %qs starts with 'z' but is " - "unsupported standard extension", - m_arch, subset); + if (m_loc) + error_at (*m_loc, + "%<-march=%s%>: extension %qs starts with 'z' but is " + "unsupported standard extension", + m_arch, subset); return; } else if (subset[0] == 's' && !standard_extensions_p (subset)) { - error_at (m_loc, - "%<-march=%s%>: extension %qs starts with 's' but is " - "unsupported standard supervisor extension", - m_arch, subset); + if (m_loc) + error_at (*m_loc, + "%<-march=%s%>: extension %qs starts with 's' but is " + "unsupported standard supervisor extension", + m_arch, subset); return; } else if (subset[0] == 'x' && !standard_extensions_p (subset)) { - error_at (m_loc, - "%<-march=%s%>: extension %qs starts with 'x' but is " - "unsupported non-standard extension", - m_arch, subset); + if (m_loc) + error_at (*m_loc, + "%<-march=%s%>: extension %qs starts with 'x' but is " + "unsupported non-standard extension", + m_arch, subset); return; } @@ -886,15 +888,17 @@ riscv_subset_list::parsing_subset_version (const char *ext, { if (!ISDIGIT (*(p+1))) { - error_at (m_loc, "%<-march=%s%>: expect number " - "after %<%dp%>", m_arch, version); + if (m_loc) + error_at (*m_loc, "%<-march=%s%>: expect number " + "after %<%dp%>", m_arch, version); return NULL; } if (!major_p) { - error_at (m_loc, "%<-march=%s%>: for %<%s%dp%dp?%>, version " - "number with more than 2 level is not supported", - m_arch, ext, major, version); + if (m_loc) + error_at (*m_loc, "%<-march=%s%>: for %<%s%dp%dp?%>, " + "version number with more than 2 level is not " + "supported", m_arch, ext, major, version); return NULL; } major = version; @@ -957,8 +961,9 @@ riscv_subset_list::parse_profiles (const char *arch) /* If isn't '_' after profile, need to add it and mention the user. 
*/ if (after_part[0] != '_') { - warning_at (m_loc, 0, "Should use \"%c\" to contact Profiles with other " - "extensions", '_'); + if (m_loc) + warning_at (*m_loc, 0, "Should use \"%c\" to contact Profiles " + "with other extensions", '_'); return p_str + "_" + after_part; } @@ -996,9 +1001,10 @@ riscv_subset_list::parse_base_ext (const char *p) } else { - error_at (m_loc, "%<-march=%s%>: ISA string must begin with rv32, rv64," - " a supported RVA profile or refer to a supported CPU", - m_arch); + if (m_loc) + error_at (*m_loc, "%<-march=%s%>: ISA string must begin with rv32, " + "rv64, a supported RVA profile or refer to a supported CPU", + m_arch); return NULL; } @@ -1021,8 +1027,9 @@ riscv_subset_list::parse_base_ext (const char *p) if (m_xlen > 64) { - error_at (m_loc, "%<-march=%s%>: rv%de is not a valid base ISA", - m_arch, m_xlen); + if (m_loc) + error_at (*m_loc, "%<-march=%s%>: rv%de is not a valid base ISA", + m_arch, m_xlen); return NULL; } break; @@ -1033,8 +1040,9 @@ riscv_subset_list::parse_base_ext (const char *p) /* std_ext_p= */ true, &explicit_version_p); if (major_version != 0 || minor_version != 0) { - warning_at (m_loc, 0, "version of % will be omitted, please " - "specify version for individual extension"); + if (m_loc) + warning_at (*m_loc, 0, "version of % will be omitted, please " + "specify version for individual extension"); } /* We have special rule for G, we disallow rv32gm2p but allow rv32g_zicsr @@ -1052,8 +1060,9 @@ riscv_subset_list::parse_base_ext (const char *p) break; default: - error_at (m_loc, "%<-march=%s%>: first ISA subset must be %, " - "% or %", m_arch); + if (m_loc) + error_at (*m_loc, "%<-march=%s%>: first ISA subset must be %, " + "% or %", m_arch); return NULL; } return p; @@ -1074,10 +1083,9 @@ riscv_subset_list::parse_single_std_ext (const char *p, bool exact_single_p) { if (*p == 'x' || *p == 's' || *p == 'z') { - error_at (m_loc, - "%<-march=%s%>: Not single-letter extension. " - "%<%c%>", - m_arch, *p); + if (m_loc) + error_at (*m_loc, "%<-march=%s%>: Not single-letter extension. 
%<%c%>", + m_arch, *p); return nullptr; } @@ -1183,54 +1191,58 @@ riscv_subset_list::handle_combine_ext () void riscv_subset_list::check_conflict_ext () { + if (!m_loc) + return; + if (lookup ("zcf") && m_xlen == 64) - error_at (m_loc, "%<-march=%s%>: zcf extension supports in rv32 only", + error_at (*m_loc, "%<-march=%s%>: zcf extension supports in rv32 only", m_arch); if (lookup ("zilsd") && m_xlen == 64) - error_at (m_loc, "%<-march=%s%>: zilsd extension supports in rv32 only", + error_at (*m_loc, "%<-march=%s%>: zilsd extension supports in rv32 only", m_arch); if (lookup ("zclsd") && m_xlen == 64) - error_at (m_loc, "%<-march=%s%>: zclsd extension supports in rv32 only", + error_at (*m_loc, "%<-march=%s%>: zclsd extension supports in rv32 only", m_arch); if (lookup ("ssnpm") && m_xlen == 32) - error_at (m_loc, "%<-march=%s%>: ssnpm extension supports in rv64 only", + error_at (*m_loc, "%<-march=%s%>: ssnpm extension supports in rv64 only", m_arch); if (lookup ("smnpm") && m_xlen == 32) - error_at (m_loc, "%<-march=%s%>: smnpm extension supports in rv64 only", + error_at (*m_loc, "%<-march=%s%>: smnpm extension supports in rv64 only", m_arch); if (lookup ("smmpm") && m_xlen == 32) - error_at (m_loc, "%<-march=%s%>: smmpm extension supports in rv64 only", + error_at (*m_loc, "%<-march=%s%>: smmpm extension supports in rv64 only", m_arch); if (lookup ("sspm") && m_xlen == 32) - error_at (m_loc, "%<-march=%s%>: sspm extension supports in rv64 only", + error_at (*m_loc, "%<-march=%s%>: sspm extension supports in rv64 only", m_arch); if (lookup ("supm") && m_xlen == 32) - error_at (m_loc, "%<-march=%s%>: supm extension supports in rv64 only", + error_at (*m_loc, "%<-march=%s%>: supm extension supports in rv64 only", m_arch); if (lookup ("zfinx") && lookup ("f")) - error_at (m_loc, + error_at (*m_loc, "%<-march=%s%>: z*inx conflicts with floating-point " "extensions", m_arch); /* 'H' hypervisor extension requires base ISA with 32 registers. */ if (lookup ("e") && lookup ("h")) - error_at (m_loc, "%<-march=%s%>: h extension requires i extension", m_arch); + error_at (*m_loc, "%<-march=%s%>: h extension requires i extension", + m_arch); if (lookup ("zcd")) { if (lookup ("zcmt")) - error_at (m_loc, "%<-march=%s%>: zcd conflicts with zcmt", m_arch); + error_at (*m_loc, "%<-march=%s%>: zcd conflicts with zcmt", m_arch); if (lookup ("zcmp")) - error_at (m_loc, "%<-march=%s%>: zcd conflicts with zcmp", m_arch); + error_at (*m_loc, "%<-march=%s%>: zcd conflicts with zcmp", m_arch); } if ((lookup ("v") || lookup ("zve32x") @@ -1238,9 +1250,9 @@ riscv_subset_list::check_conflict_ext () || lookup ("zve64f") || lookup ("zve64d") || lookup ("zvl32b") || lookup ("zvl64b") || lookup ("zvl128b") || lookup ("zvfh")) - && lookup ("xtheadvector")) - error_at (m_loc, "%<-march=%s%>: xtheadvector conflicts with vector " - "extension or its sub-extensions", m_arch); + && lookup ("xtheadvector")) + error_at (*m_loc, "%<-march=%s%>: xtheadvector conflicts with vector " + "extension or its sub-extensions", m_arch); } /* Parsing function for multi-letter extensions. 
@@ -1336,8 +1348,9 @@ riscv_subset_list::parse_single_multiletter_ext (const char *p, if (strlen (subset) == 1) { - error_at (m_loc, "%<-march=%s%>: name of %s must be more than 1 letter", - m_arch, ext_type_str); + if (m_loc) + error_at (*m_loc, "%<-march=%s%>: name of %s must be more " + "than 1 letter", m_arch, ext_type_str); free (subset); return NULL; } @@ -1348,8 +1361,9 @@ riscv_subset_list::parse_single_multiletter_ext (const char *p, if (*p != '\0' && *p != '_') { - error_at (m_loc, "%<-march=%s%>: %s must separate with %<_%>", - m_arch, ext_type_str); + if (m_loc) + error_at (*m_loc, "%<-march=%s%>: %s must separate with %<_%>", + m_arch, ext_type_str); return NULL; } @@ -1386,10 +1400,12 @@ riscv_subset_list::parse_single_ext (const char *p, bool exact_single_p) } } -/* Parsing arch string to subset list, return NULL if parsing failed. */ +/* Parsing arch string to subset list, return NULL if parsing failed. + If LOC is nonnull, report diagnostics against location *LOC, otherwise + remain silent. */ riscv_subset_list * -riscv_subset_list::parse (const char *arch, location_t loc) +riscv_subset_list::parse (const char *arch, location_t *loc) { if (riscv_subset_list::parse_failed) return NULL; @@ -1412,8 +1428,9 @@ riscv_subset_list::parse (const char *arch, location_t loc) case 'e': case 'i': case 'g': - error_at (loc, "%<-march=%s%>: i, e or g must be the first extension", - arch); + if (loc) + error_at (*loc, "%<-march=%s%>: i, e or g must be " + "the first extension", arch); goto fail; default: p = subset_list->parse_single_ext (p, /*exact_single_p=*/ false); @@ -1448,7 +1465,7 @@ riscv_subset_list::clone () const } void -riscv_subset_list::set_loc (location_t loc) +riscv_subset_list::set_loc (location_t *loc) { m_loc = loc; } @@ -1622,12 +1639,15 @@ riscv_ext_is_subset (struct cl_target_option *opts, The minimal feature bits refer to using the earliest extension that appeared in the Linux hwprobe to support the specified ISA string. This ensures that older kernels, which may lack certain implied extensions, can still run the - FMV dispatcher correctly. */ + FMV dispatcher correctly. + + If LOC is nonnull, report any diagnostics against *LOC, otherwise remain + silent. */ bool riscv_minimal_hwprobe_feature_bits (const char *isa, struct riscv_feature_bits *res, - location_t loc) + location_t *loc) { riscv_subset_list *subset_list; subset_list = riscv_subset_list::parse (isa, loc); @@ -1697,7 +1717,7 @@ riscv_parse_arch_string (const char *isa, location_t loc) { riscv_subset_list *subset_list; - subset_list = riscv_subset_list::parse (isa, loc); + subset_list = riscv_subset_list::parse (isa, &loc); if (!subset_list) return; @@ -1758,6 +1778,11 @@ riscv_expand_arch (int argc, { gcc_assert (argc == 1); location_t loc = UNKNOWN_LOCATION; + + /* Filter out -march=unset, it will expand from -mcpu later. */ + if (strcmp (argv[0], "unset") == 0) + return ""; + /* Try to interpret the arch as CPU first. */ const char *arch_str = riscv_expand_arch_from_cpu (argc, argv); if (!strlen (arch_str)) @@ -1899,7 +1924,8 @@ riscv_multi_lib_info_t::parse ( } multi_lib_info->subset_list = - riscv_subset_list::parse (multi_lib_info->arch_str.c_str (), input_location); + riscv_subset_list::parse (multi_lib_info->arch_str.c_str (), + &input_location); return true; } @@ -2088,7 +2114,7 @@ riscv_compute_multilib ( return multilib_dir; subset_list = riscv_subset_list::parse (riscv_current_arch_str.c_str (), - input_location); + &input_location); /* Failed to parse -march, fallback to using what gcc use. 
*/ if (subset_list == NULL) diff --git a/gcc/config.gcc b/gcc/config.gcc index 0d8dbc4fb199..04e88cce00d5 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -273,7 +273,7 @@ esac # Obsolete configurations. case ${target} in ia64*-*-hpux* | ia64*-*-*vms* | ia64*-*-elf* \ - | m32c*-*-* \ + | m32c*-*-* | epiphany*-*-* | rl78*-*-* \ ) if test "x$enable_obsolete" != xyes; then echo "*** Configuration ${target} is obsolete." >&2 @@ -351,7 +351,7 @@ aarch64*-*-*) c_target_objs="aarch64-c.o" cxx_target_objs="aarch64-c.o" d_target_objs="aarch64-d.o" - extra_objs="aarch64-builtins.o aarch-common.o aarch64-elf-metadata.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o aarch-bti-insert.o aarch64-cc-fusion.o aarch64-early-ra.o aarch64-ldp-fusion.o" + extra_objs="aarch64-builtins.o aarch-common.o aarch64-elf-metadata.o aarch64-sve-builtins.o aarch64-sve-builtins-shapes.o aarch64-sve-builtins-base.o aarch64-sve-builtins-sve2.o aarch64-sve-builtins-sme.o cortex-a57-fma-steering.o aarch64-speculation.o aarch-bti-insert.o aarch64-early-ra.o aarch64-ldp-fusion.o" target_gtfiles="\$(srcdir)/config/aarch64/aarch64-protos.h \$(srcdir)/config/aarch64/aarch64-builtins.h \$(srcdir)/config/aarch64/aarch64-builtins.cc \$(srcdir)/config/aarch64/aarch64-sve-builtins.h \$(srcdir)/config/aarch64/aarch64-sve-builtins.cc" target_has_targetm_common=yes ;; @@ -3207,7 +3207,7 @@ powerpcle-*-eabi*) ;; pru*-*-*) tm_file="elfos.h newlib-stdint.h ${tm_file}" - tmake_file="${tmake_file} pru/t-pru" + tmake_file="${tmake_file} pru/t-multilib pru/t-pru" extra_objs="pru-pragma.o pru-passes.o" use_gcc_stdint=wrap ;; @@ -4658,7 +4658,7 @@ case "${target}" in ;; i[34567]86-*-* | x86_64-*-*) - supported_defaults="abi arch arch_32 arch_64 cpu cpu_32 cpu_64 tune tune_32 tune_64" + supported_defaults="abi arch arch_32 arch_64 cpu cpu_32 cpu_64 tune tune_32 tune_64 tls" for which in arch arch_32 arch_64 cpu cpu_32 cpu_64 tune tune_32 tune_64; do eval "val=\$with_$which" case " $x86_archs " in @@ -4717,6 +4717,17 @@ case "${target}" in ;; esac done + + # Handle --with-tls. + case "$with_tls" in + ""|gnu|gnu2) + # OK + ;; + *) + echo "Unknown TLS method used in --with-tls=$with_tls" 1>&2 + exit 1 + ;; + esac ;; riscv*-*-*) @@ -4782,7 +4793,7 @@ case "${target}" in exit 1 ;; esac - PYTHON=`which python || which python3 || which python2` + PYTHON=`which python3 || which python` if test "x${PYTHON}" != x; then with_arch=`${PYTHON} ${srcdir}/config/riscv/arch-canonicalize -misa-spec=${with_isa_spec} ${with_arch}` fi diff --git a/gcc/config.in b/gcc/config.in index 353d1bc94078..183d0dfcada4 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -2375,6 +2375,12 @@ #endif +/* Define to 1 if ld64 supports '-no_deduplicate'. */ +#ifndef USED_FOR_TARGET +#undef LD64_HAS_NO_DEDUPLICATE +#endif + + /* Define to 1 if ld64 supports '-platform_version'. */ #ifndef USED_FOR_TARGET #undef LD64_HAS_PLATFORM_VERSION diff --git a/gcc/config/aarch64/aarch64-cc-fusion.cc b/gcc/config/aarch64/aarch64-cc-fusion.cc deleted file mode 100644 index cea54dee298b..000000000000 --- a/gcc/config/aarch64/aarch64-cc-fusion.cc +++ /dev/null @@ -1,297 +0,0 @@ -// Pass to fuse CC operations with other instructions. -// Copyright (C) 2021-2025 Free Software Foundation, Inc. -// -// This file is part of GCC. 
-// -// GCC is free software; you can redistribute it and/or modify it under -// the terms of the GNU General Public License as published by the Free -// Software Foundation; either version 3, or (at your option) any later -// version. -// -// GCC is distributed in the hope that it will be useful, but WITHOUT ANY -// WARRANTY; without even the implied warranty of MERCHANTABILITY or -// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -// for more details. -// -// You should have received a copy of the GNU General Public License -// along with GCC; see the file COPYING3. If not see -// . - -// This pass looks for sequences of the form: -// -// A: (set (reg R1) X1) -// B: ...instructions that might change the value of X1... -// C: (set (reg CC) X2) // X2 uses R1 -// -// and tries to change them to: -// -// C': [(set (reg CC) X2') -// (set (reg R1) X1)] -// B: ...instructions that might change the value of X1... -// -// where X2' is the result of replacing R1 with X1 in X2. -// -// This sequence occurs in SVE code in two important cases: -// -// (a) Sometimes, to deal correctly with overflow, we need to increment -// an IV after a WHILELO rather than before it. In this case: -// - A is a WHILELO, -// - B includes an IV increment and -// - C is a separate PTEST. -// -// (b) ACLE code of the form: -// -// svbool_t ok = svrdffr (); -// if (svptest_last (pg, ok)) -// ... -// -// must, for performance reasons, be code-generated as: -// -// RDFFRS Pok.B, Pg/Z -// ...branch on flags result... -// -// without a separate PTEST of Pok. In this case: -// - A is an aarch64_rdffr -// - B includes an aarch64_update_ffrt -// - C is a separate PTEST -// -// Combine can handle this optimization if B doesn't exist and if A and -// C are in the same BB. This pass instead handles cases where B does -// exist and cases where A and C are in different BBs of the same EBB. - -#define IN_TARGET_CODE 1 - -#define INCLUDE_ALGORITHM -#define INCLUDE_FUNCTIONAL -#define INCLUDE_ARRAY -#include "config.h" -#include "system.h" -#include "coretypes.h" -#include "backend.h" -#include "rtl.h" -#include "df.h" -#include "rtl-ssa.h" -#include "tree-pass.h" - -using namespace rtl_ssa; - -namespace { -const pass_data pass_data_cc_fusion = -{ - RTL_PASS, // type - "cc_fusion", // name - OPTGROUP_NONE, // optinfo_flags - TV_NONE, // tv_id - 0, // properties_required - 0, // properties_provided - 0, // properties_destroyed - 0, // todo_flags_start - TODO_df_finish, // todo_flags_finish -}; - -// Class that represents one run of the pass. -class cc_fusion -{ -public: - cc_fusion () : m_parallel () {} - void execute (); - -private: - rtx optimizable_set (const insn_info *); - bool parallelize_insns (def_info *, rtx, def_info *, rtx); - void optimize_cc_setter (def_info *, rtx); - - // A spare PARALLEL rtx, or null if none. - rtx m_parallel; -}; - -// See whether INSN is a single_set that we can optimize. Return the -// set if so, otherwise return null. -rtx -cc_fusion::optimizable_set (const insn_info *insn) -{ - if (!insn->can_be_optimized () - || insn->is_asm () - || insn->has_volatile_refs () - || insn->has_pre_post_modify ()) - return NULL_RTX; - - return single_set (insn->rtl ()); -} - -// CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise -// a single_set that sets (only) OTHER_DEF. CC_SET is known to set the -// CC register and the instruction that contains CC_SET is known to use -// OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel. 
-bool -cc_fusion::parallelize_insns (def_info *cc_def, rtx cc_set, - def_info *other_def, rtx other_set) -{ - auto attempt = crtl->ssa->new_change_attempt (); - - insn_info *cc_insn = cc_def->insn (); - insn_info *other_insn = other_def->insn (); - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "trying to parallelize insn %d and insn %d\n", - other_insn->uid (), cc_insn->uid ()); - - // Try to substitute OTHER_SET into CC_INSN. - insn_change_watermark rtl_watermark; - rtx_insn *cc_rtl = cc_insn->rtl (); - insn_propagation prop (cc_rtl, SET_DEST (other_set), - SET_SRC (other_set)); - if (!prop.apply_to_pattern (&PATTERN (cc_rtl)) - || prop.num_replacements == 0) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "-- failed to substitute all uses of r%d\n", - other_def->regno ()); - return false; - } - - // Restrict the uses to those outside notes. - use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ()); - use_array other_set_uses = remove_note_accesses (attempt, - other_insn->uses ()); - - // Remove the use of the substituted value. - access_array_builder uses_builder (attempt); - uses_builder.reserve (cc_uses.size ()); - for (use_info *use : cc_uses) - if (use->def () != other_def) - uses_builder.quick_push (use); - cc_uses = use_array (uses_builder.finish ()); - - // Get the list of uses for the new instruction. - insn_change cc_change (cc_insn); - cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses); - if (!cc_change.new_uses.is_valid ()) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "-- cannot merge uses\n"); - return false; - } - - // The instruction initially defines just two registers. recog can add - // extra clobbers if necessary. - auto_vec new_defs; - new_defs.quick_push (cc_def); - new_defs.quick_push (other_def); - sort_accesses (new_defs); - cc_change.new_defs = def_array (access_array (new_defs)); - - // Make sure there is somewhere that the new instruction could live. - auto other_change = insn_change::delete_insn (other_insn); - insn_change *changes[] = { &other_change, &cc_change }; - cc_change.move_range = cc_insn->ebb ()->insn_range (); - if (!restrict_movement (cc_change, ignore_changing_insns (changes))) - { - if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "-- cannot satisfy all definitions and uses\n"); - return false; - } - - // Tentatively install the new pattern. By convention, the CC set - // must be first. - if (m_parallel) - { - XVECEXP (m_parallel, 0, 0) = cc_set; - XVECEXP (m_parallel, 0, 1) = other_set; - } - else - { - rtvec vec = gen_rtvec (2, cc_set, other_set); - m_parallel = gen_rtx_PARALLEL (VOIDmode, vec); - } - validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1); - - // These routines report failures themselves. - if (!recog (attempt, cc_change, ignore_changing_insns (changes)) - || !changes_are_worthwhile (changes) - || !crtl->ssa->verify_insn_changes (changes)) - return false; - - remove_reg_equal_equiv_notes (cc_rtl); - confirm_change_group (); - crtl->ssa->change_insns (changes); - m_parallel = NULL_RTX; - return true; -} - -// Try to optimize the instruction that contains CC_DEF, where CC_DEF describes -// a definition of the CC register by CC_SET. -void -cc_fusion::optimize_cc_setter (def_info *cc_def, rtx cc_set) -{ - // Search the registers used by the CC setter for an easily-substitutable - // def-use chain. 
- for (use_info *other_use : cc_def->insn ()->uses ()) - if (def_info *other_def = other_use->def ()) - if (other_use->regno () != CC_REGNUM - && other_def->ebb () == cc_def->ebb ()) - if (rtx other_set = optimizable_set (other_def->insn ())) - { - rtx dest = SET_DEST (other_set); - if (REG_P (dest) - && REGNO (dest) == other_def->regno () - && REG_NREGS (dest) == 1 - && parallelize_insns (cc_def, cc_set, other_def, other_set)) - return; - } -} - -// Run the pass on the current function. -void -cc_fusion::execute () -{ - // Initialization. - calculate_dominance_info (CDI_DOMINATORS); - df_analyze (); - crtl->ssa = new rtl_ssa::function_info (cfun); - - // Walk through all instructions that set CC. Look for a PTEST instruction - // that we can optimize. - // - // ??? The PTEST test isn't needed for correctness, but it ensures that the - // pass no effect on non-SVE code. - for (def_info *def : crtl->ssa->reg_defs (CC_REGNUM)) - if (rtx cc_set = optimizable_set (def->insn ())) - if (REG_P (SET_DEST (cc_set)) - && REGNO (SET_DEST (cc_set)) == CC_REGNUM - && GET_CODE (SET_SRC (cc_set)) == UNSPEC - && XINT (SET_SRC (cc_set), 1) == UNSPEC_PTEST) - optimize_cc_setter (def, cc_set); - - // Finalization. - crtl->ssa->perform_pending_updates (); - free_dominance_info (CDI_DOMINATORS); -} - -class pass_cc_fusion : public rtl_opt_pass -{ -public: - pass_cc_fusion (gcc::context *ctxt) - : rtl_opt_pass (pass_data_cc_fusion, ctxt) - {} - - // opt_pass methods: - virtual bool gate (function *) { return TARGET_SVE && optimize >= 2; } - virtual unsigned int execute (function *); -}; - -unsigned int -pass_cc_fusion::execute (function *) -{ - cc_fusion ().execute (); - return 0; -} - -} // end namespace - -// Create a new CC fusion pass instance. - -rtl_opt_pass * -make_pass_cc_fusion (gcc::context *ctxt) -{ - return new pass_cc_fusion (ctxt); -} diff --git a/gcc/config/aarch64/aarch64-passes.def b/gcc/config/aarch64/aarch64-passes.def index 9cf9d3e13b2c..6a53ff355917 100644 --- a/gcc/config/aarch64/aarch64-passes.def +++ b/gcc/config/aarch64/aarch64-passes.def @@ -24,6 +24,5 @@ INSERT_PASS_BEFORE (pass_reorder_blocks, 1, pass_track_speculation); INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_switch_pstate_sm); INSERT_PASS_BEFORE (pass_late_thread_prologue_and_epilogue, 1, pass_late_track_speculation); INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_insert_bti); -INSERT_PASS_AFTER (pass_if_after_combine, 1, pass_cc_fusion); INSERT_PASS_BEFORE (pass_early_remat, 1, pass_ldp_fusion); INSERT_PASS_BEFORE (pass_peephole2, 1, pass_ldp_fusion); diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index 36bd88593ffe..56efcf2c7f2c 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -1098,6 +1098,7 @@ bool aarch64_legitimate_address_p (machine_mode, rtx, bool, aarch64_addr_query_type = ADDR_QUERY_M); machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx); rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx); +rtx aarch64_gen_compare_split_imm24 (rtx, rtx, rtx); bool aarch64_maxmin_plus_const (rtx_code, rtx *, bool); rtx aarch64_load_tp (rtx); @@ -1236,7 +1237,6 @@ rtl_opt_pass *make_pass_fma_steering (gcc::context *); rtl_opt_pass *make_pass_track_speculation (gcc::context *); rtl_opt_pass *make_pass_late_track_speculation (gcc::context *); rtl_opt_pass *make_pass_insert_bti (gcc::context *ctxt); -rtl_opt_pass *make_pass_cc_fusion (gcc::context *ctxt); rtl_opt_pass *make_pass_switch_pstate_sm (gcc::context *ctxt); 
rtl_opt_pass *make_pass_ldp_fusion (gcc::context *); @@ -1281,4 +1281,7 @@ extern bool aarch64_gcs_enabled (); extern unsigned aarch64_data_alignment (const_tree exp, unsigned align); extern unsigned aarch64_stack_alignment (const_tree exp, unsigned align); +extern rtx aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x, + rtx_code_label *label); + #endif /* GCC_AARCH64_PROTOS_H */ diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md index 6b1a747a0f4e..0123ea0e2349 100644 --- a/gcc/config/aarch64/aarch64-sme.md +++ b/gcc/config/aarch64/aarch64-sme.md @@ -400,7 +400,8 @@ auto label = gen_label_rtx (); auto tpidr2 = gen_rtx_REG (DImode, R16_REGNUM); emit_insn (gen_aarch64_read_tpidr2 (tpidr2)); - auto jump = emit_likely_jump_insn (gen_aarch64_cbznedi1 (tpidr2, label)); + auto pat = aarch64_gen_compare_zero_and_branch (NE, tpidr2, label); + auto jump = emit_likely_jump_insn (pat); JUMP_LABEL (jump) = label; aarch64_restore_za (operands[0]); diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc index e394c9a84a04..1764cf8f7e8f 100644 --- a/gcc/config/aarch64/aarch64-sve-builtins.cc +++ b/gcc/config/aarch64/aarch64-sve-builtins.cc @@ -4590,8 +4590,9 @@ function_expander::expand () { /* The last element of these functions is always an fpm_t that must be written to FPMR before the call to the instruction itself. */ - gcc_assert (args.last ()->mode == DImode); - emit_move_insn (gen_rtx_REG (DImode, FPM_REGNUM), args.last ()); + rtx fpm = args.last (); + gcc_assert (CONST_INT_P (fpm) || GET_MODE (fpm) == DImode); + emit_move_insn (gen_rtx_REG (DImode, FPM_REGNUM), fpm); } rtx result = base->expand (*this); if (function_returns_void_p ()) diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index 88d323af32dc..51e2d7d7e875 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -1542,18 +1542,18 @@ UNSPEC_LD1_GATHER))] "TARGET_SVE && TARGET_NON_STREAMING" {@ [cons: =0, 1, 2, 3, 4, 5 ] - [&w, Z, w, Ui1, Ui1, Upl] ld1\t%0.s, %5/z, [%2.s] - [?w, Z, 0, Ui1, Ui1, Upl] ^ - [&w, vgw, w, Ui1, Ui1, Upl] ld1\t%0.s, %5/z, [%2.s, #%1] - [?w, vgw, 0, Ui1, Ui1, Upl] ^ - [&w, rk, w, Z, Ui1, Upl] ld1\t%0.s, %5/z, [%1, %2.s, sxtw] - [?w, rk, 0, Z, Ui1, Upl] ^ - [&w, rk, w, Ui1, Ui1, Upl] ld1\t%0.s, %5/z, [%1, %2.s, uxtw] - [?w, rk, 0, Ui1, Ui1, Upl] ^ - [&w, rk, w, Z, i, Upl] ld1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] - [?w, rk, 0, Z, i, Upl] ^ - [&w, rk, w, Ui1, i, Upl] ld1\t%0.s, %5/z, [%1, %2.s, uxtw %p4] - [?w, rk, 0, Ui1, i, Upl] ^ + [&w, Z, w, Ui1, Ui1, Upl] ld1\t%0.s, %5/z, [%2.s] + [?w, Z, 0, Ui1, Ui1, Upl] ^ + [&w, vg, w, Ui1, Ui1, Upl] ld1\t%0.s, %5/z, [%2.s, #%1] + [?w, vg, 0, Ui1, Ui1, Upl] ^ + [&w, rk, w, Z, Ui1, Upl] ld1\t%0.s, %5/z, [%1, %2.s, sxtw] + [?w, rk, 0, Z, Ui1, Upl] ^ + [&w, rk, w, Ui1, Ui1, Upl] ld1\t%0.s, %5/z, [%1, %2.s, uxtw] + [?w, rk, 0, Ui1, Ui1, Upl] ^ + [&w, rk, w, Z, i, Upl] ld1\t%0.s, %5/z, [%1, %2.s, sxtw %p4] + [?w, rk, 0, Z, i, Upl] ^ + [&w, rk, w, Ui1, i, Upl] ld1\t%0.s, %5/z, [%1, %2.s, uxtw %p4] + [?w, rk, 0, Ui1, i, Upl] ^ } ) @@ -1572,14 +1572,14 @@ UNSPEC_LD1_GATHER))] "TARGET_SVE && TARGET_NON_STREAMING" {@ [cons: =0, 1, 2, 3, 4, 5] - [&w, Z, w, i, Ui1, Upl] ld1\t%0.d, %5/z, [%2.d] - [?w, Z, 0, i, Ui1, Upl] ^ - [&w, vgd, w, i, Ui1, Upl] ld1\t%0.d, %5/z, [%2.d, #%1] - [?w, vgd, 0, i, Ui1, Upl] ^ - [&w, rk, w, i, Ui1, Upl] ld1\t%0.d, %5/z, [%1, %2.d] - [?w, rk, 0, i, Ui1, Upl] ^ - [&w, rk, w, i, i, Upl] ld1\t%0.d, %5/z, [%1, %2.d, 
lsl %p4] - [?w, rk, 0, i, i, Upl] ^ + [&w, Z, w, i, Ui1, Upl] ld1\t%0.d, %5/z, [%2.d] + [?w, Z, 0, i, Ui1, Upl] ^ + [&w, vg, w, i, Ui1, Upl] ld1\t%0.d, %5/z, [%2.d, #%1] + [?w, vg, 0, i, Ui1, Upl] ^ + [&w, rk, w, i, Ui1, Upl] ld1\t%0.d, %5/z, [%1, %2.d] + [?w, rk, 0, i, Ui1, Upl] ^ + [&w, rk, w, i, i, Upl] ld1\t%0.d, %5/z, [%1, %2.d, lsl %p4] + [?w, rk, 0, i, i, Upl] ^ } ) @@ -2488,13 +2488,13 @@ (match_operand:SVE_4 4 "register_operand")] UNSPEC_ST1_SCATTER))] "TARGET_SVE && TARGET_NON_STREAMING" - {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ] - [ Z , w , Ui1 , Ui1 , w , Upl ] st1\t%4.s, %5, [%1.s] - [ vgw , w , Ui1 , Ui1 , w , Upl ] st1\t%4.s, %5, [%1.s, #%0] - [ rk , w , Z , Ui1 , w , Upl ] st1\t%4.s, %5, [%0, %1.s, sxtw] - [ rk , w , Ui1 , Ui1 , w , Upl ] st1\t%4.s, %5, [%0, %1.s, uxtw] - [ rk , w , Z , i , w , Upl ] st1\t%4.s, %5, [%0, %1.s, sxtw %p3] - [ rk , w , Ui1 , i , w , Upl ] st1\t%4.s, %5, [%0, %1.s, uxtw %p3] + {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ] + [ Z , w , Ui1 , Ui1 , w , Upl ] st1\t%4.s, %5, [%1.s] + [ vg , w , Ui1 , Ui1 , w , Upl ] st1\t%4.s, %5, [%1.s, #%0] + [ rk , w , Z , Ui1 , w , Upl ] st1\t%4.s, %5, [%0, %1.s, sxtw] + [ rk , w , Ui1 , Ui1 , w , Upl ] st1\t%4.s, %5, [%0, %1.s, uxtw] + [ rk , w , Z , i , w , Upl ] st1\t%4.s, %5, [%0, %1.s, sxtw %p3] + [ rk , w , Ui1 , i , w , Upl ] st1\t%4.s, %5, [%0, %1.s, uxtw %p3] } ) @@ -2511,11 +2511,11 @@ (match_operand:SVE_2 4 "register_operand")] UNSPEC_ST1_SCATTER))] "TARGET_SVE && TARGET_NON_STREAMING" - {@ [ cons: 0 , 1 , 3 , 4 , 5 ] - [ Z , w , Ui1 , w , Upl ] st1\t%4.d, %5, [%1.d] - [ vgd , w , Ui1 , w , Upl ] st1\t%4.d, %5, [%1.d, #%0] - [ rk , w , Ui1 , w , Upl ] st1\t%4.d, %5, [%0, %1.d] - [ rk , w , i , w , Upl ] st1\t%4.d, %5, [%0, %1.d, lsl %p3] + {@ [ cons: 0 , 1 , 3 , 4 , 5 ] + [ Z , w , Ui1 , w , Upl ] st1\t%4.d, %5, [%1.d] + [ vg , w , Ui1 , w , Upl ] st1\t%4.d, %5, [%1.d, #%0] + [ rk , w , Ui1 , w , Upl ] st1\t%4.d, %5, [%0, %1.d] + [ rk , w , i , w , Upl ] st1\t%4.d, %5, [%0, %1.d, lsl %p3] } ) diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index a4c3257d0876..a3cbbce8b311 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -2211,14 +2211,14 @@ ;; - FDOT (2-way, indexed) (FP8DOT2) ;; ------------------------------------------------------------------------- (define_insn "@aarch64_sve_dot" - [(set (match_operand:SVE_FULL_HSF 0 "register_operand") - (unspec:SVE_FULL_HSF - [(match_operand:SVE_FULL_HSF 1 "register_operand") + [(set (match_operand:SVE_FULL_HSF_FP8_FDOT 0 "register_operand") + (unspec:SVE_FULL_HSF_FP8_FDOT + [(match_operand:SVE_FULL_HSF_FP8_FDOT 1 "register_operand") (match_operand:VNx16QI 2 "register_operand") (match_operand:VNx16QI 3 "register_operand") (reg:DI FPM_REGNUM)] UNSPEC_DOT_FP8))] - "TARGET_SSVE_FP8DOT4 && !(mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)" + "" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] [ w , 0 , w , w ; * ] fdot\t%0., %2.b, %3.b [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;fdot\t%0., %2.b, %3.b @@ -2226,15 +2226,15 @@ ) (define_insn "@aarch64_sve_dot_lane" - [(set (match_operand:SVE_FULL_HSF 0 "register_operand") - (unspec:SVE_FULL_HSF - [(match_operand:SVE_FULL_HSF 1 "register_operand") + [(set (match_operand:SVE_FULL_HSF_FP8_FDOT 0 "register_operand") + (unspec:SVE_FULL_HSF_FP8_FDOT + [(match_operand:SVE_FULL_HSF_FP8_FDOT 1 "register_operand") (match_operand:VNx16QI 2 "register_operand") (match_operand:VNx16QI 3 "register_operand") (match_operand:SI 4 "const_int_operand") (reg:DI FPM_REGNUM)] 
UNSPEC_DOT_LANE_FP8))] - "TARGET_SSVE_FP8DOT4 && !(mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)" + "" {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ] [ w , 0 , w , y ; * ] fdot\t%0., %2.b, %3.b[%4] [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;fdot\t%0., %2.b, %3.b[%4] diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index d30c9c75e425..c51105662153 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -975,19 +975,24 @@ aarch64_cb_rhs (rtx_code op_code, rtx rhs) { case EQ: case NE: - case GT: - case GTU: case LT: case LTU: + case GE: + case GEU: + /* EQ/NE range is 0 .. 63. + LT/LTU range is 0 .. 63. + GE/GEU range is 1 .. 64 => GT x - 1, but also supports 0 via XZR. + So the intersection is 0 .. 63. */ return IN_RANGE (rhs_val, 0, 63); - case GE: /* CBGE: signed greater than or equal */ - case GEU: /* CBHS: unsigned greater than or equal */ - return IN_RANGE (rhs_val, 1, 64); - - case LE: /* CBLE: signed less than or equal */ - case LEU: /* CBLS: unsigned less than or equal */ - return IN_RANGE (rhs_val, -1, 62); + case GT: + case GTU: + case LE: + case LEU: + /* GT/GTU range is 0 .. 63 + LE/LEU range is -1 .. 62 => LT x + 1. + So the intersection is 0 .. 62. */ + return IN_RANGE (rhs_val, 0, 62); default: return false; @@ -2882,10 +2887,47 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y, return aarch64_gen_compare_reg (code, x, y); } +/* Split IMM into two 12-bit halves, producing an EQ/NE comparison vs X. + TMP may be a scratch. This optimizes a sequence from + mov x0, #imm1 + movk x0, #imm2, lsl 16 -- x0 contains CST + cmp x1, x0 + into the shorter: + sub tmp, x1, #(CST & 0xfff000) + subs tmp, tmp, #(CST & 0x000fff) +*/ +rtx +aarch64_gen_compare_split_imm24 (rtx x, rtx imm, rtx tmp) +{ + HOST_WIDE_INT lo_imm = UINTVAL (imm) & 0xfff; + HOST_WIDE_INT hi_imm = UINTVAL (imm) & 0xfff000; + enum machine_mode mode = GET_MODE (x); + + if (GET_CODE (tmp) == SCRATCH) + tmp = gen_reg_rtx (mode); + + emit_insn (gen_add3_insn (tmp, x, GEN_INT (-hi_imm))); + /* TODO: We don't need the gpr result of the second insn. */ + switch (mode) + { + case SImode: + tmp = gen_addsi3_compare0 (tmp, tmp, GEN_INT (-lo_imm)); + break; + case DImode: + tmp = gen_adddi3_compare0 (tmp, tmp, GEN_INT (-lo_imm)); + break; + default: + abort (); + } + emit_insn (tmp); + + return gen_rtx_REG (CC_NZmode, CC_REGNUM); +} + /* Generate conditional branch to LABEL, comparing X to 0 using CODE. Return the jump instruction. */ -static rtx +rtx aarch64_gen_compare_zero_and_branch (rtx_code code, rtx x, rtx_code_label *label) { @@ -14380,41 +14422,57 @@ aarch64_if_then_else_costs (rtx op0, rtx op1, rtx op2, int *cost, bool speed) if (GET_CODE (op1) == PC || GET_CODE (op2) == PC) { /* Conditional branch. */ - if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) + enum machine_mode cmpmode = GET_MODE (inner); + if (GET_MODE_CLASS (cmpmode) == MODE_CC) return true; - else + + if (comparator == const0_rtx) { - if (cmpcode == NE || cmpcode == EQ) + switch (cmpcode) { - if (comparator == const0_rtx) - { - /* TBZ/TBNZ/CBZ/CBNZ. */ - if (GET_CODE (inner) == ZERO_EXTRACT) - /* TBZ/TBNZ. */ - *cost += rtx_cost (XEXP (inner, 0), VOIDmode, - ZERO_EXTRACT, 0, speed); - else - /* CBZ/CBNZ. 
*/ - *cost += rtx_cost (inner, VOIDmode, cmpcode, 0, speed); - - return true; - } - if (register_operand (inner, VOIDmode) - && aarch64_imm24 (comparator, VOIDmode)) + case NE: + case EQ: + if (cmpmode != SImode && cmpmode != DImode) + break; + if (GET_CODE (inner) == ZERO_EXTRACT) { - /* SUB and SUBS. */ - *cost += COSTS_N_INSNS (2); - if (speed) - *cost += extra_cost->alu.arith * 2; + /* TBZ/TBNZ. */ + *cost += rtx_cost (XEXP (inner, 0), VOIDmode, + ZERO_EXTRACT, 0, speed); return true; } + /* FALLTHRU */ + + case LT: + case GE: + /* CBZ/CBNZ/TBZ/TBNZ. */ + *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed); + return true; + + default: + break; } - else if (cmpcode == LT || cmpcode == GE) - { - /* TBZ/TBNZ. */ - if (comparator == const0_rtx) - return true; - } + } + + if ((cmpcode == NE || cmpcode == EQ) + && (cmpmode == SImode || cmpmode == DImode) + && aarch64_split_imm24 (comparator, cmpmode)) + { + /* SUB and SUBS. */ + *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed); + *cost += COSTS_N_INSNS (2); + if (speed) + *cost += extra_cost->alu.arith * 2; + return true; + } + + if (TARGET_CMPBR) + { + *cost += rtx_cost (inner, cmpmode, cmpcode, 0, speed); + if ((cmpmode != SImode && cmpmode != DImode) + || !aarch64_cb_rhs (cmpcode, comparator)) + *cost += rtx_cost (comparator, cmpmode, cmpcode, 1, speed); + return true; } } else if (GET_MODE_CLASS (GET_MODE (inner)) == MODE_CC) @@ -16999,6 +17057,14 @@ class aarch64_vector_costs : public vector_costs or vector loop. There is one entry for each tuning option of interest. */ auto_vec m_ops; + + /* When doing inner-loop vectorization the constraints on the data-refs in the + outer-loop could limit the inner loop references. i.e. the outerloop can + force the inner-loop to do a load and splat which will result in the loop + being entirely scalar as all lanes work on a duplicate. Currently we don't + support unrolling of the inner loop independently from the outerloop during + outer-loop vectorization which tends to lead to pipeline bubbles. */ + bool m_loop_fully_scalar_dup = false; }; aarch64_vector_costs::aarch64_vector_costs (vec_info *vinfo, @@ -17320,13 +17386,14 @@ aarch64_multiply_add_p (vec_info *vinfo, stmt_vec_info stmt_info, static bool aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info, - unsigned int vec_flags) + slp_tree node, unsigned int vec_flags) { gassign *assign = dyn_cast (stmt_info->stmt); if (!assign + || !node || gimple_assign_rhs_code (assign) != BIT_AND_EXPR - || !STMT_VINFO_VECTYPE (stmt_info) - || !VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_info))) + || !SLP_TREE_VECTYPE (node) + || !VECTOR_BOOLEAN_TYPE_P (SLP_TREE_VECTYPE (node))) return false; for (int i = 1; i < 3; ++i) @@ -17361,10 +17428,11 @@ aarch64_bool_compound_p (vec_info *vinfo, stmt_vec_info stmt_info, instructions. */ static unsigned int aarch64_sve_in_loop_reduction_latency (vec_info *vinfo, + slp_tree node, stmt_vec_info stmt_info, const sve_vec_cost *sve_costs) { - switch (vect_reduc_type (vinfo, stmt_info)) + switch (vect_reduc_type (vinfo, node)) { case EXTRACT_LAST_REDUCTION: return sve_costs->clast_cost; @@ -17404,7 +17472,9 @@ aarch64_sve_in_loop_reduction_latency (vec_info *vinfo, - If VEC_FLAGS & VEC_ANY_SVE, return the loop carry latency of the SVE implementation. 
*/ static unsigned int -aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info, +aarch64_in_loop_reduction_latency (vec_info *vinfo, + slp_tree node, + stmt_vec_info stmt_info, unsigned int vec_flags) { const cpu_vector_cost *vec_costs = aarch64_tune_params.vec_costs; @@ -17417,7 +17487,8 @@ aarch64_in_loop_reduction_latency (vec_info *vinfo, stmt_vec_info stmt_info, if (sve_costs) { unsigned int latency - = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs); + = aarch64_sve_in_loop_reduction_latency (vinfo, node, + stmt_info, sve_costs); if (latency) return latency; } @@ -17493,7 +17564,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, if (kind == scalar_load && node && sve_costs - && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) + && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))) { unsigned int nunits = vect_nunits_for_cost (vectype); /* Test for VNx2 modes, which have 64-bit containers. */ @@ -17507,7 +17578,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, if (kind == scalar_store && node && sve_costs - && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) + && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))) return sve_costs->scatter_store_elt_cost; /* Detect cases in which vec_to_scalar represents an in-loop reduction. */ @@ -17516,7 +17587,8 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind, && sve_costs) { unsigned int latency - = aarch64_sve_in_loop_reduction_latency (vinfo, stmt_info, sve_costs); + = aarch64_sve_in_loop_reduction_latency (vinfo, node, + stmt_info, sve_costs); if (latency) return latency; } @@ -17665,7 +17737,7 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind, /* For vector boolean ANDs with a compare operand we just need one insn. */ - if (aarch64_bool_compound_p (vinfo, stmt_info, vec_flags)) + if (aarch64_bool_compound_p (vinfo, stmt_info, node, vec_flags)) return 0; } @@ -17728,8 +17800,10 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, && vect_is_reduction (stmt_info)) { unsigned int base - = aarch64_in_loop_reduction_latency (m_vinfo, stmt_info, m_vec_flags); - if (aarch64_force_single_cycle (m_vinfo, stmt_info)) + = aarch64_in_loop_reduction_latency (m_vinfo, node, + stmt_info, m_vec_flags); + if (m_costing_for_scalar + || aarch64_force_single_cycle (m_vinfo, stmt_info)) /* ??? Ideally we'd use a tree to reduce the copies down to 1 vector, and then accumulate that, but at the moment the loop-carried dependency includes all copies. */ @@ -17746,7 +17820,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, /* Assume that bool AND with compare operands will become a single operation. */ - if (aarch64_bool_compound_p (m_vinfo, stmt_info, m_vec_flags)) + if (aarch64_bool_compound_p (m_vinfo, stmt_info, node, m_vec_flags)) return; } @@ -17763,7 +17837,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, if (stmt_info && kind == vec_to_scalar && (m_vec_flags & VEC_ADVSIMD) - && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) + && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))) { auto dr = STMT_VINFO_DATA_REF (stmt_info); tree dr_ref = DR_REF (dr); @@ -17842,7 +17916,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, have only accounted for one. 
*/ if (stmt_info && (kind == vector_stmt || kind == vec_to_scalar) - && vect_reduc_type (m_vinfo, stmt_info) == COND_REDUCTION) + && vect_reduc_type (m_vinfo, node) == COND_REDUCTION) ops->general_ops += count; /* Count the predicate operations needed by an SVE comparison. */ @@ -17878,7 +17952,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind, if (stmt_info && sve_issue && (kind == scalar_load || kind == scalar_store) - && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) + && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))) { unsigned int pairs = CEIL (count, 2); ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs; @@ -17987,6 +18061,17 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, tree vectype, int misalign, vect_cost_model_location where) { + /* When costing for scalars, vectype will be NULL; so look up the type via + stmt_info's statement. */ + if (m_costing_for_scalar && stmt_info) + { + gcc_assert (!vectype); + /* This won't work for e.g. gconds or other statements without a lhs, + but those only work on GPR anyway and this is the best we can do. */ + if (tree lhs = gimple_get_lhs (STMT_VINFO_STMT (stmt_info))) + vectype = TREE_TYPE (lhs); + } + fractional_cost stmt_cost = aarch64_builtin_vectorization_cost (kind, vectype, misalign); @@ -18002,6 +18087,28 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, analyze_loop_vinfo (loop_vinfo); m_analyzed_vinfo = true; + if (in_inner_loop_p) + m_loop_fully_scalar_dup = true; + } + + /* Detect whether the loop is working on fully duplicated lanes. This would + only be possible with inner loop vectorization since otherwise we wouldn't + try to vectorize. */ + if (in_inner_loop_p + && node + && m_loop_fully_scalar_dup + && SLP_TREE_LANES (node) == 1 + && !SLP_TREE_CHILDREN (node).exists ()) + { + /* Check if load is a duplicate. */ + if (gimple_vuse (stmt_info->stmt) + && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_INVARIANT) + ; + else if (SLP_TREE_DEF_TYPE (node) == vect_constant_def + || SLP_TREE_DEF_TYPE (node) == vect_external_def) + ; + else + m_loop_fully_scalar_dup = false; } /* Apply the heuristic described above m_stp_sequence_cost. */ @@ -18036,7 +18143,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, && node && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)) - && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER) + && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node))) { const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve; if (sve_costs) @@ -18368,8 +18475,19 @@ adjust_body_cost (loop_vec_info loop_vinfo, if (m_vec_flags & VEC_ANY_SVE) threshold = CEIL (threshold, aarch64_estimated_sve_vq ()); - if (m_num_vector_iterations >= 1 - && m_num_vector_iterations < threshold) + /* Increase the cost of the vector code if it looks like the vector code has + limited throughput due to outer-loop vectorization. */ + if (m_loop_fully_scalar_dup) + { + body_cost *= estimated_vf; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Increasing body cost to %d because vector code has" + " low throughput of per iteration due to splats\n", + body_cost); + } + else if (m_num_vector_iterations >= 1 + && m_num_vector_iterations < threshold) { if (dump_enabled_p ()) dump_printf_loc (MSG_NOTE, vect_location, @@ -25435,20 +25553,41 @@ aarch64_asm_preferred_eh_data_format (int code ATTRIBUTE_UNUSED, int global) return (global ? 
DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; } +/* Return true if function declaration FNDECL needs to be marked as + having a variant PCS. */ + +static bool +aarch64_is_variant_pcs (tree fndecl) +{ + /* Check for ABIs that preserve more registers than usual. */ + arm_pcs pcs = (arm_pcs) fndecl_abi (fndecl).id (); + if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE) + return true; + + /* Check for ABIs that allow PSTATE.SM to be 1 on entry. */ + tree fntype = TREE_TYPE (fndecl); + if (aarch64_fntype_pstate_sm (fntype) != AARCH64_ISA_MODE_SM_OFF) + return true; + + /* Check for ABIs that require PSTATE.ZA to be 1 on entry, either because + of ZA or ZT0. */ + if (aarch64_fntype_pstate_za (fntype) != 0) + return true; + + return false; +} + /* Output .variant_pcs for aarch64_vector_pcs function symbols. */ static void aarch64_asm_output_variant_pcs (FILE *stream, const tree decl, const char* name) { - if (TREE_CODE (decl) == FUNCTION_DECL) + if (TREE_CODE (decl) == FUNCTION_DECL + && aarch64_is_variant_pcs (decl)) { - arm_pcs pcs = (arm_pcs) fndecl_abi (decl).id (); - if (pcs == ARM_PCS_SIMD || pcs == ARM_PCS_SVE) - { - fprintf (stream, "\t.variant_pcs\t"); - assemble_name (stream, name); - fprintf (stream, "\n"); - } + fprintf (stream, "\t.variant_pcs\t"); + assemble_name (stream, name); + fprintf (stream, "\n"); } } diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 096c853af7ff..2b3610c86ed5 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -410,8 +410,9 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED /* CSSC instructions are enabled through +cssc. */ #define TARGET_CSSC AARCH64_HAVE_ISA (CSSC) -/* CB instructions are enabled through +cmpbr. */ -#define TARGET_CMPBR AARCH64_HAVE_ISA (CMPBR) +/* CB instructions are enabled through +cmpbr, + but are incompatible with -mtrack-speculation. */ +#define TARGET_CMPBR (AARCH64_HAVE_ISA (CMPBR) && !aarch64_track_speculation) /* Make sure this is always defined so we don't have to check for ifdefs but rather use normal ifs. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index dc2be815ede6..fedbd4026a06 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -441,6 +441,16 @@ ; must not operate on inactive inputs if doing so could induce a fault. (SVE_STRICT_GP 1)]) +;; These constants are used as a const_int in MTE instructions +(define_constants + [; 0xf0ff... + ; Tag mask for the 4-bit tag stored in the top 8 bits of a pointer. + (MEMTAG_TAG_MASK -1080863910568919041) + + ; 0x00ff... + ; Tag mask 56-bit address used by subp instruction. + (MEMTAG_ADDR_MASK 72057594037927935)]) + (include "constraints.md") (include "predicates.md") (include "iterators.md") @@ -725,8 +735,8 @@ (BRANCH_LEN_N_32KiB -32768) ;; +/- 1KiB. Used by CBB, CBH, CB. 
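The constant renaming above (1Kib to 1KiB) keeps the values 1020 and -1024. As a sanity check, those values are consistent with a 9-bit signed, 4-byte-scaled offset field; the field width is an inference from the values, not something the patch states:

    max forward  = 255 * 4  = 1020    (BRANCH_LEN_P_1KiB)
    max backward = -256 * 4 = -1024   (BRANCH_LEN_N_1KiB)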
- (BRANCH_LEN_P_1Kib 1020) - (BRANCH_LEN_N_1Kib -1024) + (BRANCH_LEN_P_1KiB 1020) + (BRANCH_LEN_N_1KiB -1024) ] ) @@ -804,7 +814,7 @@ ) ;; For an EQ/NE comparison against zero, emit `CBZ`/`CBNZ` -(define_insn "aarch64_cbz1" +(define_insn "*aarch64_cbz" [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") (const_int 0)) (label_ref (match_operand 1)) @@ -838,27 +848,13 @@ [(set (pc) (if_then_else (LTGE (match_operand:ALLI 0 "register_operand" "r") (const_int 0)) (label_ref (match_operand 1)) - (pc))) - (clobber (reg:CC CC_REGNUM))] + (pc)))] "!aarch64_track_speculation" { - if (get_attr_length (insn) == 8) - { - if (get_attr_far_branch (insn) == FAR_BRANCH_YES) - return aarch64_gen_far_branch (operands, 1, "Ltb", - "\\t%0, , "); - else - { - char buf[64]; - uint64_t val = ((uint64_t) 1) - << (GET_MODE_SIZE (mode) * BITS_PER_UNIT - 1); - sprintf (buf, "tst\t%%0, %" PRId64, val); - output_asm_insn (buf, operands); - return "\t%l1"; - } - } - else + if (get_attr_length (insn) == 4) return "\t%0, , %l1"; + return aarch64_gen_far_branch (operands, 1, "Ltb", + "\\t%0, , "); } [(set_attr "type" "branch") (set (attr "length") @@ -870,44 +866,44 @@ (const_int 8))) (set (attr "far_branch") (if_then_else (and (ge (minus (match_dup 1) (pc)) - (const_int BRANCH_LEN_N_1MiB)) + (const_int BRANCH_LEN_N_32KiB)) (lt (minus (match_dup 1) (pc)) - (const_int BRANCH_LEN_P_1MiB))) + (const_int BRANCH_LEN_P_32KiB))) (const_string "no") (const_string "yes")))] ) ;; Emit a `CB (register)` or `CB (immediate)` instruction. ;; The immediate range depends on the comparison code. -;; Comparisons against immediates outside this range fall back to -;; CMP + B. -(define_insn "aarch64_cb" - [(set (pc) (if_then_else (INT_CMP - (match_operand:GPI 0 "register_operand" "r") - (match_operand:GPI 1 "nonmemory_operand" - "r")) - (label_ref (match_operand 2)) - (pc)))] - "TARGET_CMPBR && aarch64_cb_rhs (, operands[1])" +(define_insn "*aarch64_cb" + [(set (pc) (if_then_else + (INT_CMP + (match_operand:GPI 0 "register_operand" "r") + (match_operand:GPI 1 + "aarch64_reg__operand" + "r")) + (label_ref (match_operand 2)) + (pc)))] + "TARGET_CMPBR" { - return (get_attr_far_branch (insn) == FAR_BRANCH_NO) - ? 
"cb\\t%0, %1, %l2" - : aarch64_gen_far_branch (operands, 2, "L", - "cb\\t%0, %1, "); + if (get_attr_length (insn) == 4) + return "cb\t%0, %1, %l2"; + return aarch64_gen_far_branch (operands, 2, "L", + "cb\t%0, %1, "); } [(set_attr "type" "branch") (set (attr "length") (if_then_else (and (ge (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_N_1Kib)) + (const_int BRANCH_LEN_N_1KiB)) (lt (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_P_1Kib))) + (const_int BRANCH_LEN_P_1KiB))) (const_int 4) (const_int 8))) (set (attr "far_branch") (if_then_else (and (ge (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_N_1Kib)) + (const_int BRANCH_LEN_N_1KiB)) (lt (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_P_1Kib))) + (const_int BRANCH_LEN_P_1KiB))) (const_string "no") (const_string "yes")))] ) @@ -929,16 +925,16 @@ [(set_attr "type" "branch") (set (attr "length") (if_then_else (and (ge (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_N_1Kib)) + (const_int BRANCH_LEN_N_1KiB)) (lt (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_P_1Kib))) + (const_int BRANCH_LEN_P_1KiB))) (const_int 4) (const_int 8))) (set (attr "far_branch") (if_then_else (and (ge (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_N_1Kib)) + (const_int BRANCH_LEN_N_1KiB)) (lt (minus (match_dup 2) (pc)) - (const_int BRANCH_LEN_P_1Kib))) + (const_int BRANCH_LEN_P_1KiB))) (const_string "no") (const_string "yes")))] ) @@ -978,37 +974,24 @@ (const_string "yes")))] ) -;; For a 24-bit immediate CST we can optimize the compare for equality -;; and branch sequence from: -;; mov x0, #imm1 -;; movk x0, #imm2, lsl 16 /* x0 contains CST. */ -;; cmp x1, x0 -;; b .Label -;; into the shorter: -;; sub x0, x1, #(CST & 0xfff000) -;; subs x0, x0, #(CST & 0x000fff) -;; b .Label +;; For a 24-bit immediate CST we can optimize the compare for equality. 
(define_insn_and_split "*aarch64_bcond_wide_imm" - [(set (pc) (if_then_else (EQL (match_operand:GPI 0 "register_operand" "r") - (match_operand:GPI 1 "aarch64_imm24" "n")) - (label_ref:P (match_operand 2)) - (pc)))] - "!aarch64_move_imm (INTVAL (operands[1]), mode) - && !aarch64_plus_operand (operands[1], mode) - && !reload_completed" + [(set (pc) (if_then_else + (match_operator 0 "aarch64_equality_operator" + [(match_operand:GPI 1 "register_operand" "r") + (match_operand:GPI 2 "aarch64_split_imm24" "n")]) + (label_ref (match_operand 3)) + (pc))) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:GPI 4 "=r"))] + "" "#" - "&& true" + "" [(const_int 0)] { - HOST_WIDE_INT lo_imm = UINTVAL (operands[1]) & 0xfff; - HOST_WIDE_INT hi_imm = UINTVAL (operands[1]) & 0xfff000; - rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_add3 (tmp, operands[0], GEN_INT (-hi_imm))); - emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); - rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); - rtx cmp_rtx = gen_rtx_fmt_ee (, mode, - cc_reg, const0_rtx); - emit_jump_insn (gen_aarch64_bcond (cmp_rtx, cc_reg, operands[2])); + rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[1], operands[2], + operands[4]); + emit_jump_insn (gen_aarch64_bcond (operands[0], cc_reg, operands[3])); DONE; } ) @@ -1413,16 +1396,16 @@ /* Save GCS with code like mov x16, 1 chkfeat x16 - tbnz x16, 0, .L_done + cbnz x16, .L_done mrs tmp, gcspr_el0 str tmp, [%0, 8] .L_done: */ - rtx done_label = gen_label_rtx (); + auto done_label = gen_label_rtx (); rtx r16 = gen_rtx_REG (DImode, R16_REGNUM); emit_move_insn (r16, const1_rtx); emit_insn (gen_aarch64_chkfeat ()); - emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label)); + emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, done_label)); rtx gcs_slot = adjust_address (operands[0], Pmode, GET_MODE_SIZE (Pmode)); rtx gcs = gen_reg_rtx (Pmode); emit_insn (gen_aarch64_load_gcspr (gcs)); @@ -1445,7 +1428,7 @@ /* Restore GCS with code like mov x16, 1 chkfeat x16 - tbnz x16, 0, .L_done + cbnz x16, .L_done ldr tmp1, [%1, 8] mrs tmp2, gcspr_el0 subs tmp2, tmp1, tmp2 @@ -1456,12 +1439,12 @@ b.ne .L_loop .L_done: */ - rtx loop_label = gen_label_rtx (); - rtx done_label = gen_label_rtx (); + auto loop_label = gen_label_rtx (); + auto done_label = gen_label_rtx (); rtx r16 = gen_rtx_REG (DImode, R16_REGNUM); emit_move_insn (r16, const1_rtx); emit_insn (gen_aarch64_chkfeat ()); - emit_insn (gen_tbranch_neqi3 (r16, const0_rtx, done_label)); + emit_jump_insn (aarch64_gen_compare_zero_and_branch (NE, r16, done_label)); rtx gcs_slot = adjust_address (operands[1], Pmode, GET_MODE_SIZE (Pmode)); rtx gcs_old = gen_reg_rtx (Pmode); emit_move_insn (gcs_old, gcs_slot); @@ -4651,39 +4634,24 @@ [(set_attr "type" "csel")] ) -;; For a 24-bit immediate CST we can optimize the compare for equality -;; and branch sequence from: -;; mov x0, #imm1 -;; movk x0, #imm2, lsl 16 /* x0 contains CST. */ -;; cmp x1, x0 -;; cset x2, -;; into the shorter: -;; sub x0, x1, #(CST & 0xfff000) -;; subs x0, x0, #(CST & 0x000fff) -;; cset x2, . +;; For a 24-bit immediate CST we can optimize the compare for equality. 
(define_insn_and_split "*compare_cstore_insn" [(set (match_operand:GPI 0 "register_operand" "=r") - (EQL:GPI (match_operand:GPI 1 "register_operand" "r") - (match_operand:GPI 2 "aarch64_imm24" "n"))) - (clobber (reg:CC CC_REGNUM))] - "!aarch64_move_imm (INTVAL (operands[2]), mode) - && !aarch64_plus_operand (operands[2], mode) - && !reload_completed" + (match_operator:GPI 1 "aarch64_equality_operator" + [(match_operand:GPI 2 "register_operand" "r") + (match_operand:GPI 3 "aarch64_split_imm24" "n")])) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:GPI 4 "=r"))] + "" "#" - "&& true" + "" [(const_int 0)] { - HOST_WIDE_INT lo_imm = UINTVAL (operands[2]) & 0xfff; - HOST_WIDE_INT hi_imm = UINTVAL (operands[2]) & 0xfff000; - rtx tmp = gen_reg_rtx (mode); - emit_insn (gen_add3 (tmp, operands[1], GEN_INT (-hi_imm))); - emit_insn (gen_add3_compare0 (tmp, tmp, GEN_INT (-lo_imm))); - rtx cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM); - rtx cmp_rtx = gen_rtx_fmt_ee (, mode, cc_reg, const0_rtx); - emit_insn (gen_aarch64_cstore (operands[0], cmp_rtx, cc_reg)); + rtx cc_reg = aarch64_gen_compare_split_imm24 (operands[2], operands[3], + operands[4]); + emit_insn (gen_aarch64_cstore (operands[0], operands[1], cc_reg)); DONE; } - [(set_attr "type" "csel")] ) ;; zero_extend version of the above @@ -4813,15 +4781,21 @@ (match_operand:ALLI 3 "register_operand")))] "" { - rtx ccreg; enum rtx_code code = GET_CODE (operands[1]); - if (code == UNEQ || code == LTGT) FAIL; - ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0), - XEXP (operands[1], 1)); - operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + rtx ccreg = XEXP (operands[1], 0); + enum machine_mode ccmode = GET_MODE (ccreg); + if (GET_MODE_CLASS (ccmode) == MODE_CC) + gcc_assert (XEXP (operands[1], 1) == const0_rtx); + else if (ccmode == QImode || ccmode == HImode) + FAIL; + else + { + ccreg = aarch64_gen_compare_reg (code, ccreg, XEXP (operands[1], 1)); + operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx); + } } ) @@ -8566,7 +8540,7 @@ [(set (match_operand:DI 0 "register_operand" "=rk") (ior:DI (and:DI (match_operand:DI 1 "register_operand" "rk") - (const_int -1080863910568919041)) ;; 0xf0ff... + (const_int MEMTAG_TAG_MASK)) (ashift:DI (unspec:QI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_GEN_TAG_RND) (const_int 56))))] @@ -8609,9 +8583,9 @@ [(set (match_operand:DI 0 "register_operand" "=r") (minus:DI (and:DI (match_operand:DI 1 "register_operand" "rk") - (const_int 72057594037927935)) ;; 0x00ff... + (const_int MEMTAG_ADDR_MASK)) (and:DI (match_operand:DI 2 "register_operand" "rk") - (const_int 72057594037927935))))] ;; 0x00ff... + (const_int MEMTAG_ADDR_MASK))))] "TARGET_MEMTAG" "subp\\t%0, %1, %2" [(set_attr "type" "memtag")] @@ -8621,7 +8595,7 @@ (define_insn "ldg" [(set (match_operand:DI 0 "register_operand" "+r") (ior:DI - (and:DI (match_dup 0) (const_int -1080863910568919041)) ;; 0xf0ff... + (and:DI (match_dup 0) (const_int MEMTAG_TAG_MASK)) (ashift:DI (mem:QI (unspec:DI [(and:DI (plus:DI (match_operand:DI 1 "register_operand" "rk") diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index dc1925dfb6c3..7b9e5583bc72 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -312,15 +312,9 @@ (define_constraint "Uc1" "@internal - A constraint that matches the integers 1...64." + A constraint that matches the integers 0...62." 
(and (match_code "const_int") - (match_test "IN_RANGE (ival, 1, 64)"))) - -(define_constraint "Uc2" - "@internal - A constraint that matches the integers -1...62." - (and (match_code "const_int") - (match_test "IN_RANGE (ival, -1, 62)"))) + (match_test "IN_RANGE (ival, 0, 62)"))) (define_constraint "Up3" "@internal diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 8f8237edf6cb..c3771d9402ba 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -543,6 +543,12 @@ ;; elements. (define_mode_iterator SVE_FULL_HSF [VNx8HF VNx4SF]) +;; Like SVE_FULL_HSF, but selectively enables those modes that are valid +;; for the variant of the SVE2 FP8 FDOT instruction associated with that +;; mode. +(define_mode_iterator SVE_FULL_HSF_FP8_FDOT [(VNx4SF "TARGET_SSVE_FP8DOT4") + (VNx8HF "TARGET_SSVE_FP8DOT2")]) + ;; Partial SVE floating-point vector modes that have 16-bit or 32-bit ;; elements. (define_mode_iterator SVE_PARTIAL_HSF [VNx2HF VNx4HF VNx2SF]) @@ -2980,19 +2986,15 @@ (define_code_iterator INT_CMP [lt le eq ne ge gt ltu leu geu gtu]) +;; Inverse comparisons must have the same constraint so that +;; branches can be redirected during late compilation. (define_code_attr cmpbr_imm_constraint [ - (eq "Uc0") - (ne "Uc0") - (gt "Uc0") - (gtu "Uc0") - (lt "Uc0") - (ltu "Uc0") - - (ge "Uc1") - (geu "Uc1") - - (le "Uc2") - (leu "Uc2") + (eq "Uc0") (ne "Uc0") + (lt "Uc0") (ge "Uc0") + (ltu "Uc0") (geu "Uc0") + + (gt "Uc1") (le "Uc1") + (gtu "Uc1") (leu "Uc1") ]) (define_code_attr fix_trunc_optab [(fix "fix_trunc") diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 4d5d57f1e5d6..42304cef4391 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -286,10 +286,15 @@ (and (match_code "const_int") (match_test "UINTVAL (op) <= 7"))) -;; An immediate that fits into 24 bits. -(define_predicate "aarch64_imm24" - (and (match_code "const_int") - (match_test "IN_RANGE (UINTVAL (op), 0, 0xffffff)"))) +;; An immediate that fits into 24 bits, but needs splitting. 
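A self-contained sketch of which constants the aarch64_split_imm24 predicate defined just below accepts. The two helpers are deliberately simplified stand-ins for aarch64_move_imm and aarch64_uimm12_shift (the real aarch64_move_imm also accepts MOVN and bitmask immediates, so this is an approximation, not the predicate's exact behaviour):

    #include <cassert>
    #include <cstdint>

    /* Stand-in for aarch64_uimm12_shift: a 12-bit immediate, optionally
       shifted left by 12 (the ADD/SUB/CMP immediate forms).  */
    static bool
    uimm12_shift (uint64_t v)
    {
      return (v & 0xfffULL) == v || (v & 0xfff000ULL) == v;
    }

    /* Simplified stand-in for aarch64_move_imm: here, just "one MOVZ".  */
    static bool
    single_movz (uint64_t v)
    {
      for (int shift = 0; shift < 64; shift += 16)
        if ((v & (0xffffULL << shift)) == v)
          return true;
      return false;
    }

    /* Mirrors the new predicate: a 24-bit constant that neither a single
       MOV nor a plain CMP immediate can handle, so the SUB+SUBS split in
       aarch64_gen_compare_split_imm24 is worthwhile.  */
    static bool
    split_imm24_p (uint64_t v)
    {
      return v <= 0xffffffULL && !single_movz (v) && !uimm12_shift (v);
    }

    int
    main ()
    {
      assert (split_imm24_p (0x123456));    /* Needs MOV+MOVK, so split.  */
      assert (!split_imm24_p (0x123000));   /* CMP takes 0x123 << 12.  */
      assert (!split_imm24_p (0x34));       /* A single MOV suffices.  */
    }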
+(define_predicate "aarch64_split_imm24" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT i = UINTVAL (op); + return (IN_RANGE (i, 0, 0xffffff) + && !aarch64_move_imm (i, mode) + && !aarch64_uimm12_shift (i)); +}) (define_predicate "aarch64_mem_pair_offset" (and (match_code "const_int") @@ -1084,3 +1089,13 @@ (define_special_predicate "aarch64_ptrue_all_operand" (and (match_code "const_vector") (match_test "aarch64_ptrue_all_mode (op) == mode"))) + +(define_predicate "aarch64_reg_Uc0_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "satisfies_constraint_Uc0 (op)")))) + +(define_predicate "aarch64_reg_Uc1_operand" + (ior (match_operand 0 "register_operand") + (and (match_code "const_int") + (match_test "satisfies_constraint_Uc1 (op)")))) diff --git a/gcc/config/aarch64/t-aarch64 b/gcc/config/aarch64/t-aarch64 index 38a8c0637255..63ca8e90c9d1 100644 --- a/gcc/config/aarch64/t-aarch64 +++ b/gcc/config/aarch64/t-aarch64 @@ -190,12 +190,6 @@ aarch-bti-insert.o: $(srcdir)/config/arm/aarch-bti-insert.cc \ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ $(srcdir)/config/arm/aarch-bti-insert.cc -aarch64-cc-fusion.o: $(srcdir)/config/aarch64/aarch64-cc-fusion.cc \ - $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \ - $(RTL_SSA_H) tree-pass.h - $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \ - $(srcdir)/config/aarch64/aarch64-cc-fusion.cc - aarch64-early-ra.o: $(srcdir)/config/aarch64/aarch64-early-ra.cc \ $(CONFIG_H) $(SYSTEM_H) $(CORETYPES_H) $(BACKEND_H) $(RTL_H) $(DF_H) \ $(RTL_SSA_H) tree-pass.h diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index d119464176b8..8f7e5373fdf1 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -66,9 +66,9 @@ ;; I signed 12-bit immediate (for ARCompact) ;; K unsigned 3-bit immediate (for ARCompact) ;; L unsigned 6-bit immediate (for ARCompact) -;; M unsinged 5-bit immediate (for ARCompact) -;; O unsinged 7-bit immediate (for ARCompact) -;; P unsinged 8-bit immediate (for ARCompact) +;; M unsigned 5-bit immediate (for ARCompact) +;; O unsigned 7-bit immediate (for ARCompact) +;; P unsigned 8-bit immediate (for ARCompact) ;; N constant '1' (for ARCompact) diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 29b45ae96bda..8b951f3d4a67 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -18983,7 +18983,8 @@ cmse_nonsecure_call_inline_register_clear (void) call = SET_SRC (call); /* Check if it is a cmse_nonsecure_call. */ - unspec = XEXP (call, 0); + unspec = XVECEXP (pat, 0, 2); + if (GET_CODE (unspec) != UNSPEC || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM) continue; @@ -19010,7 +19011,7 @@ cmse_nonsecure_call_inline_register_clear (void) /* Make sure the register used to hold the function address is not cleared. 
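For context, a hedged sketch of the source shape these CMSE patterns implement (compile with -mcmse on an ARMv8-M target; the typedef uses the documented CMSE attribute):

/* A call through a cmse_nonsecure_call function type is lowered via
   __gnu_cmse_nonsecure_call (or BLXNS), with the target address kept in a
   known register.  With the UNSPEC moved out of the call address, the
   address is again reachable as XEXP (call, 0) and the
   UNSPEC_NONSECURE_MEM marker is a separate element of the PARALLEL.  */
typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (int);

void
call_nonsecure (ns_fn_t *fn)
{
  fn (42);   /* expands through the nonsecure_call_* patterns */
}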
*/ - address = RTVEC_ELT (XVEC (unspec, 0), 0); + address = XEXP (call, 0); gcc_assert (MEM_P (address)); gcc_assert (REG_P (XEXP (address, 0))); address_regnum = REGNO (XEXP (address, 0)); diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 5e5e1120e77a..422ae549b65b 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -8623,7 +8623,7 @@ if (detect_cmse_nonsecure_call (addr)) { pat = gen_nonsecure_call_internal (operands[0], operands[1], - operands[2]); + operands[2], const0_rtx); emit_call_insn (pat); } else @@ -8665,10 +8665,10 @@ (clobber (reg:SI LR_REGNUM))])]) (define_expand "nonsecure_call_internal" - [(parallel [(call (unspec:SI [(match_operand 0 "memory_operand")] - UNSPEC_NONSECURE_MEM) + [(parallel [(call (match_operand 0 "memory_operand") (match_operand 1 "general_operand")) (use (match_operand 2 "" "")) + (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM) (clobber (reg:SI LR_REGNUM))])] "use_cmse" { @@ -8745,7 +8745,8 @@ if (detect_cmse_nonsecure_call (addr)) { pat = gen_nonsecure_call_value_internal (operands[0], operands[1], - operands[2], operands[3]); + operands[2], operands[3], + const0_rtx); emit_call_insn (pat); } else @@ -8779,10 +8780,10 @@ (define_expand "nonsecure_call_value_internal" [(parallel [(set (match_operand 0 "" "") - (call (unspec:SI [(match_operand 1 "memory_operand")] - UNSPEC_NONSECURE_MEM) + (call (match_operand 1 "memory_operand") (match_operand 2 "general_operand"))) (use (match_operand 3 "" "")) + (unspec:SI [(match_operand 4)] UNSPEC_NONSECURE_MEM) (clobber (reg:SI LR_REGNUM))])] "use_cmse" " @@ -13025,7 +13026,7 @@ "arm_coproc_builtin_available (VUNSPEC_)" { arm_const_bounds (operands[0], 0, 16); - arm_const_bounds (operands[1], 0, 8); + arm_const_bounds (operands[1], 0, 16); arm_const_bounds (operands[3], 0, (1 << 5)); return "\\tp%c0, %1, %Q2, %R2, CR%c3"; } @@ -13040,7 +13041,7 @@ "arm_coproc_builtin_available (VUNSPEC_)" { arm_const_bounds (operands[1], 0, 16); - arm_const_bounds (operands[2], 0, 8); + arm_const_bounds (operands[2], 0, 16); arm_const_bounds (operands[3], 0, (1 << 5)); return "\\tp%c1, %2, %Q0, %R0, CR%c3"; } diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md index f9e89e991d9b..4da0086b2527 100644 --- a/gcc/config/arm/thumb1.md +++ b/gcc/config/arm/thumb1.md @@ -1874,10 +1874,10 @@ ) (define_insn "*nonsecure_call_reg_thumb1_v5" - [(call (unspec:SI [(mem:SI (reg:SI R4_REGNUM))] - UNSPEC_NONSECURE_MEM) + [(call (mem:SI (reg:SI R4_REGNUM)) (match_operand 0 "" "")) (use (match_operand 1 "" "")) + (unspec:SI [(match_operand 2)]UNSPEC_NONSECURE_MEM) (clobber (reg:SI LR_REGNUM))] "TARGET_THUMB1 && use_cmse && !SIBLING_CALL_P (insn)" "bl\\t__gnu_cmse_nonsecure_call" @@ -1919,11 +1919,10 @@ (define_insn "*nonsecure_call_value_reg_thumb1_v5" [(set (match_operand 0 "" "") - (call (unspec:SI - [(mem:SI (reg:SI R4_REGNUM))] - UNSPEC_NONSECURE_MEM) + (call (mem:SI (reg:SI R4_REGNUM)) (match_operand 1 "" ""))) (use (match_operand 2 "" "")) + (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM) (clobber (reg:SI LR_REGNUM))] "TARGET_THUMB1 && use_cmse" "bl\\t__gnu_cmse_nonsecure_call" diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md index 019f9d438c08..2c2026b1e747 100644 --- a/gcc/config/arm/thumb2.md +++ b/gcc/config/arm/thumb2.md @@ -537,10 +537,10 @@ ) (define_insn "*nonsecure_call_reg_thumb2_fpcxt" - [(call (unspec:SI [(mem:SI (match_operand:SI 0 "s_register_operand" "l*r"))] - UNSPEC_NONSECURE_MEM) + [(call (mem:SI (match_operand:SI 0 "s_register_operand" "l*r")) (match_operand 1 
"" "")) (use (match_operand 2 "" "")) + (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM) (clobber (reg:SI LR_REGNUM))] "TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE" "blxns\\t%0" @@ -549,10 +549,10 @@ ) (define_insn "*nonsecure_call_reg_thumb2" - [(call (unspec:SI [(mem:SI (reg:SI R4_REGNUM))] - UNSPEC_NONSECURE_MEM) + [(call (mem:SI (reg:SI R4_REGNUM)) (match_operand 0 "" "")) (use (match_operand 1 "" "")) + (unspec:SI [(match_operand 2)] UNSPEC_NONSECURE_MEM) (clobber (reg:SI LR_REGNUM))] "TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE" "bl\\t__gnu_cmse_nonsecure_call" @@ -573,11 +573,10 @@ (define_insn "*nonsecure_call_value_reg_thumb2_fpcxt" [(set (match_operand 0 "" "") - (call - (unspec:SI [(mem:SI (match_operand:SI 1 "register_operand" "l*r"))] - UNSPEC_NONSECURE_MEM) - (match_operand 2 "" ""))) + (call (mem:SI (match_operand:SI 1 "register_operand" "l*r")) + (match_operand 2 "" ""))) (use (match_operand 3 "" "")) + (unspec:SI [(match_operand 4)] UNSPEC_NONSECURE_MEM) (clobber (reg:SI LR_REGNUM))] "TARGET_THUMB2 && use_cmse && TARGET_HAVE_FPCXT_CMSE" "blxns\\t%1" @@ -587,10 +586,10 @@ (define_insn "*nonsecure_call_value_reg_thumb2" [(set (match_operand 0 "" "") - (call - (unspec:SI [(mem:SI (reg:SI R4_REGNUM))] UNSPEC_NONSECURE_MEM) - (match_operand 1 "" ""))) + (call (mem:SI (reg:SI R4_REGNUM)) + (match_operand 1 "" ""))) (use (match_operand 2 "" "")) + (unspec:SI [(match_operand 3)] UNSPEC_NONSECURE_MEM) (clobber (reg:SI LR_REGNUM))] "TARGET_THUMB2 && use_cmse && !TARGET_HAVE_FPCXT_CMSE" "bl\\t__gnu_cmse_nonsecure_call" diff --git a/gcc/config/avr/specs.h b/gcc/config/avr/specs.h index ff269bf43bdf..c95c75856cb0 100644 --- a/gcc/config/avr/specs.h +++ b/gcc/config/avr/specs.h @@ -57,7 +57,7 @@ along with GCC; see the file COPYING3. If not see "%(asm_errata_skip) " #define LINK_RELAX_SPEC \ - "%{mrelax:--relax} " + "%{!r:%{mrelax:--relax}} " #undef LINK_SPEC #define LINK_SPEC \ diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h index 1681c79e24dc..f35667949c1b 100644 --- a/gcc/config/cris/cris.h +++ b/gcc/config/cris/cris.h @@ -171,7 +171,7 @@ extern int cris_cpu_version; /* For the cris-*-elf subtarget. */ #define CRIS_ASM_SUBTARGET_SPEC \ - "--em=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}" + "--emulation=criself %{!march=*:%{!mcpu=*:" CRIS_DEFAULT_ASM_ARCH_OPTION "}}" /* FIXME: We should propagate the -melf option to make the criself "emulation" unless a linker script is provided (-T*), but I don't know diff --git a/gcc/config/darwin-sections.def b/gcc/config/darwin-sections.def index 44adcc6062db..76587c268cef 100644 --- a/gcc/config/darwin-sections.def +++ b/gcc/config/darwin-sections.def @@ -215,3 +215,10 @@ DEF_SECTION (objc2_method_names_section, 0, DEF_SECTION (objc2_method_types_section, 0, ".section __TEXT, __objc_methtype, cstring_literals", 1) + +/* ASAN sections. */ + +DEF_SECTION (asan_string_section, 0, ".section __TEXT, __asan_cstring", 0) +DEF_SECTION (asan_globals_section, 0, ".section __DATA, __asan_globals", 0) +DEF_SECTION (asan_liveness_section, 0, + ".section __DATA,__asan_liveness,regular,live_support", 0) diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc index be2daed4f66a..75ac3560954b 100644 --- a/gcc/config/darwin.cc +++ b/gcc/config/darwin.cc @@ -49,6 +49,7 @@ along with GCC; see the file COPYING3. If not see #include "optabs.h" #include "flags.h" #include "opts.h" +#include "asan.h" /* Fix and Continue. 
@@ -1298,6 +1299,39 @@ darwin_encode_section_info (tree decl, rtx rtl, int first) SYMBOL_FLAG_EXTERNAL. */ default_encode_section_info (decl, rtl, first); + if (CONSTANT_CLASS_P (decl)) + { + bool is_str = TREE_CODE (decl) == STRING_CST; + rtx sym_ref = XEXP (rtl, 0); + + /* Unless this is a string cst or we are in an anchored section we have + nothing more to do here. */ + if (!is_str && !SYMBOL_REF_HAS_BLOCK_INFO_P (sym_ref)) + return; + + tree sym_decl = SYMBOL_REF_DECL (sym_ref); + const char *name = XSTR (sym_ref, 0); + gcc_checking_assert (strncmp ("*lC", name, 3) == 0); + + char *buf; + if (is_str) + { + bool for_asan = (flag_sanitize & SANITIZE_ADDRESS) + && asan_protect_global (CONST_CAST_TREE (decl)); + /* When we are generating code for sanitized strings, the string + internal symbols are made visible in the object. */ + buf = xasprintf ("*%c.str.%s", for_asan ? 'l' : 'L', &name[3]); + } + else + /* Let's identify anchored constants with a different prefix, for the + sake of inspection only. */ + buf = xasprintf ("*LaC%s", &name[3]); + if (sym_decl) + DECL_NAME (sym_decl) = get_identifier (buf); + XSTR (sym_ref, 0) = ggc_strdup (buf); + free (buf); + } + if (! VAR_OR_FUNCTION_DECL_P (decl)) return; @@ -1683,6 +1717,17 @@ machopic_select_section (tree decl, ro = TREE_READONLY (decl) || TREE_CONSTANT (decl) ; + /* Trump categorize_decl_for_section () for ASAN stuff - the Darwin + categorisations are special. */ + if (flag_sanitize & SANITIZE_ADDRESS) + { + if (TREE_CODE (decl) == STRING_CST + && asan_protect_global (CONST_CAST_TREE (decl))) + { + return darwin_sections[asan_string_section]; + } + } + switch (categorize_decl_for_section (decl, reloc)) { case SECCAT_TEXT: @@ -1699,7 +1744,12 @@ break; case SECCAT_RODATA_MERGE_STR_INIT: - base_section = darwin_mergeable_string_section (DECL_INITIAL (decl), align); + if ((flag_sanitize & SANITIZE_ADDRESS) + && asan_protect_global (CONST_CAST_TREE (decl))) + /* or !flag_merge_constants */ + return darwin_sections[asan_string_section]; + else + return darwin_mergeable_string_section (DECL_INITIAL (decl), align); break; case SECCAT_RODATA_MERGE_CONST: @@ -3297,11 +3347,16 @@ darwin_use_anchors_for_symbol_p (const_rtx symbol) { if (DARWIN_SECTION_ANCHORS && flag_section_anchors) { - section *sect; - /* If the section contains a zero-sized object it's ineligible. */ - sect = SYMBOL_REF_BLOCK (symbol)->sect; - /* This should have the effect of disabling anchors for vars that follow - any zero-sized one, in a given section. */ + tree decl = SYMBOL_REF_DECL (symbol); + /* If the symbol would be linker-visible, then the section can be split + at that point, so we must disallow anchoring. This is more strict + than the default implementation. TODO: add other cases. */ + if (decl && DECL_P (decl) + && (TREE_PUBLIC (decl) || !DECL_ARTIFICIAL (decl))) + return false; + + /* We mark sections containing unsuitable entries. */ + section *sect = SYMBOL_REF_BLOCK (symbol)->sect; if (sect->common.flags & SECTION_NO_ANCHOR) return false; diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h index 9b9a3fe40388..c3e28e2fa81c 100644 --- a/gcc/config/darwin.h +++ b/gcc/config/darwin.h @@ -287,6 +287,19 @@ extern GTY(()) int darwin_ms_struct; #define DARWIN_RDYNAMIC "%{rdynamic:%nrdynamic is not supported}" #endif +#if LD64_HAS_NO_DEDUPLICATE +/* What we want is "when the optimization level is debug OR when it is + a compile & link job with implied O0 optimization".
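Returning to the darwin.cc ASan hunks above, a minimal illustration of the intended effect (an inference from those hunks, not text from the patch):

/* With -fsanitize=address on Darwin, a string literal that
   asan_protect_global () accepts is now placed in __TEXT,__asan_cstring
   under a linker-visible "l_.str..." style name rather than merged into
   the mergeable-cstring section, since ASan red-zone padding and constant
   merging do not mix.  */
const char *
sanitized_greeting (void)
{
  return "hello";   /* -> __TEXT,__asan_cstring when protected */
}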
*/ +#define DARWIN_LD_NO_DEDUPLICATE \ + "%{O0|O1|O|Og: -no_deduplicate} \ + %{!O*:\ + %{.c|.cc|.C|.cpp|.cp|.c++|.cxx|.CPP|.m|.mm|.s|.S|.i|.ii|.mi|.mii|\ + .f|.for|.ftn|.fpp|.f90|.f95|.f03|.f08|.f77|.F|.F90|.F95|.F03|.F08|\ + .d|.mod: -no_deduplicate }} " +#else +#define DARWIN_LD_NO_DEDUPLICATE "" +#endif + #if LD64_HAS_MACOS_VERSION_MIN # define DARWIN_PLATFORM_ID \ "%{mmacosx-version-min=*:-macos_version_min %*} " @@ -403,10 +416,14 @@ extern GTY(()) int darwin_ms_struct; %(linker)" \ DARWIN_LD_DEMANGLE \ LINK_PLUGIN_SPEC \ + DARWIN_LD_NO_DEDUPLICATE \ "%{flto*:%" - [(call (mem:QI (match_operand 0 "call_insn_operand" "Cr")) + [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Cr")) (match_operand:P 1 "general_operand" "g"))] "!SIBLING_CALL_P (insn)" { @@ -326,7 +326,7 @@ (define_insn "call_value_insn_" [(set (match_operand 0 "" "=r") - (call (mem:QI (match_operand 1 "call_insn_operand" "Cr")) + (call (mem:QI (match_operand:P 1 "call_insn_operand" "Cr")) (match_operand:P 2 "general_operand" "g")))] "!SIBLING_CALL_P (insn)" { @@ -358,7 +358,7 @@ }) (define_insn "sibcall_insn_" - [(call (mem:QI (match_operand 0 "call_insn_operand" "Cr")) + [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Cr")) (match_operand:P 1 "general_operand" "g"))] "SIBLING_CALL_P (insn)" { @@ -396,7 +396,7 @@ (define_insn "sibcall_value_insn_" [(set (match_operand 0 "" "=r") - (call (mem:QI (match_operand 1 "call_insn_operand" "Cr")) + (call (mem:QI (match_operand:P 1 "call_insn_operand" "Cr")) (match_operand:P 2 "general_operand" "g")))] "SIBLING_CALL_P (insn)" { diff --git a/gcc/config/h8300/testcompare.md b/gcc/config/h8300/testcompare.md index 694c9e60d4ea..3b43381e64a8 100644 --- a/gcc/config/h8300/testcompare.md +++ b/gcc/config/h8300/testcompare.md @@ -28,7 +28,7 @@ ;; (define_insn "" [(set (reg:CCZ CC_REG) - (eq (zero_extract:HSI (match_operand:HSI 0 "register_operand" "r") + (eq:CCZ (zero_extract:HSI (match_operand:HSI 0 "register_operand" "r") (const_int 1) (match_operand 1 "const_int_operand" "n")) (const_int 0)))] @@ -54,7 +54,7 @@ (define_insn "*tsthi_upper" [(set (reg:CCZN CC_REG) - (compare (and:HI (match_operand:HI 0 "register_operand" "r") + (compare:CCZN (and:HI (match_operand:HI 0 "register_operand" "r") (const_int -256)) (const_int 0)))] "reload_completed" @@ -63,7 +63,7 @@ (define_insn "*tsthi_upper_z" [(set (reg:CCZ CC_REG) - (compare (and:HI (match_operand:HI 0 "register_operand" "r") + (compare:CCZ (and:HI (match_operand:HI 0 "register_operand" "r") (const_int -256)) (const_int 0)))] "reload_completed" @@ -72,7 +72,7 @@ (define_insn "*tstsi_upper" [(set (reg:CCZN CC_REG) - (compare (and:SI (match_operand:SI 0 "register_operand" "r") + (compare:CCZN (and:SI (match_operand:SI 0 "register_operand" "r") (const_int -65536)) (const_int 0)))] "reload_completed" @@ -81,7 +81,7 @@ (define_insn "*cmp_c" [(set (reg:CCC CC_REG) - (ltu (match_operand:QHSI 0 "h8300_dst_operand" "rQ") + (ltu:CCC (match_operand:QHSI 0 "h8300_dst_operand" "rQ") (match_operand:QHSI 1 "h8300_src_operand" "rQi")))] "reload_completed" { @@ -97,7 +97,7 @@ (define_insn "*cmpqi_z" [(set (reg:CCZ CC_REG) - (eq (match_operand:QI 0 "h8300_dst_operand" "rQ") + (eq:CCZ (match_operand:QI 0 "h8300_dst_operand" "rQ") (match_operand:QI 1 "h8300_src_operand" "rQi")))] "reload_completed" { return "cmp.b %X1,%X0"; } @@ -105,7 +105,7 @@ (define_insn "*cmphi_z" [(set (reg:CCZ CC_REG) - (eq (match_operand:HI 0 "h8300_dst_operand" "rQ") + (eq:CCZ (match_operand:HI 0 "h8300_dst_operand" "rQ") (match_operand:HI 1 "h8300_src_operand" 
"rQi")))] "reload_completed" { return "cmp.w %T1,%T0"; } @@ -113,7 +113,7 @@ (define_insn "*cmpsi_z" [(set (reg:CCZ CC_REG) - (eq (match_operand:SI 0 "h8300_dst_operand" "rQ") + (eq:CCZ (match_operand:SI 0 "h8300_dst_operand" "rQ") (match_operand:SI 1 "h8300_src_operand" "rQi")))] "reload_completed" { return "cmp.l %S1,%S0"; } @@ -121,7 +121,7 @@ (define_insn "*cmpqi" [(set (reg:CC CC_REG) - (compare (match_operand:QI 0 "h8300_dst_operand" "rQ") + (compare:CC (match_operand:QI 0 "h8300_dst_operand" "rQ") (match_operand:QI 1 "h8300_src_operand" "rQi")))] "reload_completed" "cmp.b %X1,%X0" @@ -129,7 +129,7 @@ (define_insn "*cmphi" [(set (reg:CC CC_REG) - (compare (match_operand:HI 0 "h8300_dst_operand" "rU,rQ") + (compare:CC (match_operand:HI 0 "h8300_dst_operand" "rU,rQ") (match_operand:HI 1 "h8300_src_operand" "P3>X,rQi")))] "reload_completed" { @@ -150,7 +150,7 @@ (define_insn "cmpsi" [(set (reg:CC CC_REG) - (compare (match_operand:SI 0 "h8300_dst_operand" "r,rQ") + (compare:CC (match_operand:SI 0 "h8300_dst_operand" "r,rQ") (match_operand:SI 1 "h8300_src_operand" "P3>X,rQi")))] "reload_completed" { @@ -176,7 +176,7 @@ (define_peephole2 [(match_scratch:QHSI 1 "r") (set (reg:CC CC_REG) - (compare (match_operand:QHSI 0 "memory_operand" "") + (compare:CC (match_operand:QHSI 0 "memory_operand" "") (const_int 0)))] "!mode_dependent_address_p (XEXP (operands[0], 0), MEM_ADDR_SPACE (operands[0]))" [(parallel [(set (reg:CCZN CC_REG) (compare:CCZN (match_dup 0) (const_int 0))) @@ -187,7 +187,7 @@ (define_peephole2 [(match_scratch:QHSI 1 "r") (set (reg:CC CC_REG) - (compare (match_operand:QHSI 0 "memory_operand" "") + (compare:CC (match_operand:QHSI 0 "memory_operand" "") (const_int 0)))] "mode_dependent_address_p (XEXP (operands[0], 0), MEM_ADDR_SPACE (operands[0]))" [(parallel [(set (match_dup 1) (match_dup 0)) (clobber (reg:CC CC_REG))]) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 09aa9b1461cc..ef6c12cd5697 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -9574,8 +9574,9 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp, case vector_loop: need_zero_guard = true; unroll_factor = 4; - /* Get the vector mode to move MOVE_MAX bytes. */ - nunits = MOVE_MAX / GET_MODE_SIZE (word_mode); + /* Get the vector mode to move STORE_MAX_PIECES/MOVE_MAX bytes. */ + nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX; + nunits /= GET_MODE_SIZE (word_mode); if (nunits > 1) { move_mode = mode_for_vector (word_mode, nunits).require (); @@ -27033,6 +27034,109 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx, return target; } +/* GF2P8AFFINEQB matrixes to implement shift and rotate. 
*/ + +static const uint64_t matrix_ashift[8] = +{ + 0, + 0x0001020408102040, /* 1 l */ + 0x0000010204081020, /* 2 l */ + 0x0000000102040810, /* 3 l */ + 0x0000000001020408, /* 4 l */ + 0x0000000000010204, /* 5 l */ + 0x0000000000000102, /* 6 l */ + 0x0000000000000001 /* 7 l */ +}; + +static const uint64_t matrix_lshiftrt[8] = +{ + 0, + 0x0204081020408000, /* 1 r */ + 0x0408102040800000, /* 2 r */ + 0x0810204080000000, /* 3 r */ + 0x1020408000000000, /* 4 r */ + 0x2040800000000000, /* 5 r */ + 0x4080000000000000, /* 6 r */ + 0x8000000000000000 /* 7 r */ +}; + +static const uint64_t matrix_ashiftrt[8] = +{ + 0, + 0x0204081020408080, /* 1 r */ + 0x0408102040808080, /* 2 r */ + 0x0810204080808080, /* 3 r */ + 0x1020408080808080, /* 4 r */ + 0x2040808080808080, /* 5 r */ + 0x4080808080808080, /* 6 r */ + 0x8080808080808080 /* 7 r */ +}; + +static const uint64_t matrix_rotate[8] = +{ + 0, + 0x8001020408102040, /* 1 rol8 */ + 0x4080010204081020, /* 2 rol8 */ + 0x2040800102040810, /* 3 rol8 */ + 0x1020408001020408, /* 4 rol8 */ + 0x0810204080010204, /* 5 rol8 */ + 0x0408102040800102, /* 6 rol8 */ + 0x0204081020408001 /* 7 rol8 */ +}; + +static const uint64_t matrix_rotatert[8] = +{ + 0, + 0x0204081020408001, /* 1 ror8 */ + 0x0408102040800102, /* 2 ror8 */ + 0x0810204080010204, /* 3 ror8 */ + 0x1020408001020408, /* 4 ror8 */ + 0x2040800102040810, /* 5 ror8 */ + 0x4080010204081020, /* 6 ror8 */ + 0x8001020408102040 /* 7 ror8 */ +}; + +/* Return rtx to load a 64bit GF2P8AFFINE GP(2) matrix implementing a shift + for CODE and shift count COUNT into register with vector of size of SRC. */ + +rtx +ix86_vgf2p8affine_shift_matrix (rtx src, rtx count, enum rtx_code code) +{ + machine_mode mode = GET_MODE (src); + const uint64_t *matrix; + unsigned shift = INTVAL (count) & 7; + gcc_assert (shift > 0 && shift < 8); + + switch (code) + { + case ASHIFT: + matrix = matrix_ashift; + break; + case ASHIFTRT: + matrix = matrix_ashiftrt; + break; + case LSHIFTRT: + matrix = matrix_lshiftrt; + break; + case ROTATE: + matrix = matrix_rotate; + break; + case ROTATERT: + matrix = matrix_rotatert; + break; + default: + gcc_unreachable (); + } + + int nelts = GET_MODE_NUNITS (mode); + rtvec vec = rtvec_alloc (nelts); + uint64_t ma = matrix[shift]; + for (int i = 0; i < nelts; i++) + RTVEC_ELT (vec, i) = gen_int_mode ((ma >> ((i % 8) * 8)) & 0xff, QImode); + + return force_reg (mode, gen_rtx_CONST_VECTOR (mode, vec)); +} + /* Trunc a vector to a narrow vector, like v4di -> v4si. */ void diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index 9941e61361c7..93e20947edf3 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -3089,10 +3089,13 @@ enum x86_cse_kind { X86_CSE_CONST0_VECTOR, X86_CSE_CONSTM1_VECTOR, - X86_CSE_VEC_DUP + X86_CSE_VEC_DUP, + X86_CSE_TLS_GD, + X86_CSE_TLS_LD_BASE, + X86_CSE_TLSDESC }; -struct redundant_load +struct redundant_pattern { /* Bitmap of basic blocks with broadcast instructions. */ auto_bitmap bbs; @@ -3130,7 +3133,7 @@ struct redundant_load static void ix86_place_single_vector_set (rtx dest, rtx src, bitmap bbs, - redundant_load *load = nullptr) + redundant_pattern *load = nullptr) { basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); /* For X86_CSE_VEC_DUP, don't place the vector set outside of the loop @@ -3639,6 +3642,8 @@ ix86_broadcast_inner (rtx op, machine_mode mode, Set *INSN_P to nullptr and return SET_SRC if SET_SRC is an integer constant. 
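As a cross-check of the GF2P8AFFINEQB tables in the i386-expand.cc hunk above, here is a scalar sketch (not from the patch) of the instruction's per-byte semantics with imm8 == 0: each output bit is a GF(2) dot product, i.e. the parity of the AND of the input byte with one matrix row.

#include <assert.h>
#include <stdint.h>

static uint8_t
gf2p8affine_byte (uint64_t matrix, uint8_t x)
{
  uint8_t y = 0;
  for (int b = 0; b < 8; b++)
    {
      /* The row for output bit b lives in byte (7 - b) of the matrix.  */
      uint8_t row = (uint8_t) (matrix >> (8 * (7 - b)));
      y |= (uint8_t) (__builtin_parity (row & x) << b);
    }
  return y;
}

int
main (void)
{
  /* matrix_lshiftrt[2] above: logical right shift of each byte by 2.  */
  for (unsigned x = 0; x < 256; x++)
    assert (gf2p8affine_byte (0x0408102040800000ULL, (uint8_t) x)
            == (uint8_t) (x >> 2));
  return 0;
}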
*/ op = src; + if (mode != GET_MODE (reg)) + op = gen_int_mode (INTVAL (src), mode); *insn_p = nullptr; } else @@ -3679,25 +3684,696 @@ ix86_broadcast_inner (rtx op, machine_mode mode, return op; } -/* At entry of the nearest common dominator for basic blocks with vector - CONST0_RTX and integer CONSTM1_RTX uses, generate a single widest - vector set instruction for all CONST0_RTX and integer CONSTM1_RTX - uses. +/* Replace CALL instruction in TLS_CALL_INSNS with SET from SRC and + put the updated instruction in UPDATED_TLS_INSNS. */ - NB: We want to generate only a single widest vector set to cover the - whole function. The LCM algorithm isn't appropriate here since it - may place a vector set inside the loop. */ +static void +replace_tls_call (rtx src, auto_bitmap &tls_call_insns, + auto_bitmap &updated_tls_insns) +{ + bitmap_iterator bi; + unsigned int id; -static unsigned int -remove_redundant_vector_load (void) + EXECUTE_IF_SET_IN_BITMAP (tls_call_insns, 0, id, bi) + { + rtx_insn *insn = DF_INSN_UID_GET (id)->insn; + + /* If this isn't a CALL, only GNU2 TLS implicit CALL patterns are + allowed. */ + if (!CALL_P (insn)) + { + attr_tls64 tls64 = get_attr_tls64 (insn); + if (tls64 != TLS64_CALL && tls64 != TLS64_COMBINE) + gcc_unreachable (); + } + + rtx pat = PATTERN (insn); + gcc_assert (GET_CODE (pat) == PARALLEL); + rtx set = XVECEXP (pat, 0, 0); + gcc_assert (GET_CODE (set) == SET); + rtx dest = SET_DEST (set); + + set = gen_rtx_SET (dest, src); + rtx_insn *set_insn = emit_insn_after (set, insn); + if (recog_memoized (set_insn) < 0) + gcc_unreachable (); + + /* Put SET_INSN in UPDATED_TLS_INSNS. */ + bitmap_set_bit (updated_tls_insns, INSN_UID (set_insn)); + + if (dump_file) + { + fprintf (dump_file, "\nReplace:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nwith:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\n"); + } + + /* Delete the CALL insn. */ + delete_insn (insn); + + df_insn_rescan (set_insn); + } +} + +/* Return the basic block which dominates all basic blocks which set + hard register REGNO used in basic block BB. */ + +static basic_block +ix86_get_dominator_for_reg (unsigned int regno, basic_block bb) +{ + basic_block set_bb; + auto_bitmap set_bbs; + + /* Get all BBs which set REGNO and dominate the current BB from all + DEFs of REGNO. */ + for (df_ref def = DF_REG_DEF_CHAIN (regno); + def; + def = DF_REF_NEXT_REG (def)) + if (!DF_REF_IS_ARTIFICIAL (def) + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER) + && !DF_REF_FLAGS_IS_SET (def, DF_REF_MUST_CLOBBER)) + { + set_bb = DF_REF_BB (def); + if (dominated_by_p (CDI_DOMINATORS, bb, set_bb)) + bitmap_set_bit (set_bbs, set_bb->index); + } + + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); + return bb; +} + +/* Mark FLAGS register as live in DATA, a bitmap of live caller-saved + registers, if DEST is FLAGS register. */ + +static void +ix86_check_flags_reg (rtx dest, const_rtx, void *data) +{ + auto_bitmap *live_caller_saved_regs = (auto_bitmap *) data; + if (REG_P (dest) && REGNO (dest) == FLAGS_REG) + bitmap_set_bit (*live_caller_saved_regs, FLAGS_REG); +} + +/* Emit a TLS_SET instruction of KIND in basic block BB. Store the + insertion point in *BEFORE_P for emit_insn_before or in *AFTER_P + for emit_insn_after. UPDATED_GNU_TLS_INSNS contains instructions + which replace the GNU TLS instructions. UPDATED_GNU2_TLS_INSNS + contains instructions which replace the GNU2 TLS instructions. 
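A hypothetical source-level case of the placement hazard ix86_emit_tls_call has to navigate:

/* FLAGS is live across the conditional branch that ends the common
   dominator block, and a __tls_get_addr call additionally clobbers all
   caller-saved registers, so the hoisted call must go where both are
   dead, possibly in an earlier dominating block.  */
extern __thread int tls_var;

int
use_tls (int c)
{
  if (c > 0)           /* conditional branch: FLAGS live here */
    return tls_var;
  return tls_var - 1;  /* both arms reuse one hoisted TLS call */
}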
*/ + +static rtx_insn * +ix86_emit_tls_call (rtx tls_set, x86_cse_kind kind, basic_block bb, + rtx_insn **before_p, rtx_insn **after_p, + auto_bitmap &updated_gnu_tls_insns, + auto_bitmap &updated_gnu2_tls_insns) +{ + rtx_insn *tls_insn; + + do + { + rtx_insn *insn = BB_HEAD (bb); + while (insn && !NONDEBUG_INSN_P (insn)) + { + if (insn == BB_END (bb)) + { + /* This must be the beginning basic block: + + (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK) + (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG) + + or a basic block with only a label: + + (code_label 78 11 77 3 14 (nil) [1 uses]) + (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK) + + or a basic block with only a debug marker: + + (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK) + (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG) + (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil)) + + */ + gcc_assert (DEBUG_INSN_P (insn) + || (NOTE_P (insn) + && ((NOTE_KIND (insn) + == NOTE_INSN_FUNCTION_BEG) + || (NOTE_KIND (insn) + == NOTE_INSN_BASIC_BLOCK)))); + insn = NULL; + break; + } + insn = NEXT_INSN (insn); + } + + /* TLS_GD and TLS_LD_BASE instructions are normal functions which + clobber caller-saved registers. TLSDESC instructions only + clobber FLAGS. If any registers clobbered by TLS instructions + are live in this basic block, we must insert TLS instructions + after all live registers clobbered are dead. */ + + auto_bitmap live_caller_saved_regs; + bitmap in = df_live ? DF_LIVE_IN (bb) : DF_LR_IN (bb); + + if (bitmap_bit_p (in, FLAGS_REG)) + bitmap_set_bit (live_caller_saved_regs, FLAGS_REG); + + unsigned int i; + + /* Get all live caller-saved registers for TLS_GD and TLS_LD_BASE + instructions. */ + if (kind != X86_CSE_TLSDESC) + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (call_used_regs[i] + && !fixed_regs[i] + && bitmap_bit_p (in, i)) + bitmap_set_bit (live_caller_saved_regs, i); + + if (bitmap_empty_p (live_caller_saved_regs)) + { + if (insn == BB_HEAD (bb)) + { + *before_p = insn; + tls_insn = emit_insn_before (tls_set, insn); + } + else + { + /* Emit the TLS call after NOTE_INSN_FUNCTION_BEG in the + beginning basic block: + + (note 4 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK) + (note 2 4 26 2 NOTE_INSN_FUNCTION_BEG) + + or after NOTE_INSN_BASIC_BLOCK in a basic block with + only a label: + + (code_label 78 11 77 3 14 (nil) [1 uses]) + (note 77 78 54 3 [bb 3] NOTE_INSN_BASIC_BLOCK) + + or after debug marker in a basic block with only a + debug marker: + + (note 3 0 2 2 [bb 2] NOTE_INSN_BASIC_BLOCK) + (note 2 3 5 2 NOTE_INSN_FUNCTION_BEG) + (debug_insn 5 2 16 2 (debug_marker) "x.c":6:3 -1 (nil)) + + */ + insn = insn ? PREV_INSN (insn) : BB_END (bb); + *after_p = insn; + tls_insn = emit_insn_after (tls_set, insn); + } + return tls_insn; + } + + bool repeat = false; + + /* Search for REG_DEAD notes in this basic block. */ + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) + continue; + + /* NB: Conditional jump is the only instruction which reads + flags register and changes control flow. We can never + place the TLS call after unconditional jump. */ + if (JUMP_P (insn)) + { + /* This must be a conditional jump. */ + rtx label = JUMP_LABEL (insn); + if (label == nullptr + || ANY_RETURN_P (label) + || !(LABEL_P (label) || SYMBOL_REF_P (label))) + gcc_unreachable (); + + /* Place the call before all FLAGS_REG setting BBs since + we can't place a call before nor after a conditional + jump. */ + bb = ix86_get_dominator_for_reg (FLAGS_REG, bb); + + /* Start over again. 
*/ + repeat = true; + break; + } + + if (bitmap_bit_p (updated_gnu_tls_insns, INSN_UID (insn))) + { + /* Insert the __tls_get_addr call before INSN which + replaces a __tls_get_addr call. */ + *before_p = insn; + tls_insn = emit_insn_before (tls_set, insn); + return tls_insn; + } + + if (bitmap_bit_p (updated_gnu2_tls_insns, INSN_UID (insn))) + { + /* Mark FLAGS register as dead since FLAGS register + would be clobbered by the GNU2 TLS instruction. */ + bitmap_clear_bit (live_caller_saved_regs, FLAGS_REG); + continue; + } + + /* Check if FLAGS register is live. */ + note_stores (insn, ix86_check_flags_reg, + &live_caller_saved_regs); + + rtx link; + for (link = REG_NOTES (insn); link; link = XEXP (link, 1)) + if (REG_NOTE_KIND (link) == REG_DEAD + && REG_P (XEXP (link, 0))) + { + /* Mark the live caller-saved register as dead. */ + for (i = REGNO (XEXP (link, 0)); + i < END_REGNO (XEXP (link, 0)); + i++) + if (i < FIRST_PSEUDO_REGISTER) + bitmap_clear_bit (live_caller_saved_regs, i); + + if (bitmap_empty_p (live_caller_saved_regs)) + { + *after_p = insn; + tls_insn = emit_insn_after (tls_set, insn); + return tls_insn; + } + } + } + + /* NB: Start over again for conditional jump. */ + if (repeat) + continue; + + gcc_assert (!bitmap_empty_p (live_caller_saved_regs)); + + /* If any live caller-saved registers aren't dead at the end of + this basic block, get the basic block which dominates all + basic blocks which set the remaining live registers. */ + auto_bitmap set_bbs; + bitmap_iterator bi; + unsigned int id; + EXECUTE_IF_SET_IN_BITMAP (live_caller_saved_regs, 0, id, bi) + { + basic_block set_bb = ix86_get_dominator_for_reg (id, bb); + bitmap_set_bit (set_bbs, set_bb->index); + } + bb = nearest_common_dominator_for_set (CDI_DOMINATORS, set_bbs); + } + while (true); +} + +/* Generate a TLS call of KIND with VAL and copy the call result to DEST, + at entry of the nearest dominator for basic block map BBS, which is in + the fake loop that contains the whole function, so that there is only + a single TLS CALL of KIND with VAL in the whole function. + UPDATED_GNU_TLS_INSNS contains instructions which replace the GNU TLS + instructions. UPDATED_GNU2_TLS_INSNS contains instructions which + replace the GNU2 TLS instructions. If TLSDESC_SET isn't nullptr, + insert it before the TLS call. 
*/ + +static void +ix86_place_single_tls_call (rtx dest, rtx val, x86_cse_kind kind, + auto_bitmap &bbs, + auto_bitmap &updated_gnu_tls_insns, + auto_bitmap &updated_gnu2_tls_insns, + rtx tlsdesc_set = nullptr) +{ + basic_block bb = nearest_common_dominator_for_set (CDI_DOMINATORS, bbs); + while (bb->loop_father->latch + != EXIT_BLOCK_PTR_FOR_FN (cfun)) + bb = get_immediate_dominator (CDI_DOMINATORS, + bb->loop_father->header); + + rtx rax = nullptr, rdi; + rtx eqv = nullptr; + rtx caddr; + rtx set; + rtx clob; + rtx symbol; + rtx tls; + + switch (kind) + { + case X86_CSE_TLS_GD: + rax = gen_rtx_REG (Pmode, AX_REG); + rdi = gen_rtx_REG (Pmode, DI_REG); + caddr = ix86_tls_get_addr (); + + symbol = XVECEXP (val, 0, 0); + tls = gen_tls_global_dynamic_64 (Pmode, rax, symbol, caddr, rdi); + + if (GET_MODE (symbol) != Pmode) + symbol = gen_rtx_ZERO_EXTEND (Pmode, symbol); + eqv = symbol; + break; + + case X86_CSE_TLS_LD_BASE: + rax = gen_rtx_REG (Pmode, AX_REG); + rdi = gen_rtx_REG (Pmode, DI_REG); + caddr = ix86_tls_get_addr (); + + tls = gen_tls_local_dynamic_base_64 (Pmode, rax, caddr, rdi); + + /* Attach a unique REG_EQUAL to DEST, to allow the RTL optimizers + to share the LD_BASE result with other LD model accesses. */ + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLS_LD_BASE); + + break; + + case X86_CSE_TLSDESC: + set = gen_rtx_SET (dest, val); + clob = gen_rtx_CLOBBER (VOIDmode, + gen_rtx_REG (CCmode, FLAGS_REG)); + tls = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clob)); + break; + + default: + gcc_unreachable (); + } + + /* Emit the TLS CALL insn. */ + rtx_insn *before = nullptr; + rtx_insn *after = nullptr; + rtx_insn *tls_insn = ix86_emit_tls_call (tls, kind, bb, &before, + &after, + updated_gnu_tls_insns, + updated_gnu2_tls_insns); + + rtx_insn *tlsdesc_insn = nullptr; + if (tlsdesc_set) + { + rtx dest = copy_rtx (SET_DEST (tlsdesc_set)); + rtx src = copy_rtx (SET_SRC (tlsdesc_set)); + tlsdesc_set = gen_rtx_SET (dest, src); + tlsdesc_insn = emit_insn_before (tlsdesc_set, tls_insn); + } + + if (kind != X86_CSE_TLSDESC) + { + RTL_CONST_CALL_P (tls_insn) = 1; + + /* Indicate that this function can't jump to non-local gotos. */ + make_reg_eh_region_note_nothrow_nononlocal (tls_insn); + } + + if (recog_memoized (tls_insn) < 0) + gcc_unreachable (); + + if (dump_file) + { + if (after) + { + fprintf (dump_file, "\nPlace:\n\n"); + if (tlsdesc_insn) + print_rtl_single (dump_file, tlsdesc_insn); + print_rtl_single (dump_file, tls_insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, after); + fprintf (dump_file, "\n"); + } + else + { + fprintf (dump_file, "\nPlace:\n\n"); + if (tlsdesc_insn) + print_rtl_single (dump_file, tlsdesc_insn); + print_rtl_single (dump_file, tls_insn); + fprintf (dump_file, "\nbefore:\n\n"); + print_rtl_single (dump_file, before); + fprintf (dump_file, "\n"); + } + } + + if (kind != X86_CSE_TLSDESC) + { + /* Copy RAX to DEST. 
*/ + set = gen_rtx_SET (dest, rax); + rtx_insn *set_insn = emit_insn_after (set, tls_insn); + set_dst_reg_note (set_insn, REG_EQUAL, copy_rtx (eqv), dest); + if (dump_file) + { + fprintf (dump_file, "\nPlace:\n\n"); + print_rtl_single (dump_file, set_insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, tls_insn); + fprintf (dump_file, "\n"); + } + } +} + +namespace { + +const pass_data pass_data_x86_cse = +{ + RTL_PASS, /* type */ + "x86_cse", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_MACH_DEP, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + 0, /* todo_flags_finish */ +}; + +class pass_x86_cse : public rtl_opt_pass +{ +public: + pass_x86_cse (gcc::context *ctxt) + : rtl_opt_pass (pass_data_x86_cse, ctxt) + {} + + /* opt_pass methods: */ + bool gate (function *fun) final override + { + return (TARGET_SSE2 + && optimize + && optimize_function_for_speed_p (fun)); + } + + unsigned int execute (function *) final override + { + return x86_cse (); + } + +private: + /* The redundant source value. */ + rtx val; + /* The instruction which defines the redundant value. */ + rtx_insn *def_insn; + /* Mode of the destination of the candidate redundant instruction. */ + machine_mode mode; + /* Mode of the source of the candidate redundant instruction. */ + machine_mode scalar_mode; + /* The classification of the candidate redundant instruction. */ + x86_cse_kind kind; + + unsigned int x86_cse (void); + bool candidate_gnu_tls_p (rtx_insn *, attr_tls64); + bool candidate_gnu2_tls_p (rtx, attr_tls64); + bool candidate_vector_p (rtx); +}; // class pass_x86_cse + +/* Return true and output def_insn, val, mode, scalar_mode and kind if + INSN is UNSPEC_TLS_GD or UNSPEC_TLS_LD_BASE. */ + +bool +pass_x86_cse::candidate_gnu_tls_p (rtx_insn *insn, attr_tls64 tls64) +{ + if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p) + return false; + + /* Record the redundant TLS CALLs for 64-bit: + + (parallel [ + (set (reg:DI 0 ax) + (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr"))) + (const_int 0 [0]))) + (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50]) + (reg/f:DI 7 sp)] UNSPEC_TLS_GD) + (clobber (reg:DI 5 di))]) + + + and + + (parallel [ + (set (reg:DI 0 ax) + (call:DI (mem:QI (symbol_ref:DI ("__tls_get_addr"))) + (const_int 0 [0]))) + (unspec:DI [(reg/f:DI 7 sp)] UNSPEC_TLS_LD_BASE)]) + + */ + + rtx pat = PATTERN (insn); + rtx set = XVECEXP (pat, 0, 0); + gcc_assert (GET_CODE (set) == SET); + rtx dest = SET_DEST (set); + scalar_mode = mode = GET_MODE (dest); + val = XVECEXP (pat, 0, 1); + gcc_assert (GET_CODE (val) == UNSPEC); + + if (tls64 == TLS64_GD) + kind = X86_CSE_TLS_GD; + else + kind = X86_CSE_TLS_LD_BASE; + + def_insn = nullptr; + return true; +} + +/* Return true and output def_insn, val, mode, scalar_mode and kind if + SET is UNSPEC_TLSDESC. 
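A rough source-level counterpart of the UNSPEC_TLSDESC patterns recognized here (a hedged sketch, assuming -fpic -mtls-dialect=gnu2):

/* A non-exported TLS variable is typically local-dynamic: its address is
   _TLS_MODULE_BASE_ (one TLSDESC call) plus a constant DTPOFF offset.
   Only the call part is identical across accesses, and that is what the
   pass commons.  */
static __thread int counter;

int
bump_counter (void)
{
  return ++counter;
}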
*/ + +bool +pass_x86_cse::candidate_gnu2_tls_p (rtx set, attr_tls64 tls64) +{ + if (!TARGET_64BIT || !cfun->machine->tls_descriptor_call_multiple_p) + return false; + + /* Record GNU2 TLS CALLs for 64-bit: + + (set (reg/f:DI 104) + (plus:DI (unspec:DI [ + (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10]) + (reg:DI 114) + (reg/f:DI 7 sp)] UNSPEC_TLSDESC) + (const:DI (unspec:DI [ + (symbol_ref:DI ("e") [flags 0x1a]) + ] UNSPEC_DTPOFF)))) + + (set (reg/f:DI 104) + (plus:DI (unspec:DI [ + (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10]) + (unspec:DI [ + (symbol_ref:DI ("_TLS_MODULE_BASE_") [flags 0x10]) + ] UNSPEC_TLSDESC) + (reg/f:DI 7 sp)] UNSPEC_TLSDESC) + (const:DI (unspec:DI [ + (symbol_ref:DI ("e") [flags 0x1a]) + ] UNSPEC_DTPOFF)))) + + and + + (set (reg:DI 101) + (unspec:DI [(symbol_ref:DI ("foo") [flags 0x50]) + (reg:DI 112) + (reg/f:DI 7 sp)] UNSPEC_TLSDESC)) + + */ + + rtx src = SET_SRC (set); + val = src; + if (tls64 != TLS64_CALL) + src = XEXP (src, 0); + + kind = X86_CSE_TLSDESC; + gcc_assert (GET_CODE (src) == UNSPEC); + rtx tls_symbol = XVECEXP (src, 0, 0); + src = XVECEXP (src, 0, 1); + scalar_mode = mode = GET_MODE (src); + if (REG_P (src)) + { + /* All definitions of reg:DI 129 in + + (set (reg:DI 110) + (unspec:DI [(symbol_ref:DI ("foo")) + (reg:DI 129) + (reg/f:DI 7 sp)] UNSPEC_TLSDESC)) + + should have the same source as in + + (set (reg:DI 129) + (unspec:DI [(symbol_ref:DI ("foo"))] UNSPEC_TLSDESC)) + + */ + + df_ref ref; + rtx_insn *set_insn = nullptr; + for (ref = DF_REG_DEF_CHAIN (REGNO (src)); + ref; + ref = DF_REF_NEXT_REG (ref)) + { + if (DF_REF_IS_ARTIFICIAL (ref)) + break; + + set_insn = DF_REF_INSN (ref); + tls64 = get_attr_tls64 (set_insn); + if (tls64 != TLS64_LEA) + { + set_insn = nullptr; + break; + } + + rtx tls_set = PATTERN (set_insn); + rtx tls_src = XVECEXP (SET_SRC (tls_set), 0, 0); + if (!rtx_equal_p (tls_symbol, tls_src)) + { + set_insn = nullptr; + break; + } + } + + if (!set_insn) + return false; + + def_insn = set_insn; + } + else if (GET_CODE (src) == UNSPEC + && XINT (src, 1) == UNSPEC_TLSDESC + && SYMBOL_REF_P (XVECEXP (src, 0, 0))) + def_insn = nullptr; + else + gcc_unreachable (); + + return true; +} + +/* Return true and output def_insn, val, mode, scalar_mode and kind if + INSN is a vector broadcast instruction. */ + +bool +pass_x86_cse::candidate_vector_p (rtx set) +{ + rtx src = SET_SRC (set); + rtx dest = SET_DEST (set); + mode = GET_MODE (dest); + /* Skip non-vector instruction. */ + if (!VECTOR_MODE_P (mode)) + return false; + + /* Skip non-vector load instruction. */ + if (!REG_P (dest) && !SUBREG_P (dest)) + return false; + + val = ix86_broadcast_inner (src, mode, &scalar_mode, &kind, + &def_insn); + return val ? true : false; +} + +/* At entry of the nearest common dominator for basic blocks with + + 1. Vector CONST0_RTX patterns. + 2. Vector CONSTM1_RTX patterns. + 3. Vector broadcast patterns. + 4. UNSPEC_TLS_GD patterns. + 5. UNSPEC_TLS_LD_BASE patterns. + 6. UNSPEC_TLSDESC patterns. + + generate a single pattern whose destination is used to replace the + source in all identical patterns. + + NB: We want to generate a pattern, which is executed only once, to + cover the whole function. The LCM algorithm isn't appropriate here + since it may place a pattern inside the loop. 
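The TLS flavour of this redundancy in source form (illustrative example, -fpic -mtls-dialect=gnu):

/* Each arm below materializes its own __tls_get_addr call for the same
   symbol, and GIMPLE-level CSE does not clean this up; the pass keeps a
   single call in the nearest common dominator and feeds both uses from
   its result.  */
extern __thread int shared;

void
touch (int c)
{
  if (c)
    shared++;
  else
    shared--;
}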
*/ + +unsigned int +pass_x86_cse::x86_cse (void) { timevar_push (TV_MACH_DEP); - auto_vec<redundant_load *> loads; - redundant_load *load; + auto_vec<redundant_pattern *> loads; + redundant_pattern *load; basic_block bb; rtx_insn *insn; unsigned int i; + auto_bitmap updated_gnu_tls_insns; + auto_bitmap updated_gnu2_tls_insns; df_set_flags (DF_DEFER_INSN_RESCAN); @@ -3710,61 +4386,72 @@ remove_redundant_vector_load (void) if (!NONDEBUG_INSN_P (insn)) continue; + bool matched = false; + /* Remove redundant patterns if there are more than 2 of + them. */ + unsigned int threshold = 2; + rtx set = single_set (insn); - if (!set) + if (!set && !CALL_P (insn)) continue; - /* Record single set vector instruction with CONST0_RTX and - CONSTM1_RTX source. Record basic blocks with CONST0_RTX and - CONSTM1_RTX. Count CONST0_RTX and CONSTM1_RTX. Record the - maximum size of CONST0_RTX and CONSTM1_RTX. */ + attr_tls64 tls64 = get_attr_tls64 (insn); + switch (tls64) + { + case TLS64_GD: + case TLS64_LD_BASE: + /* Verify UNSPEC_TLS_GD and UNSPEC_TLS_LD_BASE. */ + if (candidate_gnu_tls_p (insn, tls64)) + break; + continue; - rtx dest = SET_DEST (set); - machine_mode mode = GET_MODE (dest); - /* Skip non-vector instruction. */ - if (!VECTOR_MODE_P (mode)) - continue; + case TLS64_CALL: + case TLS64_COMBINE: + /* Verify UNSPEC_TLSDESC. */ + if (candidate_gnu2_tls_p (set, tls64)) + break; + continue; - rtx src = SET_SRC (set); - /* Skip non-vector load instruction. */ - if (!REG_P (dest) && !SUBREG_P (dest)) - continue; + case TLS64_LEA: + /* Skip TLS64_LEA. */ + continue; - rtx_insn *def_insn; - machine_mode scalar_mode; - x86_cse_kind kind; - rtx val = ix86_broadcast_inner (src, mode, &scalar_mode, - &kind, &def_insn); - if (!val) + case TLS64_NONE: + if (!set) continue; - /* Remove redundant register loads if there are more than 2 - loads will be used. */ - unsigned int threshold = 2; + /* Check for vector broadcast. */ + if (candidate_vector_p (set)) + break; + continue; + } - /* Check if there is a matching redundant vector load. */ - bool matched = false; + /* Check if there is a matching redundant load. */ FOR_EACH_VEC_ELT (loads, i, load) if (load->val && load->kind == kind && load->mode == scalar_mode && (load->bb == bb - || kind < X86_CSE_VEC_DUP + || kind != X86_CSE_VEC_DUP /* Non all 0s/1s vector load must be in the same basic block if it is in a recursive call. */ || !recursive_call_p) && rtx_equal_p (load->val, val)) { - /* Record vector instruction. */ + /* Record instruction. */ bitmap_set_bit (load->insns, INSN_UID (insn)); /* Record the maximum vector size. */ - if (load->size < GET_MODE_SIZE (mode)) + if (kind <= X86_CSE_VEC_DUP + && load->size < GET_MODE_SIZE (mode)) load->size = GET_MODE_SIZE (mode); /* Record the basic block. */ bitmap_set_bit (load->bbs, bb->index); + + /* Increment the count. */ load->count++; + matched = true; break; } @@ -3772,8 +4459,11 @@ remove_redundant_vector_load (void) if (matched) continue; - /* We see this vector broadcast the first time. */ - load = new redundant_load; + /* We see this instruction for the first time. Record the + redundant source value, its mode, the destination size, + instruction which defines the redundant source value, + instruction basic block and the instruction kind.
*/ + load = new redundant_pattern; load->val = copy_rtx (val); load->mode = scalar_mode; @@ -3792,49 +4482,64 @@ remove_redundant_vector_load (void) } bool replaced = false; - rtx reg, broadcast_source, broadcast_reg; FOR_EACH_VEC_ELT (loads, i, load) if (load->count >= load->threshold) { - machine_mode mode = ix86_get_vector_cse_mode (load->size, - load->mode); - broadcast_reg = gen_reg_rtx (mode); - if (load->def_insn) - { - /* Replace redundant vector loads with a single vector load - in the same basic block. */ - reg = load->val; - if (load->mode != GET_MODE (reg)) - reg = gen_rtx_SUBREG (load->mode, reg, 0); - broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg); - replace_vector_const (mode, broadcast_reg, load->insns, - load->mode); - } - else + machine_mode mode; + rtx reg, broadcast_source, broadcast_reg; + replaced = true; + switch (load->kind) { - /* This is a constant integer/double vector. If the - inner scalar is 0 or -1, set vector to CONST0_RTX - or CONSTM1_RTX directly. */ - rtx reg; - switch (load->kind) + case X86_CSE_TLS_GD: + case X86_CSE_TLS_LD_BASE: + case X86_CSE_TLSDESC: + broadcast_reg = gen_reg_rtx (load->mode); + replace_tls_call (broadcast_reg, load->insns, + (load->kind == X86_CSE_TLSDESC + ? updated_gnu2_tls_insns + : updated_gnu_tls_insns)); + load->broadcast_reg = broadcast_reg; + break; + + case X86_CSE_CONST0_VECTOR: + case X86_CSE_CONSTM1_VECTOR: + case X86_CSE_VEC_DUP: + mode = ix86_get_vector_cse_mode (load->size, load->mode); + broadcast_reg = gen_reg_rtx (mode); + if (load->def_insn) { - case X86_CSE_CONST0_VECTOR: - broadcast_source = CONST0_RTX (mode); - break; - case X86_CSE_CONSTM1_VECTOR: - broadcast_source = CONSTM1_RTX (mode); - break; - default: - reg = gen_reg_rtx (load->mode); + /* Replace redundant vector loads with a single vector + load in the same basic block. */ + reg = load->val; + if (load->mode != GET_MODE (reg)) + reg = gen_rtx_SUBREG (load->mode, reg, 0); broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg); - break; } + else + /* This is a constant integer/double vector. If the + inner scalar is 0 or -1, set vector to CONST0_RTX + or CONSTM1_RTX directly. */ + switch (load->kind) + { + case X86_CSE_CONST0_VECTOR: + broadcast_source = CONST0_RTX (mode); + break; + case X86_CSE_CONSTM1_VECTOR: + broadcast_source = CONSTM1_RTX (mode); + break; + case X86_CSE_VEC_DUP: + reg = gen_reg_rtx (load->mode); + broadcast_source = gen_rtx_VEC_DUPLICATE (mode, reg); + break; + default: + gcc_unreachable (); + } replace_vector_const (mode, broadcast_reg, load->insns, load->mode); + load->broadcast_source = broadcast_source; + load->broadcast_reg = broadcast_reg; + break; } - load->broadcast_source = broadcast_source; - load->broadcast_reg = broadcast_reg; - replaced = true; } if (replaced) @@ -3849,40 +4554,73 @@ remove_redundant_vector_load (void) FOR_EACH_VEC_ELT (loads, i, load) if (load->count >= load->threshold) { + rtx set; if (load->def_insn) - { - /* Insert a broadcast after the original scalar - definition. */ - rtx set = gen_rtx_SET (load->broadcast_reg, - load->broadcast_source); - insn = emit_insn_after (set, load->def_insn); - - if (cfun->can_throw_non_call_exceptions) - { - /* Handle REG_EH_REGION note in DEF_INSN. 
*/ - rtx note = find_reg_note (load->def_insn, - REG_EH_REGION, nullptr); - if (note) - { - control_flow_insns.safe_push (load->def_insn); - add_reg_note (insn, REG_EH_REGION, - XEXP (note, 0)); - } - } + switch (load->kind) + { + case X86_CSE_TLSDESC: + ix86_place_single_tls_call (load->broadcast_reg, + load->val, + load->kind, + load->bbs, + updated_gnu_tls_insns, + updated_gnu2_tls_insns, + PATTERN (load->def_insn)); + break; + case X86_CSE_VEC_DUP: + /* Insert a broadcast after the original scalar + definition. */ + set = gen_rtx_SET (load->broadcast_reg, + load->broadcast_source); + insn = emit_insn_after (set, load->def_insn); + + if (cfun->can_throw_non_call_exceptions) + { + /* Handle REG_EH_REGION note in DEF_INSN. */ + rtx note = find_reg_note (load->def_insn, + REG_EH_REGION, nullptr); + if (note) + { + control_flow_insns.safe_push (load->def_insn); + add_reg_note (insn, REG_EH_REGION, + XEXP (note, 0)); + } + } - if (dump_file) - { - fprintf (dump_file, "\nAdd:\n\n"); - print_rtl_single (dump_file, insn); - fprintf (dump_file, "\nafter:\n\n"); - print_rtl_single (dump_file, load->def_insn); - fprintf (dump_file, "\n"); - } - } + if (dump_file) + { + fprintf (dump_file, "\nAdd:\n\n"); + print_rtl_single (dump_file, insn); + fprintf (dump_file, "\nafter:\n\n"); + print_rtl_single (dump_file, load->def_insn); + fprintf (dump_file, "\n"); + } + break; + default: + gcc_unreachable (); + } else - ix86_place_single_vector_set (load->broadcast_reg, - load->broadcast_source, - load->bbs, load); + switch (load->kind) + { + case X86_CSE_TLS_GD: + case X86_CSE_TLS_LD_BASE: + case X86_CSE_TLSDESC: + ix86_place_single_tls_call (load->broadcast_reg, + load->val, + load->kind, + load->bbs, + updated_gnu_tls_insns, + updated_gnu2_tls_insns); + break; + case X86_CSE_CONST0_VECTOR: + case X86_CSE_CONSTM1_VECTOR: + case X86_CSE_VEC_DUP: + ix86_place_single_vector_set (load->broadcast_reg, + load->broadcast_source, + load->bbs, + load); + break; + } } loop_optimizer_finalize (); @@ -3912,48 +4650,12 @@ remove_redundant_vector_load (void) return 0; } -namespace { - -const pass_data pass_data_remove_redundant_vector_load = -{ - RTL_PASS, /* type */ - "rrvl", /* name */ - OPTGROUP_NONE, /* optinfo_flags */ - TV_MACH_DEP, /* tv_id */ - 0, /* properties_required */ - 0, /* properties_provided */ - 0, /* properties_destroyed */ - 0, /* todo_flags_start */ - 0, /* todo_flags_finish */ -}; - -class pass_remove_redundant_vector_load : public rtl_opt_pass -{ -public: - pass_remove_redundant_vector_load (gcc::context *ctxt) - : rtl_opt_pass (pass_data_remove_redundant_vector_load, ctxt) - {} - - /* opt_pass methods: */ - bool gate (function *fun) final override - { - return (TARGET_SSE2 - && optimize - && optimize_function_for_speed_p (fun)); - } - - unsigned int execute (function *) final override - { - return remove_redundant_vector_load (); - } -}; // class pass_remove_redundant_vector_load - } // anon namespace rtl_opt_pass * -make_pass_remove_redundant_vector_load (gcc::context *ctxt) +make_pass_x86_cse (gcc::context *ctxt) { - return new pass_remove_redundant_vector_load (ctxt); + return new pass_x86_cse (ctxt); } /* Convert legacy instructions that clobbers EFLAGS to APX_NF diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index 09a35ef62980..abb5dd7700e7 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -1172,6 +1172,10 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], OPT_mrecip, MASK_RECIP), + 
IX86_ATTR_YES ("80387", + OPT_m80387, + MASK_80387), + IX86_ATTR_IX86_YES ("general-regs-only", OPT_mgeneral_regs_only, OPTION_MASK_GENERAL_REGS_ONLY), @@ -1281,6 +1285,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], else if (type == ix86_opt_yes || type == ix86_opt_no) { + opts_set->x_target_flags |= mask; + if (type == ix86_opt_no) opt_set_p = !opt_set_p; @@ -3556,6 +3562,10 @@ ix86_set_current_function (tree fndecl) isa = "AVX"; else if (cfun->machine->func_type != TYPE_NORMAL) isa = "SSE"; + else if (TARGET_MMX) + isa = "MMX/3Dnow"; + else if (TARGET_80387) + isa = "80387"; else isa = NULL; } diff --git a/gcc/config/i386/i386-passes.def b/gcc/config/i386/i386-passes.def index 06f0288b0671..553b46d1fdc1 100644 --- a/gcc/config/i386/i386-passes.def +++ b/gcc/config/i386/i386-passes.def @@ -35,6 +35,6 @@ along with GCC; see the file COPYING3. If not see PR116174. */ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_tight_loops); - INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_redundant_vector_load); + INSERT_PASS_AFTER (pass_late_combine, 1, pass_x86_cse); INSERT_PASS_AFTER (pass_late_combine, 1, pass_remove_partial_avx_dependency); INSERT_PASS_AFTER (pass_rtl_ifcvt, 1, pass_apx_nf_convert); diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index 69bc0ee570dd..bdb8bb963b5d 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -290,6 +290,7 @@ extern rtx ix86_tls_module_base (void); extern bool ix86_gpr_tls_address_pattern_p (rtx); extern bool ix86_tls_address_pattern_p (rtx); extern rtx ix86_rewrite_tls_address (rtx); +extern rtx ix86_tls_get_addr (void); extern void ix86_expand_vector_init (bool, rtx, rtx); extern void ix86_expand_vector_set (bool, rtx, rtx, int); @@ -430,8 +431,7 @@ extern rtl_opt_pass *make_pass_insert_endbr_and_patchable_area (gcc::context *); extern rtl_opt_pass *make_pass_remove_partial_avx_dependency (gcc::context *); -extern rtl_opt_pass *make_pass_remove_redundant_vector_load - (gcc::context *); +extern rtl_opt_pass *make_pass_x86_cse (gcc::context *); extern rtl_opt_pass *make_pass_apx_nf_convert (gcc::context *); extern rtl_opt_pass *make_pass_align_tight_loops (gcc::context *); @@ -448,3 +448,4 @@ extern void ix86_set_handled_components (sbitmap); /* In i386-expand.cc. */ bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*, HOST_WIDE_INT*); +rtx ix86_vgf2p8affine_shift_matrix (rtx, rtx, enum rtx_code); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 65e04d3760d5..1ca6c6121371 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -12439,7 +12439,7 @@ ix86_tls_index (void) static GTY(()) rtx ix86_tls_symbol; -static rtx +rtx ix86_tls_get_addr (void) { if (cfun->machine->call_saved_registers @@ -22102,6 +22102,15 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, } /* FALLTHRU */ case V32QImode: + if (TARGET_GFNI && constant_op1) + { + /* Use vgf2p8affine. One extra load for the mask, but in a loop + with enough registers it will be moved out. So for now don't + account the constant mask load. This is not quite right + for non loop vectorization. */ + extra = 0; + return ix86_vec_cost (mode, cost->sse_op) + extra; + } if (TARGET_AVX2) /* Use vpbroadcast. */ extra = cost->sse_op; @@ -22136,6 +22145,11 @@ ix86_shift_rotate_cost (const struct processor_costs *cost, count = 9; return ix86_vec_cost (mode, cost->sse_op * count) + extra; + case V64QImode: + /* Ignore the mask load for GF2P8AFFINEQB. 
*/ + extra = 0; + return ix86_vec_cost (mode, cost->sse_op) + extra; + case V2DImode: case V4DImode: /* V*DImode arithmetic right shift is emulated. */ @@ -25794,15 +25808,20 @@ class ix86_vector_costs : public vector_costs unsigned m_num_sse_needed[3]; /* Number of 256-bit vector permutation. */ unsigned m_num_avx256_vec_perm[3]; + /* Number of reductions for FMA/DOT_PROD_EXPR/SAD_EXPR */ + unsigned m_num_reduc[X86_REDUC_LAST]; + /* Don't do unroll if m_prefer_unroll is false, default is true. */ + bool m_prefer_unroll; }; ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar) : vector_costs (vinfo, costing_for_scalar), m_num_gpr_needed (), m_num_sse_needed (), - m_num_avx256_vec_perm () -{ -} + m_num_avx256_vec_perm (), + m_num_reduc (), + m_prefer_unroll (true) +{} /* Implement targetm.vectorize.create_costs. */ @@ -26099,6 +26118,125 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, } } + /* Record number of load/store/gather/scatter in vectorized body. */ + if (where == vect_body && !m_costing_for_scalar) + { + switch (kind) + { + /* Emulated gather/scatter or any scalarization. */ + case scalar_load: + case scalar_stmt: + case scalar_store: + case vector_gather_load: + case vector_scatter_store: + m_prefer_unroll = false; + break; + + case vector_stmt: + case vec_to_scalar: + /* Count number of reduction FMA and "real" DOT_PROD_EXPR, + unroll in the vectorizer will enable partial sum. */ + if (stmt_info + && vect_is_reduction (stmt_info) + && stmt_info->stmt) + { + /* Handle __builtin_fma. */ + if (gimple_call_combined_fn (stmt_info->stmt) == CFN_FMA) + { + m_num_reduc[X86_REDUC_FMA] += count; + break; + } + + if (!is_gimple_assign (stmt_info->stmt)) + break; + + tree_code subcode = gimple_assign_rhs_code (stmt_info->stmt); + machine_mode inner_mode = GET_MODE_INNER (mode); + tree rhs1, rhs2; + bool native_vnni_p = true; + gimple* def; + machine_mode mode_rhs; + switch (subcode) + { + case PLUS_EXPR: + case MINUS_EXPR: + if (!fp || !flag_associative_math + || flag_fp_contract_mode != FP_CONTRACT_FAST) + break; + + /* FMA condition for different modes. */ + if (((inner_mode == DFmode || inner_mode == SFmode) + && !TARGET_FMA && !TARGET_AVX512VL) + || (inner_mode == HFmode && !TARGET_AVX512FP16) + || (inner_mode == BFmode && !TARGET_AVX10_2)) + break; + + /* MULT_EXPR + PLUS_EXPR/MINUS_EXPR is transformed + to FMA/FNMA after vectorization. */ + rhs1 = gimple_assign_rhs1 (stmt_info->stmt); + rhs2 = gimple_assign_rhs2 (stmt_info->stmt); + if (subcode == PLUS_EXPR + && TREE_CODE (rhs1) == SSA_NAME + && (def = SSA_NAME_DEF_STMT (rhs1), true) + && is_gimple_assign (def) + && gimple_assign_rhs_code (def) == MULT_EXPR) + m_num_reduc[X86_REDUC_FMA] += count; + else if (TREE_CODE (rhs2) == SSA_NAME + && (def = SSA_NAME_DEF_STMT (rhs2), true) + && is_gimple_assign (def) + && gimple_assign_rhs_code (def) == MULT_EXPR) + m_num_reduc[X86_REDUC_FMA] += count; + break; + + /* Vectorizer lane_reducing_op_p supports DOT_PROX_EXPR, + WIDEN_SUM_EXPR and SAD_EXPR, x86 backend only supports + SAD_EXPR (usad{v16qi,v32qi,v64qi}) and DOT_PROD_EXPR. */ + case DOT_PROD_EXPR: + rhs1 = gimple_assign_rhs1 (stmt_info->stmt); + mode_rhs = TYPE_MODE (TREE_TYPE (rhs1)); + if (mode_rhs == QImode) + { + rhs2 = gimple_assign_rhs2 (stmt_info->stmt); + signop signop1_p = TYPE_SIGN (TREE_TYPE (rhs1)); + signop signop2_p = TYPE_SIGN (TREE_TYPE (rhs2)); + + /* vpdpbusd. */ + if (signop1_p != signop2_p) + native_vnni_p + = (GET_MODE_SIZE (mode) == 64 + ? 
TARGET_AVX512VNNI + : ((TARGET_AVX512VNNI && TARGET_AVX512VL) + || TARGET_AVXVNNI)); + else + /* vpdpbssd. */ + native_vnni_p + = (GET_MODE_SIZE (mode) == 64 + ? TARGET_AVX10_2 + : (TARGET_AVXVNNIINT8 || TARGET_AVX10_2)); + } + m_num_reduc[X86_REDUC_DOT_PROD] += count; + + /* Avoid unrolling and partial sums for + emulated DOT_PROD_EXPR. */ + if (!native_vnni_p) + m_num_reduc[X86_REDUC_DOT_PROD] += 3 * count; + break; + + case SAD_EXPR: + m_num_reduc[X86_REDUC_SAD] += count; + break; + + default: + break; + } + } + + default: + break; + } + } + + + combined_fn cfn; if ((kind == vector_stmt || kind == scalar_stmt) && stmt_info @@ -26161,8 +26299,7 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (node)))) != INTEGER_CST)) - || (SLP_TREE_MEMORY_ACCESS_TYPE (node) - == VMAT_GATHER_SCATTER))))) + || mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))))) { stmt_cost = ix86_default_vector_cost (kind, mode); stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1); @@ -26306,6 +26443,41 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs) && (exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ()) > ceil_log2 (LOOP_VINFO_INT_NITERS (loop_vinfo)))) m_costs[vect_body] = INT_MAX; + + bool any_reduc_p = false; + for (int i = 0; i != X86_REDUC_LAST; i++) + if (m_num_reduc[i]) + { + any_reduc_p = true; + break; + } + + if (any_reduc_p + /* Not much gain for loops with gathers and scatters. */ + && m_prefer_unroll + && !LOOP_VINFO_EPILOGUE_P (loop_vinfo)) + { + unsigned unroll_factor + = OPTION_SET_P (ix86_vect_unroll_limit) + ? ix86_vect_unroll_limit + : ix86_cost->vect_unroll_limit; + + if (unroll_factor > 1) + { + for (int i = 0; i != X86_REDUC_LAST; i++) + { + if (m_num_reduc[i]) + { + unsigned tmp = CEIL (ix86_cost->reduc_lat_mult_thr[i], + m_num_reduc[i]); + unroll_factor = MIN (unroll_factor, tmp); + } + } + + m_suggested_unroll_factor = 1 << ceil_log2 (unroll_factor); + } + } + } ix86_vect_estimate_reg_pressure (); @@ -27189,9 +27361,9 @@ ix86_memtag_can_tag_addresses () return ix86_lam_type != lam_none && TARGET_LP64; } -/* Implement TARGET_MEMTAG_TAG_SIZE. */ +/* Implement TARGET_MEMTAG_TAG_BITSIZE. */ unsigned char -ix86_memtag_tag_size () +ix86_memtag_tag_bitsize () { return IX86_HWASAN_TAG_SIZE; } @@ -28165,8 +28337,8 @@ ix86_libgcc_floating_mode_supported_p #undef TARGET_MEMTAG_UNTAGGED_POINTER #define TARGET_MEMTAG_UNTAGGED_POINTER ix86_memtag_untagged_pointer -#undef TARGET_MEMTAG_TAG_SIZE -#define TARGET_MEMTAG_TAG_SIZE ix86_memtag_tag_size +#undef TARGET_MEMTAG_TAG_BITSIZE +#define TARGET_MEMTAG_TAG_BITSIZE ix86_memtag_tag_bitsize #undef TARGET_GEN_CCMP_FIRST #define TARGET_GEN_CCMP_FIRST ix86_gen_ccmp_first diff --git a/gcc/config/i386/i386.h index 791f3b9e1338..2eb141bab1ad 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -102,6 +102,15 @@ struct stringop_algs #define COSTS_N_BYTES(N) ((N) * 2) #endif + +enum ix86_reduc_unroll_factor { + X86_REDUC_FMA, + X86_REDUC_DOT_PROD, + X86_REDUC_SAD, + + X86_REDUC_LAST +}; + /* Define the specific costs for a given cpu. NB: hard_register is used by TARGET_REGISTER_MOVE_COST and TARGET_MEMORY_MOVE_COST to compute hard register move costs by register allocator. Relative costs of @@ -225,6 +234,13 @@ struct processor_costs { to number of instructions executed in parallel. See also ix86_reassociation_width. 
*/ + const unsigned reduc_lat_mult_thr[X86_REDUC_LAST]; + /* Latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + const unsigned vect_unroll_limit; /* Limit how much the autovectorizer + may unroll a loop. */ struct stringop_algs *memcpy, *memset; const int cond_taken_branch_cost; /* Cost of taken branch for vectorizer cost model. */ @@ -644,7 +660,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); {"cpu_64", "%{" OPT_ARCH64 ":%{!mtune=*:%{!mcpu=*:%{!march=*:-mtune=%(VALUE)}}}}" }, \ {"arch", "%{!march=*:-march=%(VALUE)}"}, \ {"arch_32", "%{" OPT_ARCH32 ":%{!march=*:-march=%(VALUE)}}"}, \ - {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"}, + {"arch_64", "%{" OPT_ARCH64 ":%{!march=*:-march=%(VALUE)}}"}, \ + {"tls", "%{!mtls-dialect=*:-mtls-dialect=%(VALUE)}"}, /* Specs for the compiler proper */ @@ -2477,9 +2494,9 @@ constexpr wide_int_bitmask PTA_DIAMONDRAPIDS = PTA_GRANITERAPIDS_D | PTA_MOVRS | PTA_AMX_MOVRS | PTA_USER_MSR; constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 - | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4 - | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE; + | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT + | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL + | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE; constexpr wide_int_bitmask PTA_BDVER2 = PTA_BDVER1 | PTA_BMI | PTA_TBM | PTA_F16C | PTA_FMA; constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT @@ -2487,13 +2504,13 @@ constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT constexpr wide_int_bitmask PTA_BDVER4 = PTA_BDVER3 | PTA_AVX2 | PTA_BMI2 | PTA_RDRND | PTA_MOVBE | PTA_MWAITX; -constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 - | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 - | PTA_F16C | PTA_FMA | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT - | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_MWAITX | PTA_ADX | PTA_RDSEED - | PTA_CLZERO | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES | PTA_SHA | PTA_LZCNT - | PTA_POPCNT; +constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE + | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_POPCNT | PTA_LZCNT + | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL + | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2 | PTA_F16C | PTA_FMA | PTA_PRFCHW + | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE + | PTA_MWAITX | PTA_ADX | PTA_RDSEED | PTA_CLZERO | PTA_CLFLUSHOPT + | PTA_XSAVEC | PTA_XSAVES | PTA_SHA; constexpr wide_int_bitmask PTA_ZNVER2 = PTA_ZNVER1 | PTA_CLWB | PTA_RDPID | PTA_WBNOINVD; constexpr wide_int_bitmask PTA_ZNVER3 = PTA_ZNVER2 | PTA_VAES | PTA_VPCLMULQDQ @@ -2506,19 +2523,19 @@ constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI | PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI; constexpr wide_int_bitmask PTA_BTVER1 = PTA_64BIT | PTA_MMX | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16 - | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE; + | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_LZCNT | PTA_POPCNT + | PTA_ABM | PTA_CX16 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE; constexpr wide_int_bitmask PTA_BTVER2 = PTA_BTVER1 
| PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_BMI | PTA_F16C | PTA_MOVBE | PTA_XSAVEOPT; constexpr wide_int_bitmask PTA_LUJIAZUI = PTA_64BIT | PTA_MMX | PTA_SSE - | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 - | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI | PTA_BMI2 | PTA_PRFCHW - | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE - | PTA_ADX | PTA_RDSEED | PTA_POPCNT; + | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_LZCNT | PTA_POPCNT | PTA_ABM + | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI + | PTA_BMI2 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE + | PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED; constexpr wide_int_bitmask PTA_YONGFENG = PTA_LUJIAZUI | PTA_AVX | PTA_AVX2 - | PTA_F16C | PTA_FMA | PTA_SHA | PTA_LZCNT; + | PTA_F16C | PTA_FMA | PTA_SHA; #ifndef GENERATOR_FILE @@ -2865,6 +2882,9 @@ struct GTY(()) machine_function { approximation. */ BOOL_BITFIELD tls_descriptor_call_expanded_p : 1; + /* True if TLS descriptor is called more than once. */ + BOOL_BITFIELD tls_descriptor_call_multiple_p : 1; + /* If true, the current function has a STATIC_CHAIN is placed on the stack below the return address. */ BOOL_BITFIELD static_chain_on_stack : 1; diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 2b0dd66c68b6..cea6c152f2b9 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -901,6 +901,10 @@ (define_attr "avx_partial_xmm_update" "false,true" (const_string "false")) +;; Define attribute to indicate 64-bit TLS insns. +(define_attr "tls64" "gd,ld_base,call,combine,lea,none" + (const_string "none")) + ;; Define attribute to classify add/sub insns that consumes carry flag (CF) (define_attr "use_carry" "0,1" (const_string "0")) @@ -18298,17 +18302,17 @@ (any_rotate:SWI (match_operand:SWI 1 "const_int_operand") (subreg:QI - (and - (match_operand 2 "int248_register_operand") - (match_operand 3 "const_int_operand")) 0)))] + (match_operator 4 "and_operator" + [(match_operand 2 "int248_register_operand") + (match_operand 3 "const_int_operand")]) 0)))] "(INTVAL (operands[3]) & (GET_MODE_BITSIZE (mode) - 1)) == GET_MODE_BITSIZE (mode) - 1" - [(set (match_dup 4) (match_dup 1)) + [(set (match_dup 5) (match_dup 1)) (set (match_dup 0) - (any_rotate:SWI (match_dup 4) + (any_rotate:SWI (match_dup 5) (subreg:QI - (and:SI (match_dup 2) (match_dup 3)) 0)))] - "operands[4] = gen_reg_rtx (mode);") + (match_op_dup 4 [(match_dup 2) (match_dup 3)]) 0)))] + "operands[5] = gen_reg_rtx (mode);") (define_insn_and_split "*3_mask_1" [(set (match_operand:SWI 0 "nonimmediate_operand") @@ -23153,6 +23157,7 @@ return "call\t{*%p2@GOTPCREL(%%rip)|[QWORD PTR %p2@GOTPCREL[rip]]}"; } [(set_attr "type" "multi") + (set_attr "tls64" "gd") (set (attr "length") (symbol_ref "TARGET_X32 ? 
15 : 16"))]) @@ -23191,7 +23196,11 @@ UNSPEC_TLS_GD) (clobber (match_operand:P 3 "register_operand"))])] "TARGET_64BIT" - "ix86_tls_descriptor_calls_expanded_in_cfun = true;") +{ + if (ix86_tls_descriptor_calls_expanded_in_cfun) + cfun->machine->tls_descriptor_call_multiple_p = true; + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) (define_insn "*tls_local_dynamic_base_32_gnu" [(set (match_operand:SI 0 "register_operand" "=a") @@ -23253,6 +23262,7 @@ return "call\t{*%p1@GOTPCREL(%%rip)|[QWORD PTR %p1@GOTPCREL[rip]]}"; } [(set_attr "type" "multi") + (set_attr "tls64" "ld_base") (set_attr "length" "12")]) (define_insn "*tls_local_dynamic_base_64_largepic" @@ -23286,7 +23296,11 @@ (unspec:P [(reg:P SP_REG)] UNSPEC_TLS_LD_BASE) (clobber (match_operand:P 2 "register_operand"))])] "TARGET_64BIT" - "ix86_tls_descriptor_calls_expanded_in_cfun = true;") +{ + if (ix86_tls_descriptor_calls_expanded_in_cfun) + cfun->machine->tls_descriptor_call_multiple_p = true; + ix86_tls_descriptor_calls_expanded_in_cfun = true; +}) ;; Local dynamic of a single variable is a lose. Show combine how ;; to convert that back to global dynamic. @@ -23480,6 +23494,8 @@ "TARGET_64BIT && TARGET_GNU2_TLS" { operands[2] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0]; + if (ix86_tls_descriptor_calls_expanded_in_cfun) + cfun->machine->tls_descriptor_call_multiple_p = true; ix86_tls_descriptor_calls_expanded_in_cfun = true; }) @@ -23491,6 +23507,7 @@ "lea%z0\t{%E1@TLSDESC(%%rip), %0|%0, %E1@TLSDESC[rip]}" [(set_attr "type" "lea") (set_attr "mode" "") + (set_attr "tls64" "lea") (set_attr "length" "7") (set_attr "length_address" "4")]) @@ -23504,6 +23521,7 @@ "TARGET_64BIT && TARGET_GNU2_TLS" "call\t{*%a1@TLSCALL(%2)|[QWORD PTR [%2+%a1@TLSCALL]]}" [(set_attr "type" "call") + (set_attr "tls64" "call") (set_attr "length" "2") (set_attr "length_address" "0")]) @@ -23525,7 +23543,8 @@ { operands[4] = can_create_pseudo_p () ? gen_reg_rtx (ptr_mode) : operands[0]; emit_insn (gen_tls_dynamic_gnu2_64 (ptr_mode, operands[4], operands[1])); -}) +} + [(set_attr "tls64" "combine")]) (define_split [(match_operand 0 "tls_address_pattern")] diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index c93c0b1bb381..6bda22f48437 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1246,6 +1246,10 @@ munroll-only-small-loops Target Var(ix86_unroll_only_small_loops) Init(0) Optimization Enable conservative small loop unrolling. +-param=ix86-vect-unroll-limit= +Target Joined UInteger Var(ix86_vect_unroll_limit) Init(4) Param +Limit how much the autovectorizer may unroll a loop. + mlam= Target RejectNegative Joined Enum(lam_type) Var(ix86_lam_type) Init(lam_none) -mlam=[none|u48|u57] Instrument meta data position in user data pointers. diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 0f310902e7b4..175798cff69b 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -1714,10 +1714,14 @@ (define_predicate "div_operator" (match_code "div")) -;; Return true if this is a and, ior or xor operation. +;; Return true if this is an and, ior or xor operation. (define_predicate "logic_operator" (match_code "and,ior,xor")) +;; Return true if this is an and operation. +(define_predicate "and_operator" + (match_code "and")) + ;; Return true if this is a plus, minus, and, ior or xor operation. 
(define_predicate "plusminuslogic_operator" (match_code "plus,minus,and,ior,xor")) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ec74f93731d8..505095040f75 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -326,6 +326,9 @@ (define_mode_iterator VI1_AVX512VL [V64QI (V16QI "TARGET_AVX512VL") (V32QI "TARGET_AVX512VL")]) +(define_mode_iterator VI1_AVX512_3264 + [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX")]) + ;; All vector modes (define_mode_iterator V [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI @@ -26559,9 +26562,9 @@ ;; XOP packed rotate instructions (define_expand "rotl3" - [(set (match_operand:VI_128 0 "register_operand") - (rotate:VI_128 - (match_operand:VI_128 1 "nonimmediate_operand") + [(set (match_operand:VI248_128 0 "register_operand") + (rotate:VI248_128 + (match_operand:VI248_128 1 "nonimmediate_operand") (match_operand:SI 2 "general_operand")))] "TARGET_XOP" { @@ -26590,9 +26593,9 @@ }) (define_expand "rotr3" - [(set (match_operand:VI_128 0 "register_operand") - (rotatert:VI_128 - (match_operand:VI_128 1 "nonimmediate_operand") + [(set (match_operand:VI248_128 0 "register_operand") + (rotatert:VI248_128 + (match_operand:VI248_128 1 "nonimmediate_operand") (match_operand:SI 2 "general_operand")))] "TARGET_XOP" { @@ -26964,31 +26967,122 @@ int i; if ( != ASHIFT) - { - if (CONST_INT_P (operands[2])) - operands[2] = GEN_INT (-INTVAL (operands[2])); - else - negate = true; - } + { + if (CONST_INT_P (operands[2])) + operands[2] = GEN_INT (-INTVAL (operands[2])); + else + negate = true; + } par = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16)); tmp = lowpart_subreg (QImode, operands[2], SImode); for (i = 0; i < 16; i++) - XVECEXP (par, 0, i) = tmp; + XVECEXP (par, 0, i) = tmp; tmp = gen_reg_rtx (V16QImode); emit_insn (gen_vec_initv16qiqi (tmp, par)); if (negate) - emit_insn (gen_negv16qi2 (tmp, tmp)); + emit_insn (gen_negv16qi2 (tmp, tmp)); gen = ( == LSHIFTRT ? gen_xop_shlv16qi3 : gen_xop_shav16qi3); emit_insn (gen (operands[0], operands[1], tmp)); } + else if (TARGET_GFNI && CONST_INT_P (operands[2]) + && ( == 64 + || !(INTVAL (operands[2]) == 7 && == ASHIFTRT))) + { + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], + ); + emit_insn (gen_vgf2p8affineqb_ (operands[0], operands[1], matrix, + const0_rtx)); + } else ix86_expand_vecop_qihi (, operands[0], operands[1], operands[2]); DONE; }) +; not generated by vectorizer? 
+(define_expand "cond_" + [(set (match_operand:VI1_AVX512VL 0 "register_operand") + (vec_merge:VI1_AVX512VL + (any_shift:VI1_AVX512VL + (match_operand:VI1_AVX512VL 2 "register_operand") + (match_operand:VI1_AVX512VL 3 "nonimmediate_or_const_vec_dup_operand")) + (match_operand:VI1_AVX512VL 4 "nonimm_or_0_operand") + (match_operand: 1 "register_operand")))] + "TARGET_GFNI && TARGET_AVX512F" +{ + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], ); + emit_insn (gen_vgf2p8affineqb__mask (operands[0], operands[1], matrix, + const0_rtx, operands[4], + operands[1])); + DONE; +}) + +(define_expand "3" + [(set (match_operand:VI1_AVX512_3264 0 "register_operand") + (any_rotate:VI1_AVX512_3264 + (match_operand:VI1_AVX512_3264 1 "register_operand") + (match_operand:SI 2 "const_int_operand")))] + "TARGET_GFNI" +{ + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], ); + emit_insn (gen_vgf2p8affineqb_ (operands[0], operands[1], matrix, + const0_rtx)); + DONE; +}) + +(define_expand "v16qi3" + [(set (match_operand:V16QI 0 "register_operand") + (any_rotate:V16QI + (match_operand:V16QI 1 "nonimmediate_operand") + (match_operand:SI 2 "general_operand")))] + "TARGET_GFNI || TARGET_XOP" +{ + /* Handle the V16QI XOP case to avoid a conflict with the other expand. */ + if (TARGET_XOP) + { + if (! const_0_to_7_operand (operands[2], SImode)) + { + rtvec vs = rtvec_alloc (16); + rtx par = gen_rtx_PARALLEL (V16QImode, vs); + rtx reg = gen_reg_rtx (V16QImode); + rtx op2 = operands[2]; + int i; + + if (GET_MODE (op2) != QImode) + { + op2 = gen_reg_rtx (QImode); + convert_move (op2, operands[2], false); + } + + for (i = 0; i < 16; i++) + RTVEC_ELT (vs, i) = op2; + + emit_insn (gen_vec_initv16qiqi (reg, par)); + if ( == ROTATERT) + { + rtx neg = gen_reg_rtx (V16QImode); + emit_insn (gen_negv16qi2 (neg, reg)); + emit_insn (gen_xop_vrotlv16qi3 (operands[0], operands[1], neg)); + reg = neg; + } + emit_insn (gen_xop_vrotlv16qi3 (operands[0], operands[1], reg)); + DONE; + } + } + else if (TARGET_GFNI && CONST_INT_P (operands[2])) + { + rtx matrix = ix86_vgf2p8affine_shift_matrix (operands[0], operands[2], ); + emit_insn (gen_vgf2p8affineqb_v16qi (operands[0], + force_reg (V16QImode, operands[1]), + matrix, const0_rtx)); + DONE; + } + else + FAIL; +}) + (define_expand "ashrv2di3" [(set (match_operand:V2DI 0 "register_operand") (ashiftrt:V2DI diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h index c8603b982af4..1649ea2fe3e5 100644 --- a/gcc/config/i386/x86-tune-costs.h +++ b/gcc/config/i386/x86-tune-costs.h @@ -141,6 +141,12 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */ COSTS_N_BYTES (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ ix86_size_memcpy, ix86_size_memset, COSTS_N_BYTES (1), /* cond_taken_branch_cost. */ @@ -261,6 +267,12 @@ struct processor_costs i386_cost = { /* 386 specific costs */ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. 
*/ i386_memcpy, i386_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -382,6 +394,12 @@ struct processor_costs i486_cost = { /* 486 specific costs */ COSTS_N_INSNS (27), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (27), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ i486_memcpy, i486_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -501,6 +519,12 @@ struct processor_costs pentium_cost = { COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ pentium_memcpy, pentium_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -613,6 +637,12 @@ struct processor_costs lakemont_cost = { COSTS_N_INSNS (5), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ pentium_memcpy, pentium_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -740,6 +770,12 @@ struct processor_costs pentiumpro_cost = { COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ pentiumpro_memcpy, pentiumpro_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -858,6 +894,12 @@ struct processor_costs geode_cost = { COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ geode_memcpy, geode_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -979,6 +1021,12 @@ struct processor_costs k6_cost = { COSTS_N_INSNS (2), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (2), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ k6_memcpy, k6_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -1101,6 +1149,12 @@ struct processor_costs athlon_cost = { COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. 
*/ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ athlon_memcpy, athlon_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -1232,6 +1286,12 @@ struct processor_costs k8_cost = { COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (5), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ k8_memcpy, k8_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -1371,6 +1431,12 @@ struct processor_costs amdfam10_cost = { COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ amdfam10_memcpy, amdfam10_memset, COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ @@ -1503,6 +1569,12 @@ const struct processor_costs bdver_cost = { COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ bdver_memcpy, bdver_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -1668,6 +1740,12 @@ struct processor_costs znver1_cost = { plus/minus operations per cycle but only one multiply. This is adjusted in ix86_reassociation_width. */ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ + {5, 1, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ znver1_memcpy, znver1_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -1836,6 +1914,12 @@ struct processor_costs znver2_cost = { plus/minus operations per cycle but only one multiply. This is adjusted in ix86_reassociation_width. */ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ + {10, 1, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ znver2_memcpy, znver2_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -1979,6 +2063,12 @@ struct processor_costs znver3_cost = { plus/minus operations per cycle but only one multiply. This is adjusted in ix86_reassociation_width. */ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 1, 6}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ znver2_memcpy, znver2_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -2125,6 +2215,12 @@ struct processor_costs znver4_cost = { plus/minus operations per cycle but only one multiply. This is adjusted in ix86_reassociation_width. */ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 8, 6}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. 
*/ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ znver2_memcpy, znver2_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -2287,6 +2383,12 @@ struct processor_costs znver5_cost = { We increase width to 6 for multiplications in ix86_reassociation_width. */ 6, 6, 4, 6, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 8, 6}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ znver2_memcpy, znver2_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -2422,6 +2524,12 @@ struct processor_costs skylake_cost = { COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 1, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ skylake_memcpy, skylake_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -2559,6 +2667,12 @@ struct processor_costs icelake_cost = { COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 10, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ icelake_memcpy, icelake_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -2690,6 +2804,12 @@ struct processor_costs alderlake_cost = { COSTS_N_INSNS (7), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (6), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 8, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ alderlake_memcpy, alderlake_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -2814,6 +2934,12 @@ const struct processor_costs btver1_cost = { COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ btver1_memcpy, btver1_memset, COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ @@ -2935,6 +3061,12 @@ const struct processor_costs btver2_cost = { COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ btver2_memcpy, btver2_memset, COSTS_N_INSNS (2), /* cond_taken_branch_cost. */ @@ -3055,6 +3187,12 @@ struct processor_costs pentium4_cost = { COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. 
*/ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ pentium4_memcpy, pentium4_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -3178,6 +3316,12 @@ struct processor_costs nocona_cost = { COSTS_N_INSNS (12), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (8), /* cost of CVT(T)PS2PI instruction. */ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {1, 1, 1}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ nocona_memcpy, nocona_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -3299,6 +3443,12 @@ struct processor_costs atom_cost = { COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 8, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 2, /* Limit how much the autovectorizer + may unroll a loop. */ atom_memcpy, atom_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -3420,6 +3570,12 @@ struct processor_costs slm_cost = { COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 8, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ slm_memcpy, slm_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -3555,6 +3711,12 @@ struct processor_costs tremont_cost = { COSTS_N_INSNS (4), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (4), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 1, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ tremont_memcpy, tremont_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -3681,6 +3843,12 @@ struct processor_costs lujiazui_cost = { COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 1, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ lujiazui_memcpy, lujiazui_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -3805,6 +3973,12 @@ struct processor_costs yongfeng_cost = { COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 1, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ yongfeng_memcpy, yongfeng_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -3929,6 +4103,12 @@ struct processor_costs shijidadao_cost = { COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. 
*/ 4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 1, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ shijidadao_memcpy, shijidadao_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ @@ -4078,6 +4258,12 @@ struct processor_costs generic_cost = { COSTS_N_INSNS (3), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (3), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 3, 3, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 8, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 4, /* Limit how much the autovectorizer + may unroll a loop. */ generic_memcpy, generic_memset, COSTS_N_INSNS (4), /* cond_taken_branch_cost. */ @@ -4215,6 +4401,12 @@ struct processor_costs core_cost = { COSTS_N_INSNS (6), /* cost of CVTPI2PS instruction. */ COSTS_N_INSNS (7), /* cost of CVT(T)PS2PI instruction. */ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */ + {8, 1, 3}, /* latency times throughput of + FMA/DOT_PROD_EXPR/SAD_EXPR, + it's used to determine unroll + factor in the vectorizer. */ + 1, /* Limit how much the autovectorizer + may unroll a loop. */ core_memcpy, core_memset, COSTS_N_INSNS (3), /* cond_taken_branch_cost. */ diff --git a/gcc/config/loongarch/genopts/isa-evolution.in b/gcc/config/loongarch/genopts/isa-evolution.in index 50f72d5a0bcc..836d93a00382 100644 --- a/gcc/config/loongarch/genopts/isa-evolution.in +++ b/gcc/config/loongarch/genopts/isa-evolution.in @@ -2,4 +2,5 @@ 2 26 div32 1.1 Support div.w[u] and mod.w[u] instructions with inputs not sign-extended. 2 27 lam-bh 1.1 Support am{swap/add}[_db].{b/h} instructions. 2 28 lamcas 1.1 Support amcas[_db].{b/h/w/d} instructions. +2 30 scq 1.1 Support sc.q instruction. 3 23 ld-seq-sa 1.1 Do not need load-load barriers (dbar 0x700). 
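For readers following the new reduc_lat_mult_thr / vect_unroll_limit fields threaded through the x86 cost tables above: ix86_vector_costs::finish_cost derives its suggested unroll factor from exactly these two values. A minimal standalone sketch of that arithmetic, with hypothetical helper names and simple stand-ins for GCC's CEIL and ceil_log2 macros:

#include <algorithm>
#include <cstdio>

enum { X86_REDUC_FMA, X86_REDUC_DOT_PROD, X86_REDUC_SAD, X86_REDUC_LAST };

/* Stand-in for 1 << ceil_log2 (x): round up to a power of two.  */
static unsigned
round_up_pow2 (unsigned x)
{
  unsigned p = 1;
  while (p < x)
    p <<= 1;
  return p;
}

/* Mirrors the finish_cost logic: each reduction kind present in the loop
   body caps the unroll factor at CEIL (latency * throughput, number of
   such reductions), bounded by the per-CPU (or --param) unroll limit.  */
static unsigned
suggested_unroll (const unsigned lat_mult_thr[X86_REDUC_LAST],
                  unsigned vect_unroll_limit,
                  const unsigned num_reduc[X86_REDUC_LAST])
{
  unsigned uf = vect_unroll_limit;
  for (int i = 0; i < X86_REDUC_LAST; i++)
    if (num_reduc[i])
      uf = std::min (uf, (lat_mult_thr[i] + num_reduc[i] - 1) / num_reduc[i]);
  return round_up_pow2 (uf);
}

int
main ()
{
  /* A znver4/znver5-like table entry: {8, 8, 6} with limit 4.  */
  const unsigned lat[X86_REDUC_LAST] = { 8, 8, 6 };
  const unsigned one_fma[X86_REDUC_LAST] = { 1, 0, 0 };
  const unsigned five_fma[X86_REDUC_LAST] = { 5, 0, 0 };
  printf ("%u\n", suggested_unroll (lat, 4, one_fma));  /* prints 4 */
  printf ("%u\n", suggested_unroll (lat, 4, five_fma)); /* prints 2 */
}

The intent is that a loop body with only a few independent reduction chains is unrolled until the chains cover the FMA/VNNI/SAD latency, while a body that already saturates the unit is left alone.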
diff --git a/gcc/config/loongarch/loongarch-def.cc b/gcc/config/loongarch/loongarch-def.cc index 04b277eb7425..dcd8d905c5fd 100644 --- a/gcc/config/loongarch/loongarch-def.cc +++ b/gcc/config/loongarch/loongarch-def.cc @@ -72,7 +72,7 @@ array_arch loongarch_cpu_default_isa = .simd_ (ISA_EXT_SIMD_LASX) .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS - | OPTION_MASK_ISA_FRECIPE)) + | OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_SCQ)) .set (ARCH_LA64V1_0, loongarch_isa () .base_ (ISA_BASE_LA64) @@ -86,7 +86,7 @@ array_arch loongarch_cpu_default_isa = .simd_ (ISA_EXT_SIMD_LSX) .evolution_ (OPTION_MASK_ISA_DIV32 | OPTION_MASK_ISA_LD_SEQ_SA | OPTION_MASK_ISA_LAM_BH | OPTION_MASK_ISA_LAMCAS - | OPTION_MASK_ISA_FRECIPE)); + | OPTION_MASK_ISA_FRECIPE | OPTION_MASK_ISA_SCQ)); static inline loongarch_cache la464_cache () diff --git a/gcc/config/loongarch/loongarch-def.h b/gcc/config/loongarch/loongarch-def.h index 0bcd2a70121b..0a7d0c923fef 100644 --- a/gcc/config/loongarch/loongarch-def.h +++ b/gcc/config/loongarch/loongarch-def.h @@ -78,12 +78,10 @@ extern loongarch_def_array /* Base ABI */ -enum { - ABI_BASE_LP64D = 0, - ABI_BASE_LP64F = 1, - ABI_BASE_LP64S = 2, - N_ABI_BASE_TYPES = 3 -}; +#define ABI_BASE_LP64D 0 +#define ABI_BASE_LP64F 1 +#define ABI_BASE_LP64S 2 +#define N_ABI_BASE_TYPES 3 extern loongarch_def_array loongarch_abi_base_strings; diff --git a/gcc/config/loongarch/loongarch-evolution.cc b/gcc/config/loongarch/loongarch-evolution.cc index de68624f9493..a92a6455df6e 100644 --- a/gcc/config/loongarch/loongarch-evolution.cc +++ b/gcc/config/loongarch/loongarch-evolution.cc @@ -32,6 +32,7 @@ int la_evo_feature_masks[] = { OPTION_MASK_ISA_DIV32, OPTION_MASK_ISA_LAM_BH, OPTION_MASK_ISA_LAMCAS, + OPTION_MASK_ISA_SCQ, OPTION_MASK_ISA_LD_SEQ_SA, }; @@ -40,6 +41,7 @@ const char* la_evo_macro_name[] = { "__loongarch_div32", "__loongarch_lam_bh", "__loongarch_lamcas", + "__loongarch_scq", "__loongarch_ld_seq_sa", }; @@ -48,6 +50,7 @@ int la_evo_version_major[] = { 1, /* DIV32 */ 1, /* LAM_BH */ 1, /* LAMCAS */ + 1, /* SCQ */ 1, /* LD_SEQ_SA */ }; @@ -56,5 +59,6 @@ int la_evo_version_minor[] = { 1, /* DIV32 */ 1, /* LAM_BH */ 1, /* LAMCAS */ + 1, /* SCQ */ 1, /* LD_SEQ_SA */ }; diff --git a/gcc/config/loongarch/loongarch-evolution.h b/gcc/config/loongarch/loongarch-evolution.h index 5f908394c22c..7fb7b0d3d860 100644 --- a/gcc/config/loongarch/loongarch-evolution.h +++ b/gcc/config/loongarch/loongarch-evolution.h @@ -36,6 +36,7 @@ static constexpr struct { { 2, 1u << 26, OPTION_MASK_ISA_DIV32 }, { 2, 1u << 27, OPTION_MASK_ISA_LAM_BH }, { 2, 1u << 28, OPTION_MASK_ISA_LAMCAS }, + { 2, 1u << 30, OPTION_MASK_ISA_SCQ }, { 3, 1u << 23, OPTION_MASK_ISA_LD_SEQ_SA }, }; @@ -58,8 +59,9 @@ enum { EVO_DIV32 = 1, EVO_LAM_BH = 2, EVO_LAMCAS = 3, - EVO_LD_SEQ_SA = 4, - N_EVO_FEATURES = 5 + EVO_SCQ = 4, + EVO_LD_SEQ_SA = 5, + N_EVO_FEATURES = 6 }; /* Condition macros */ @@ -71,6 +73,8 @@ enum { (la_target.isa.evolution & OPTION_MASK_ISA_LAM_BH) #define ISA_HAS_LAMCAS \ (la_target.isa.evolution & OPTION_MASK_ISA_LAMCAS) +#define ISA_HAS_SCQ \ + (la_target.isa.evolution & OPTION_MASK_ISA_SCQ) #define ISA_HAS_LD_SEQ_SA \ (la_target.isa.evolution & OPTION_MASK_ISA_LD_SEQ_SA) diff --git a/gcc/config/loongarch/loongarch-str.h b/gcc/config/loongarch/loongarch-str.h index 1546ea394435..583cce8643e3 100644 --- a/gcc/config/loongarch/loongarch-str.h +++ b/gcc/config/loongarch/loongarch-str.h @@ -70,6 +70,7 @@ along with GCC; see the file COPYING3. 
If not see #define OPTSTR_DIV32 "div32" #define OPTSTR_LAM_BH "lam-bh" #define OPTSTR_LAMCAS "lamcas" +#define OPTSTR_SCQ "scq" #define OPTSTR_LD_SEQ_SA "ld-seq-sa" #endif /* LOONGARCH_STR_H */ diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index 493f95e16191..0935d7ba0925 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -4388,6 +4388,7 @@ loongarch_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind, break; } else if (TARGET_RECIP_VEC_DIV + && vectype && gimple_code (stmt_info->stmt) == GIMPLE_ASSIGN) { machine_mode mode = TYPE_MODE (vectype); @@ -6221,9 +6222,8 @@ loongarch_print_operand_reloc (FILE *file, rtx op, bool hi64_part, 'Q' Print R_LARCH_RELAX for TLS IE. 'r' Print address 12-31bit relocation associated with OP. 'R' Print address 32-51bit relocation associated with OP. - 'T' Print 'f' for (eq:CC ...), 't' for (ne:CC ...), - 'z' for (eq:?I ...), 'n' for (ne:?I ...). - 't' Like 'T', but with the EQ/NE cases reversed + 'T' Print a comment marker if %G outputs nothing. + 't' Print the register containing the higher 64 bits of a TImode. 'u' Print a LASX register. 'v' Print the insn size suffix b, h, w or d for vector modes V16QI, V8HI, V4SI, V2SI, and w, d for vector modes V4SF, V2DF respectively. @@ -6306,6 +6306,13 @@ loongarch_print_operand (FILE *file, rtx op, int letter) fputs ("dbar\t0x700", file); break; + case 'T': + if (!loongarch_cas_failure_memorder_needs_acquire ( + memmodel_from_int (INTVAL (op))) + && ISA_HAS_LD_SEQ_SA) + fprintf (file, "%s", ASM_COMMENT_START); + break; + case 'h': if (code == HIGH) op = XEXP (op, 0); @@ -6384,14 +6391,6 @@ loongarch_print_operand (FILE *file, rtx op, int letter) false /* lo_reloc */); break; - case 't': - case 'T': - { - int truth = (code == NE) == (letter == 'T'); - fputc ("zfnt"[truth * 2 + FCC_REG_P (REGNO (XEXP (op, 0)))], file); - } - break; - case 'V': if (CONST_VECTOR_P (op)) { @@ -6495,6 +6494,16 @@ loongarch_print_operand (FILE *file, rtx op, int letter) } break; + case 't': + if (GET_MODE (op) != TImode + || (op != CONST0_RTX (TImode) && code != REG)) + { + output_operand_lossage ("invalid use of '%%%c'", letter); + break; + } + op = loongarch_subword (op, 1); + letter = 'z'; + /* fall through */ default: switch (code) { @@ -10786,9 +10795,9 @@ loongarch_expand_vec_cmp (rtx operands[]) to a fixed type. */ static machine_mode -loongarch_promote_function_mode (const_tree type ATTRIBUTE_UNUSED, +loongarch_promote_function_mode (const_tree type, machine_mode mode, - int *punsignedp ATTRIBUTE_UNUSED, + int *punsignedp, const_tree fntype ATTRIBUTE_UNUSED, int for_return ATTRIBUTE_UNUSED) { @@ -11154,6 +11163,46 @@ loongarch_c_mode_for_suffix (char suffix) return VOIDmode; } +/* Implement TARGET_C_BITINT_TYPE_INFO. + Return true if _BitInt(N) is supported and fill its details into *INFO. */ +bool +loongarch_bitint_type_info (int n, struct bitint_info *info) +{ + if (n <= 8) + info->limb_mode = QImode; + else if (n <= 16) + info->limb_mode = HImode; + else if (n <= 32) + info->limb_mode = SImode; + else if (n <= 64) + info->limb_mode = DImode; + else if (n <= 128) + info->limb_mode = TImode; + else + info->limb_mode = DImode; + + info->abi_limb_mode = info->limb_mode; + + if (n > 64) + info->abi_limb_mode = TImode; + + info->big_endian = false; + info->extended = true; + return true; +} + +/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. 
*/ + +static int +loongarch_compute_pressure_classes (reg_class *classes) +{ + int i = 0; + classes[i++] = GENERAL_REGS; + classes[i++] = FP_REGS; + classes[i++] = FCC_REGS; + return i; +} + /* Initialize the GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" @@ -11428,6 +11477,12 @@ loongarch_c_mode_for_suffix (char suffix) #undef TARGET_C_MODE_FOR_SUFFIX #define TARGET_C_MODE_FOR_SUFFIX loongarch_c_mode_for_suffix +#undef TARGET_C_BITINT_TYPE_INFO +#define TARGET_C_BITINT_TYPE_INFO loongarch_bitint_type_info + +#undef TARGET_COMPUTE_PRESSURE_CLASSES +#define TARGET_COMPUTE_PRESSURE_CLASSES loongarch_compute_pressure_classes + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-loongarch.h" diff --git a/gcc/config/loongarch/loongarch.h b/gcc/config/loongarch/loongarch.h index 5fc86652f5f8..e8819bf14801 100644 --- a/gcc/config/loongarch/loongarch.h +++ b/gcc/config/loongarch/loongarch.h @@ -270,7 +270,9 @@ along with GCC; see the file COPYING3. If not see if (GET_MODE_CLASS (MODE) == MODE_INT \ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD) \ { \ - if ((MODE) == SImode) \ + if ((MODE) == SImode \ + && !(TYPE && TREE_CODE (TYPE) == BITINT_TYPE \ + && TYPE_PRECISION (TYPE) < 32)) \ (UNSIGNEDP) = 0; \ (MODE) = Pmode; \ } diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt index 4d85cf5a8045..fbe61c0bf7c2 100644 --- a/gcc/config/loongarch/loongarch.opt +++ b/gcc/config/loongarch/loongarch.opt @@ -334,6 +334,10 @@ mlamcas Target Mask(ISA_LAMCAS) Var(la_isa_evolution) Support amcas[_db].{b/h/w/d} instructions. +mscq +Target Mask(ISA_SCQ) Var(la_isa_evolution) +Support sc.q instruction. + mld-seq-sa Target Mask(ISA_LD_SEQ_SA) Var(la_isa_evolution) Do not need load-load barriers (dbar 0x700). 
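The new loongarch_bitint_type_info hook above fixes the _BitInt(N) lowering parameters for the port. A small illustrative sketch of the resulting mapping; the enum and struct here are simplified stand-ins for GCC's machine modes and bitint_info, not the real types:

#include <cstdio>

/* Simplified stand-ins for GCC machine modes (illustrative only).  */
enum mode { QImode, HImode, SImode, DImode, TImode };
static const char *const mode_name[] = { "QI", "HI", "SI", "DI", "TI" };

struct limb_info { mode limb, abi_limb; };

/* Mirrors the hook: pick the smallest integer mode that holds N bits,
   up to TImode; anything wider becomes an array of DImode limbs.  */
static limb_info
bitint_limbs (int n)
{
  mode limb;
  if (n <= 8)        limb = QImode;
  else if (n <= 16)  limb = HImode;
  else if (n <= 32)  limb = SImode;
  else if (n <= 64)  limb = DImode;
  else if (n <= 128) limb = TImode;
  else               limb = DImode;   /* Wider _BitInt: 64-bit limb arrays.  */

  mode abi = n > 64 ? TImode : limb;  /* ABI passes > 64 bits in TImode chunks.  */
  return { limb, abi };
}

int
main ()
{
  const int tests[] = { 7, 24, 64, 100, 300 };
  for (int n : tests)
    {
      limb_info li = bitint_limbs (n);
      printf ("_BitInt(%d): limb %s, ABI limb %s\n",
              n, mode_name[li.limb], mode_name[li.abi_limb]);
    }
}

The hook's extended = true records that narrow values are kept sign/zero-extended in registers, which matches the PROMOTE_MODE change to loongarch.h above.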
diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls index 5f644f6c3152..606a211f3223 100644 --- a/gcc/config/loongarch/loongarch.opt.urls +++ b/gcc/config/loongarch/loongarch.opt.urls @@ -90,6 +90,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mlam-bh) mlamcas UrlSuffix(gcc/LoongArch-Options.html#index-mlamcas) +mscq +UrlSuffix(gcc/LoongArch-Options.html#index-mscq) + mld-seq-sa UrlSuffix(gcc/LoongArch-Options.html#index-mld-seq-sa) diff --git a/gcc/config/loongarch/simd.md b/gcc/config/loongarch/simd.md index dd17cd13fc52..4156b269f9ad 100644 --- a/gcc/config/loongarch/simd.md +++ b/gcc/config/loongarch/simd.md @@ -773,7 +773,7 @@ (vec_select: (match_operand:IVEC 2 "register_operand" "f") (match_operand:IVEC 4 "vect_par_cnst_even_or_odd_half"))) - (any_extend: + (any_extend: (vec_select: (match_operand:IVEC 3 "register_operand" "f") (match_dup 4)))) diff --git a/gcc/config/loongarch/sync.md b/gcc/config/loongarch/sync.md index fd8d732dd675..2ee400e23815 100644 --- a/gcc/config/loongarch/sync.md +++ b/gcc/config/loongarch/sync.md @@ -21,25 +21,25 @@ (define_c_enum "unspec" [ UNSPEC_COMPARE_AND_SWAP + UNSPEC_COMPARE_AND_SWAP_AMCAS UNSPEC_COMPARE_AND_SWAP_ADD UNSPEC_COMPARE_AND_SWAP_SUB - UNSPEC_COMPARE_AND_SWAP_AND - UNSPEC_COMPARE_AND_SWAP_XOR - UNSPEC_COMPARE_AND_SWAP_OR UNSPEC_COMPARE_AND_SWAP_NAND UNSPEC_SYNC_OLD_OP UNSPEC_SYNC_EXCHANGE UNSPEC_ATOMIC_STORE UNSPEC_ATOMIC_LOAD UNSPEC_MEMORY_BARRIER + + UNSPEC_TI_FETCH_ADD + UNSPEC_TI_FETCH_SUB + UNSPEC_TI_FETCH_AND + UNSPEC_TI_FETCH_XOR + UNSPEC_TI_FETCH_OR + UNSPEC_TI_FETCH_NAND_MASK_INVERTED ]) (define_code_iterator any_atomic [plus ior xor and]) -(define_code_attr atomic_optab - [(plus "add") (ior "or") (xor "xor") (and "and")]) - -;; This attribute gives the format suffix for atomic memory operations. -(define_mode_attr amo [(QI "b") (HI "h") (SI "w") (DI "d")]) ;; expands to the name of the atomic operand that implements a ;; particular code. @@ -107,7 +107,7 @@ (define_insn "atomic_load" [(set (match_operand:QHWD 0 "register_operand" "=r") (unspec_volatile:QHWD - [(match_operand:QHWD 1 "memory_operand" "+m") + [(match_operand:QHWD 1 "memory_operand" "m") (match_operand:SI 2 "const_int_operand")] ;; model UNSPEC_ATOMIC_LOAD))] "" @@ -142,9 +142,50 @@ } [(set (attr "length") (const_int 12))]) +(define_insn "atomic_loadti_lsx" + [(set (match_operand:V2DI 0 "register_operand" "=f") + (unspec_volatile:V2DI + [(match_operand:TI 1 "memory_operand" "m") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_ATOMIC_LOAD))] + "ISA_HAS_LSX && TARGET_64BIT" +{ + enum memmodel model = memmodel_base (INTVAL (operands[2])); + + switch (model) + { + case MEMMODEL_SEQ_CST: + output_asm_insn ("dbar\t0x11", operands); + /* fall through */ + case MEMMODEL_ACQUIRE: + case MEMMODEL_RELAXED: + return "vld\t%w0,%1\\n\\t%G2"; + + default: + gcc_unreachable (); + } +} + [(set (attr "length") (const_int 12))]) + +(define_expand "atomic_loadti" + [(match_operand:TI 0 "register_operand" "=r") + (match_operand:TI 1 "memory_operand" "m") + (match_operand:SI 2 "const_int_operand")] + "ISA_HAS_LSX && TARGET_64BIT" +{ + rtx vr = gen_reg_rtx (V2DImode); + + emit_insn (gen_atomic_loadti_lsx (vr, operands[1], operands[2])); + for (int i = 0; i < 2; i++) + emit_insn ( + gen_lsx_vpickve2gr_d (loongarch_subword (operands[0], i), vr, + GEN_INT (i))); + DONE; +}) + ;; Implement atomic stores with amoswap. Fall back to fences for atomic loads. 
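;; The TImode patterns added here use two strategies: with LSX, a single
;; 128-bit vld/vst (plus the dbar hints required by the memory model) is
;; assumed to be single-copy atomic, so atomic_loadti/atomic_storeti go
;; through V2DImode; without LSX, atomic_storeti_scq below falls back to
;; an ll.d/sc.q retry loop guarded by ISA_HAS_SCQ.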
(define_insn "atomic_store" - [(set (match_operand:QHWD 0 "memory_operand" "+m") + [(set (match_operand:QHWD 0 "memory_operand" "=m") (unspec_volatile:QHWD [(match_operand:QHWD 1 "reg_or_0_operand" "rJ") (match_operand:SI 2 "const_int_operand")] ;; model @@ -175,7 +216,67 @@ } [(set (attr "length") (const_int 12))]) -(define_insn "atomic_" +(define_insn "atomic_storeti_lsx" + [(set (match_operand:TI 0 "memory_operand" "=m") + (unspec_volatile:TI + [(match_operand:V2DI 1 "register_operand" "f") + (match_operand:SI 2 "const_int_operand")] ;; model + UNSPEC_ATOMIC_STORE))] + "ISA_HAS_LSX && TARGET_64BIT" +{ + enum memmodel model = memmodel_base (INTVAL (operands[2])); + + switch (model) + { + case MEMMODEL_SEQ_CST: + return "dbar\t0x12\\n\\t" + "vst\t%w1,%0\\n\\t" + "dbar\t0x18"; + case MEMMODEL_RELEASE: + return "dbar\t0x12\\n\\t" + "vst\t%w1,%0"; + case MEMMODEL_RELAXED: + return "vst\t%w1,%0"; + default: + gcc_unreachable (); + } +} + [(set (attr "length") (const_int 12))]) + +(define_insn "atomic_storeti_scq" + [(set (match_operand:TI 0 "memory_operand" "=m") + (unspec_volatile:TI + [(match_operand:TI 1 "register_operand" "r")] + UNSPEC_ATOMIC_STORE)) + (clobber (match_scratch:DI 2 "=&r"))] + "TARGET_64BIT && ISA_HAS_SCQ" + "1:\\n\\tll.d\t$r0,%0\n\tmove\t%2,%1\n\tsc.q\t%2,%t1,%0\n\tbeqz\t%2,1b" + [(set (attr "length") (const_int 16))]) + +(define_expand "atomic_storeti" + [(match_operand:TI 0 "memory_operand" "=m") + (match_operand:TI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "const_int_operand")] + "TARGET_64BIT && (ISA_HAS_LSX || ISA_HAS_SCQ)" +{ + if (!ISA_HAS_LSX) + { + emit_insn (gen_atomic_storeti_scq (operands[0], operands[1])); + DONE; + } + + rtx vr = gen_reg_rtx (V2DImode), op1 = operands[1]; + rtvec v = rtvec_alloc (2); + + for (int i = 0; i < 2; i++) + RTVEC_ELT (v, i) = loongarch_subword (op1, i); + + emit_insn (gen_vec_initv2didi (vr, gen_rtx_PARALLEL (V2DImode, v))); + emit_insn (gen_atomic_storeti_lsx (operands[0], vr, operands[2])); + DONE; +}) + +(define_insn "atomic_" [(set (match_operand:GPR 0 "memory_operand" "+ZB") (unspec_volatile:GPR [(any_atomic:GPR (match_dup 0) @@ -183,7 +284,7 @@ (match_operand:SI 2 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] "" - "am%A2.\t$zero,%z1,%0" + "am%A2.\t$zero,%z1,%0" [(set (attr "length") (const_int 4))]) (define_insn "atomic_add" @@ -194,10 +295,10 @@ (match_operand:SI 2 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] "ISA_HAS_LAM_BH" - "amadd%A2.\t$zero,%z1,%0" + "amadd%A2.\t$zero,%z1,%0" [(set (attr "length") (const_int 4))]) -(define_insn "atomic_fetch_" +(define_insn "atomic_fetch_" [(set (match_operand:GPR 0 "register_operand" "=&r") (match_operand:GPR 1 "memory_operand" "+ZB")) (set (match_dup 1) @@ -207,9 +308,52 @@ (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] "" - "am%A3.\t%0,%z2,%1" + "am%A3.\t%0,%z2,%1" [(set (attr "length") (const_int 4))]) +(define_insn "atomic_fetch_nand_mask_inverted" + [(set (match_operand:GPR 0 "register_operand" "=&r") + (match_operand:GPR 1 "memory_operand" "+ZC")) + (set (match_dup 1) + (unspec_volatile:GPR + [(ior:GPR (not (match_dup 1)) + (match_operand:GPR 2 "register_operand" "r"))] + UNSPEC_SYNC_OLD_OP)) + (clobber (match_scratch:GPR 3 "=&r"))] + "" + { + return "1:\\n\\t" + "ll.\\t%0,%1\\n\\t" + "orn\\t%3,%2,%0\\n\\t" + "sc.\\t%3,%1\\n\\t" + "beqz\\t%3,1b"; + } + [(set (attr "length") (const_int 16))]) + +(define_mode_iterator ALL_SC [GPR (TI "TARGET_64BIT && ISA_HAS_SCQ")]) +(define_mode_attr _scq [(SI "") (DI "") (TI "_scq")]) 
+(define_expand "atomic_fetch_nand" + [(match_operand:ALL_SC 0 "register_operand") + (match_operand:ALL_SC 1 "memory_operand") + (match_operand:ALL_SC 2 "reg_or_0_operand") + (match_operand:SI 3 "const_int_operand")] + "" + { + /* ~(atom & mask) = (~mask) | (~atom), so we can hoist + (~mask) out of the ll/sc loop and use the orn instruction in the + ll/sc loop. */ + rtx inverted_mask = gen_reg_rtx (mode); + emit_move_insn (inverted_mask, + expand_simple_unop (mode, NOT, operands[2], + NULL_RTX, false)); + + emit_insn ( + gen_atomic_fetch_nand_mask_inverted<_scq> (operands[0], + operands[1], + inverted_mask)); + DONE; + }) + (define_insn "atomic_exchange" [(set (match_operand:GPR 0 "register_operand" "=&r") (unspec_volatile:GPR @@ -219,9 +363,44 @@ (set (match_dup 1) (match_operand:GPR 2 "register_operand" "r"))] "" - "amswap%A3.\t%0,%z2,%1" + "amswap%A3.\t%0,%z2,%1" [(set (attr "length") (const_int 4))]) +(define_insn "atomic_exchangeti_scq" + [(set (match_operand:TI 0 "register_operand" "=&r") + (unspec_volatile:TI + [(match_operand:TI 1 "memory_operand" "+ZB")] + UNSPEC_SYNC_EXCHANGE)) + (set (match_dup 1) + (match_operand:TI 2 "register_operand" "rJ")) + (clobber (match_scratch:DI 3 "=&r"))] + "TARGET_64BIT && ISA_HAS_SCQ" +{ + output_asm_insn ("1:", operands); + output_asm_insn ("ll.d\t%0,%1", operands); + if (!ISA_HAS_LD_SEQ_SA) + output_asm_insn ("dbar\t0x700", operands); + output_asm_insn ("ld.d\t%t0,%b1,8", operands); + output_asm_insn ("move\t%3,%z2", operands); + output_asm_insn ("sc.q\t%3,%t2,%1", operands); + output_asm_insn ("beqz\t%3,1b", operands); + + return ""; +} + [(set (attr "length") (const_int 24))]) + +(define_expand "atomic_exchangeti" + [(match_operand:TI 0 "register_operand" "=&r") + (match_operand:TI 1 "memory_operand" "+ZB") + (match_operand:TI 2 "register_operand" "rJ") + (match_operand:SI 3 "const_int_operand")] ;; model + "TARGET_64BIT && ISA_HAS_SCQ" +{ + emit_insn (gen_atomic_exchangeti_scq (operands[0], operands[1], + operands[2])); + DONE; +}) + (define_insn "atomic_exchange_short" [(set (match_operand:SHORT 0 "register_operand" "=&r") (unspec_volatile:SHORT @@ -231,7 +410,7 @@ (set (match_dup 1) (match_operand:SHORT 2 "register_operand" "r"))] "ISA_HAS_LAM_BH" - "amswap%A3.\t%0,%z2,%1" + "amswap%A3.\t%0,%z2,%1" [(set (attr "length") (const_int 4))]) (define_insn "atomic_cas_value_strong" @@ -240,13 +419,13 @@ (set (match_dup 1) (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") (match_operand:GPR 3 "reg_or_0_operand" "rJ") - (match_operand:SI 4 "const_int_operand")] ;; mod_s + (match_operand:SI 4 "const_int_operand")] ;; mod_f UNSPEC_COMPARE_AND_SWAP)) (clobber (match_scratch:GPR 5 "=&r"))] "" { output_asm_insn ("1:", operands); - output_asm_insn ("ll.\t%0,%1", operands); + output_asm_insn ("ll.\t%0,%1", operands); /* Like the test case atomic-cas-int.C, in loongarch64, O1 and higher, the return value of the val_without_const_folding will not be truncated and @@ -266,9 +445,9 @@ output_asm_insn ("bne\t%0,%z2,2f", operands); output_asm_insn ("or%i3\t%5,$zero,%3", operands); - output_asm_insn ("sc.\t%5,%1", operands); + output_asm_insn ("sc.\t%5,%1", operands); output_asm_insn ("beqz\t%5,1b", operands); - output_asm_insn ("b\t3f", operands); + output_asm_insn ("%T4b\t3f", operands); output_asm_insn ("2:", operands); output_asm_insn ("%G4", operands); output_asm_insn ("3:", operands); @@ -288,10 +467,10 @@ (set (match_dup 1) (unspec_volatile:QHWD [(match_operand:QHWD 2 "reg_or_0_operand" "rJ") (match_operand:QHWD 3 "reg_or_0_operand" "rJ") 
- (match_operand:SI 4 "const_int_operand")] ;; mod_s - UNSPEC_COMPARE_AND_SWAP))] + (match_operand:SI 4 "const_int_operand")] ;; mod + UNSPEC_COMPARE_AND_SWAP_AMCAS))] "ISA_HAS_LAMCAS" - "ori\t%0,%z2,0\n\tamcas%A4.\t%0,%z3,%1" + "ori\t%0,%z2,0\n\tamcas%A4.\t%0,%z3,%1" [(set (attr "length") (const_int 8))]) (define_expand "atomic_compare_and_swap" @@ -318,16 +497,14 @@ && is_mm_release (memmodel_base (INTVAL (mod_s)))) mod_s = GEN_INT (MEMMODEL_ACQ_REL); - operands[6] = mod_s; - if (ISA_HAS_LAMCAS) emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], operands[3], operands[4], - operands[6])); + mod_s)); else emit_insn (gen_atomic_cas_value_strong (operands[1], operands[2], operands[3], operands[4], - operands[6])); + mod_f)); rtx compare = operands[1]; if (operands[3] != const0_rtx) @@ -349,49 +526,74 @@ DONE; }) -(define_expand "atomic_test_and_set" - [(match_operand:QI 0 "register_operand" "") ;; bool output - (match_operand:QI 1 "memory_operand" "+ZB") ;; memory - (match_operand:SI 2 "const_int_operand" "")] ;; model +(define_expand "atomic_fetch_" + [(match_operand:SHORT 0 "register_operand" "") ;; output + (any_bitwise (match_operand:SHORT 1 "memory_operand" "+ZB") ;; memory + (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) ;; val + (match_operand:SI 3 "const_int_operand" "")] ;; model "" { - /* We have no QImode atomics, so use the address LSBs to form a mask, - then use an aligned SImode atomic. */ + /* We have no QI/HImode bitwise atomics, so use the address LSBs to form + a mask, then use an aligned SImode atomic. */ rtx result = operands[0]; rtx mem = operands[1]; - rtx model = operands[2]; + rtx model = operands[3]; rtx addr = force_reg (Pmode, XEXP (mem, 0)); - rtx tmp_reg = gen_reg_rtx (Pmode); - rtx zero_reg = gen_rtx_REG (Pmode, 0); - + rtx mask = gen_int_mode (-4, Pmode); rtx aligned_addr = gen_reg_rtx (Pmode); - emit_move_insn (tmp_reg, gen_rtx_PLUS (Pmode, zero_reg, GEN_INT (-4))); - emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, tmp_reg)); + + if (!and_operand (mask, Pmode)) + mask = force_reg (Pmode, mask); + + emit_move_insn (aligned_addr, gen_rtx_AND (Pmode, addr, mask)); rtx aligned_mem = change_address (mem, SImode, aligned_addr); set_mem_alias_set (aligned_mem, 0); - rtx offset = gen_reg_rtx (SImode); - emit_move_insn (offset, gen_rtx_AND (SImode, gen_lowpart (SImode, addr), - GEN_INT (3))); - rtx tmp = gen_reg_rtx (SImode); - emit_move_insn (tmp, GEN_INT (1)); + emit_move_insn (tmp, simplify_gen_unary (ZERO_EXTEND, SImode, + operands[2], mode)); + /* Note that we have defined SHIFT_COUNT_TRUNCATED to 1, so we don't need + to mask addr with 0b11 here. 
*/ rtx shmt = gen_reg_rtx (SImode); - emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, offset, GEN_INT (3))); + emit_move_insn (shmt, gen_rtx_ASHIFT (SImode, gen_lowpart (SImode, addr), + GEN_INT (3))); rtx word = gen_reg_rtx (SImode); emit_move_insn (word, gen_rtx_ASHIFT (SImode, tmp, shmt)); + if () + { + /* word = word | ~(mode_mask << shmt) */ + rtx tmp = force_reg (SImode, + gen_int_mode (GET_MODE_MASK (mode), + SImode)); + emit_move_insn (tmp, gen_rtx_ASHIFT (SImode, tmp, shmt)); + emit_move_insn (word, gen_rtx_IOR (SImode, gen_rtx_NOT (SImode, tmp), + word)); + } + tmp = gen_reg_rtx (SImode); - emit_insn (gen_atomic_fetch_orsi (tmp, aligned_mem, word, model)); + emit_insn (gen_atomic_fetch_si (tmp, aligned_mem, word, model)); emit_move_insn (gen_lowpart (SImode, result), gen_rtx_LSHIFTRT (SImode, tmp, shmt)); DONE; }) +(define_expand "atomic_test_and_set" + [(match_operand:QI 0 "register_operand" "") ;; bool output + (match_operand:QI 1 "memory_operand" "+ZB") ;; memory + (match_operand:SI 2 "const_int_operand" "")] ;; model + "" +{ + rtx one = force_reg (QImode, gen_int_mode (1, QImode)); + emit_insn (gen_atomic_fetch_orqi (operands[0], operands[1], one, + operands[2])); + DONE; +}) + (define_insn "atomic_cas_value_cmp_and_7_" [(set (match_operand:GPR 0 "register_operand" "=&r") (match_operand:GPR 1 "memory_operand" "+ZC")) @@ -400,20 +602,20 @@ (match_operand:GPR 3 "reg_or_0_operand" "rJ") (match_operand:GPR 4 "reg_or_0_operand" "rJ") (match_operand:GPR 5 "reg_or_0_operand" "rJ") - (match_operand:SI 6 "const_int_operand")] ;; model + (match_operand:SI 6 "const_int_operand")] ;; mod_f UNSPEC_COMPARE_AND_SWAP)) (clobber (match_scratch:GPR 7 "=&r"))] "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%2\\n\\t" "bne\\t%7,%z4,2f\\n\\t" "and\\t%7,%0,%z3\\n\\t" "or%i5\\t%7,%7,%5\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b\\n\\t" - "b\\t3f\\n\\t" + "%T6b\\t3f\\n\\t" "2:\\n\\t" "%G6\\n\\t" "3:\\n\\t"; @@ -444,18 +646,16 @@ && is_mm_release (memmodel_base (INTVAL (mod_s)))) mod_s = GEN_INT (MEMMODEL_ACQ_REL); - operands[6] = mod_s; - if (ISA_HAS_LAMCAS) emit_insn (gen_atomic_cas_value_strong_amcas (operands[1], operands[2], operands[3], operands[4], - operands[6])); + mod_s)); else { union loongarch_gen_fn_ptrs generator; generator.fn_7 = gen_atomic_cas_value_cmp_and_7_si; loongarch_expand_atomic_qihi (generator, operands[1], operands[2], - operands[3], operands[4], operands[6]); + operands[3], operands[4], mod_f); } rtx compare = operands[1]; @@ -481,83 +681,96 @@ DONE; }) -(define_insn "atomic_cas_value_add_7_" - [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res - (match_operand:GPR 1 "memory_operand" "+ZC")) +(define_insn "atomic_compare_and_swapti_scq" + [(set (match_operand:TI 0 "register_operand" "=&r") + (match_operand:TI 1 "memory_operand" "+ZB")) (set (match_dup 1) - (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask - (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask - (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val - (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val - (match_operand:SI 6 "const_int_operand")] ;; model - UNSPEC_COMPARE_AND_SWAP_ADD)) - (clobber (match_scratch:GPR 7 "=&r")) - (clobber (match_scratch:GPR 8 "=&r"))] - "" + (unspec_volatile:TI [(match_operand:TI 2 "reg_or_0_operand" "rJ") + (match_operand:TI 3 "reg_or_0_operand" "rJ") + (match_operand:SI 4 "const_int_operand")] ;; mod_f + UNSPEC_COMPARE_AND_SWAP)) + (clobber (match_scratch:DI 5 "=&r"))] 
+ "TARGET_64BIT && ISA_HAS_SCQ" { - return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" - "and\\t%7,%0,%3\\n\\t" - "add.w\\t%8,%0,%z5\\n\\t" - "and\\t%8,%8,%z2\\n\\t" - "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" - "beq\\t$zero,%7,1b"; -} + output_asm_insn ("1:", operands); + output_asm_insn ("ll.d\t%0,%1", operands); - [(set (attr "length") (const_int 28))]) + /* Compare the low word */ + output_asm_insn ("bne\t%0,%z2,2f", operands); -(define_insn "atomic_cas_value_sub_7_" - [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res - (match_operand:GPR 1 "memory_operand" "+ZC")) - (set (match_dup 1) - (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask - (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask - (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val - (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val - (match_operand:SI 6 "const_int_operand")] ;; model - UNSPEC_COMPARE_AND_SWAP_SUB)) - (clobber (match_scratch:GPR 7 "=&r")) - (clobber (match_scratch:GPR 8 "=&r"))] - "" -{ - return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" - "and\\t%7,%0,%3\\n\\t" - "sub.w\\t%8,%0,%z5\\n\\t" - "and\\t%8,%8,%z2\\n\\t" - "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" - "beq\\t$zero,%7,1b"; + /* Don't reorder the load of high word before ll.d. As the TImode + must be aligned in the memory, the high and low words must be in + the same cacheline, thus dbar 0x700 is enough. */ + if (!ISA_HAS_LD_SEQ_SA) + output_asm_insn ("dbar\t0x700", operands); + + /* Now load the high word. As the high and low words are in the same + cacheline, in case another core has clobbered the high word before the + sc.q instruction is executed, the LL bit for the low word will be + cleared. Thus a normal load is sufficient. */ + output_asm_insn ("ld.d\t%t0,%b1,8", operands); + + /* Compare the high word. */ + output_asm_insn ("bne\t%t0,%t2,2f", operands); + + /* Copy the low word of the new value as it'll be clobbered by sc.q. */ + output_asm_insn ("move\t%5,%z3", operands); + + /* Store both words if LL bit is still set. */ + output_asm_insn ("sc.q\t%5,%t3,%1", operands); + + /* Check if sc.q has done the store. */ + output_asm_insn ("beqz\t%5,1b", operands); + + /* Jump over the mod_f barrier if sc.q has succeeded. */ + output_asm_insn ("%T4b\t3f", operands); + + /* The barrier for mod_f. 
*/ + output_asm_insn ("2:", operands); + output_asm_insn ("%G4", operands); + + output_asm_insn ("3:", operands); + return ""; } - [(set (attr "length") (const_int 28))]) + [(set_attr "length" "40")]) -(define_insn "atomic_cas_value_and_7_" - [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res - (match_operand:GPR 1 "memory_operand" "+ZC")) - (set (match_dup 1) - (unspec_volatile:GPR [(match_operand:GPR 2 "reg_or_0_operand" "rJ") ;; mask - (match_operand:GPR 3 "reg_or_0_operand" "rJ") ;; inverted_mask - (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val - (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val - (match_operand:SI 6 "const_int_operand")] ;; model - UNSPEC_COMPARE_AND_SWAP_AND)) - (clobber (match_scratch:GPR 7 "=&r")) - (clobber (match_scratch:GPR 8 "=&r"))] - "" +(define_expand "atomic_compare_and_swapti" + [(match_operand:SI 0 "register_operand" "") ;; bool output + (match_operand:TI 1 "register_operand" "") ;; val output + (match_operand:TI 2 "memory_operand" "") ;; memory + (match_operand:TI 3 "reg_or_0_operand" "") ;; expected value + (match_operand:TI 4 "reg_or_0_operand" "") ;; desired value + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; mod_s + (match_operand:SI 7 "const_int_operand" "")] ;; mod_f + "TARGET_64BIT && ISA_HAS_SCQ" { - return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" - "and\\t%7,%0,%3\\n\\t" - "and\\t%8,%0,%z5\\n\\t" - "and\\t%8,%8,%z2\\n\\t" - "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" - "beq\\t$zero,%7,1b"; -} - [(set (attr "length") (const_int 28))]) + emit_insn (gen_atomic_compare_and_swapti_scq (operands[1], operands[2], + operands[3], operands[4], + operands[7])); + + rtx t[2]; -(define_insn "atomic_cas_value_xor_7_" + for (int i = 0; i < 2; i++) + { + rtx compare = loongarch_subword (operands[1], i); + rtx expect = loongarch_subword (operands[3], i); + + t[i] = gen_reg_rtx (DImode); + + if (expect != const0_rtx) + emit_insn (gen_xordi3 (t[i], compare, expect)); + else + emit_move_insn (t[i], compare); + } + + emit_insn (gen_iordi3 (t[0], t[0], t[1])); + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_EQ (SImode, t[0], const0_rtx))); + DONE; +}) + +(define_insn "atomic_cas_value_add_7_" [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res (match_operand:GPR 1 "memory_operand" "+ZC")) (set (match_dup 1) @@ -566,24 +779,24 @@ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val (match_operand:SI 6 "const_int_operand")] ;; model - UNSPEC_COMPARE_AND_SWAP_XOR)) + UNSPEC_COMPARE_AND_SWAP_ADD)) (clobber (match_scratch:GPR 7 "=&r")) (clobber (match_scratch:GPR 8 "=&r"))] "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" - "xor\\t%8,%0,%z5\\n\\t" + "add.w\\t%8,%0,%z5\\n\\t" "and\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } [(set (attr "length") (const_int 28))]) -(define_insn "atomic_cas_value_or_7_" +(define_insn "atomic_cas_value_sub_7_" [(set (match_operand:GPR 0 "register_operand" "=&r") ;; res (match_operand:GPR 1 "memory_operand" "+ZC")) (set (match_dup 1) @@ -592,21 +805,20 @@ (match_operand:GPR 4 "reg_or_0_operand" "rJ") ;; old val (match_operand:GPR 5 "reg_or_0_operand" "rJ") ;; new val (match_operand:SI 6 "const_int_operand")] ;; model - UNSPEC_COMPARE_AND_SWAP_OR)) + UNSPEC_COMPARE_AND_SWAP_SUB)) (clobber (match_scratch:GPR 7 "=&r")) (clobber (match_scratch:GPR 8 "=&r"))] "" { return "1:\\n\\t" - 
"ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" - "or\\t%8,%0,%z5\\n\\t" + "sub.w\\t%8,%0,%z5\\n\\t" "and\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } - [(set (attr "length") (const_int 28))]) (define_insn "atomic_cas_value_nand_7_" @@ -624,12 +836,12 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%3\\n\\t" "and\\t%8,%0,%z5\\n\\t" "xor\\t%8,%8,%z2\\n\\t" "or%i8\\t%7,%7,%8\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beq\\t$zero,%7,1b"; } [(set (attr "length") (const_int 28))]) @@ -648,10 +860,10 @@ "" { return "1:\\n\\t" - "ll.\\t%0,%1\\n\\t" + "ll.\\t%0,%1\\n\\t" "and\\t%7,%0,%z3\\n\\t" "or%i5\\t%7,%7,%5\\n\\t" - "sc.\\t%7,%1\\n\\t" + "sc.\\t%7,%1\\n\\t" "beqz\\t%7,1b\\n\\t"; } [(set (attr "length") (const_int 20))]) @@ -678,6 +890,101 @@ DONE; }) +(define_int_iterator UNSPEC_TI_FETCH_DIRECT + [UNSPEC_TI_FETCH_ADD + UNSPEC_TI_FETCH_SUB + UNSPEC_TI_FETCH_AND + UNSPEC_TI_FETCH_XOR + UNSPEC_TI_FETCH_OR]) +(define_int_iterator UNSPEC_TI_FETCH + [UNSPEC_TI_FETCH_DIRECT UNSPEC_TI_FETCH_NAND_MASK_INVERTED]) +(define_int_attr amop_ti_fetch + [(UNSPEC_TI_FETCH_ADD "add") + (UNSPEC_TI_FETCH_SUB "sub") + (UNSPEC_TI_FETCH_AND "and") + (UNSPEC_TI_FETCH_XOR "xor") + (UNSPEC_TI_FETCH_OR "or") + (UNSPEC_TI_FETCH_NAND_MASK_INVERTED "nand_mask_inverted")]) +(define_int_attr size_ti_fetch + [(UNSPEC_TI_FETCH_ADD "36") + (UNSPEC_TI_FETCH_SUB "36") + (UNSPEC_TI_FETCH_AND "28") + (UNSPEC_TI_FETCH_XOR "28") + (UNSPEC_TI_FETCH_OR "28") + (UNSPEC_TI_FETCH_NAND_MASK_INVERTED "28")]) + +(define_insn "atomic_fetch_ti_scq" + [(set (match_operand:TI 0 "register_operand" "=&r") + (match_operand:TI 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:TI + [(match_dup 0) + (match_operand:TI 2 "reg_or_0_operand" "rJ")] + UNSPEC_TI_FETCH)) + (clobber (match_scratch:DI 3 "=&r")) + (clobber (match_scratch:DI 4 "=&r"))] + "TARGET_64BIT && ISA_HAS_SCQ" +{ + output_asm_insn ("1:", operands); + output_asm_insn ("ll.d\t%0,%1", operands); + if (!ISA_HAS_LD_SEQ_SA) + output_asm_insn ("dbar\t0x700", operands); + output_asm_insn ("ld.d\t%t0,%b1,8", operands); + + switch () + { + case UNSPEC_TI_FETCH_AND: + case UNSPEC_TI_FETCH_OR: + case UNSPEC_TI_FETCH_XOR: + output_asm_insn ("\t%3,%0,%z2", operands); + output_asm_insn ("\t%4,%t0,%t2", operands); + break; + case UNSPEC_TI_FETCH_NAND_MASK_INVERTED: + output_asm_insn ("orn\t%3,%z2,%0", operands); + output_asm_insn ("orn\t%4,%t2,%t0", operands); + break; + case UNSPEC_TI_FETCH_ADD: + case UNSPEC_TI_FETCH_SUB: + output_asm_insn (".d\t%3,%0,%z2", operands); + + /* Generate carry bit. */ + output_asm_insn ( + == UNSPEC_TI_FETCH_ADD ? "sltu\t%4,%3,%0" + : "sltu\t%4,%0,%3", + operands); + + output_asm_insn (".d\t%4,%t0,%4", operands); + output_asm_insn (".d\t%4,%4,%t2", operands); + break; + default: + gcc_unreachable (); + } + + output_asm_insn ("sc.q\t%3,%4,%1", operands); + output_asm_insn ("beqz\t%3,1b", operands); + + return ""; +} + [(set_attr "length" "")]) + +(define_expand "atomic_fetch_ti" + [(set (match_operand:TI 0 "register_operand" "=&r") + (match_operand:TI 1 "memory_operand" "+ZB")) + (set (match_dup 1) + (unspec_volatile:TI + [(match_dup 0) + (match_operand:TI 2 "reg_or_0_operand" "rJ")] + UNSPEC_TI_FETCH_DIRECT)) + (match_operand:SI 3 "const_int_operand")] ;; model + "TARGET_64BIT && ISA_HAS_SCQ" +{ + /* Model is ignored as sc.q implies a full barrier. 
*/ + emit_insn (gen_atomic_fetch_ti_scq (operands[0], + operands[1], + operands[2])); + DONE; +}) + (define_insn "atomic_fetch_add_short" [(set (match_operand:SHORT 0 "register_operand" "=&r") (match_operand:SHORT 1 "memory_operand" "+ZB")) @@ -688,7 +995,7 @@ (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] "ISA_HAS_LAM_BH" - "amadd%A3.\t%0,%z2,%1" + "amadd%A3.\t%0,%z2,%1" [(set (attr "length") (const_int 4))]) (define_expand "atomic_fetch_add" @@ -724,7 +1031,7 @@ (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_OLD_OP))] - "" + "!ISA_HAS_LAM_BH" { union loongarch_gen_fn_ptrs generator; generator.fn_7 = gen_atomic_cas_value_sub_7_si; @@ -733,60 +1040,6 @@ DONE; }) -(define_expand "atomic_fetch_and" - [(set (match_operand:SHORT 0 "register_operand" "=&r") - (match_operand:SHORT 1 "memory_operand" "+ZB")) - (set (match_dup 1) - (unspec_volatile:SHORT - [(and:SHORT (match_dup 1) - (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPEC_SYNC_OLD_OP))] - "" -{ - union loongarch_gen_fn_ptrs generator; - generator.fn_7 = gen_atomic_cas_value_and_7_si; - loongarch_expand_atomic_qihi (generator, operands[0], operands[1], - operands[1], operands[2], operands[3]); - DONE; -}) - -(define_expand "atomic_fetch_xor" - [(set (match_operand:SHORT 0 "register_operand" "=&r") - (match_operand:SHORT 1 "memory_operand" "+ZB")) - (set (match_dup 1) - (unspec_volatile:SHORT - [(xor:SHORT (match_dup 1) - (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPEC_SYNC_OLD_OP))] - "" -{ - union loongarch_gen_fn_ptrs generator; - generator.fn_7 = gen_atomic_cas_value_xor_7_si; - loongarch_expand_atomic_qihi (generator, operands[0], operands[1], - operands[1], operands[2], operands[3]); - DONE; -}) - -(define_expand "atomic_fetch_or" - [(set (match_operand:SHORT 0 "register_operand" "=&r") - (match_operand:SHORT 1 "memory_operand" "+ZB")) - (set (match_dup 1) - (unspec_volatile:SHORT - [(ior:SHORT (match_dup 1) - (match_operand:SHORT 2 "reg_or_0_operand" "rJ")) - (match_operand:SI 3 "const_int_operand")] ;; model - UNSPEC_SYNC_OLD_OP))] - "" -{ - union loongarch_gen_fn_ptrs generator; - generator.fn_7 = gen_atomic_cas_value_or_7_si; - loongarch_expand_atomic_qihi (generator, operands[0], operands[1], - operands[1], operands[2], operands[3]); - DONE; -}) - (define_expand "atomic_fetch_nand" [(set (match_operand:SHORT 0 "register_operand" "=&r") (match_operand:SHORT 1 "memory_operand" "+ZB")) diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index e224ade2a1aa..494f14cc18fa 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -2363,8 +2363,14 @@ enum reg_class #define STACK_GROWS_DOWNWARD 1 -#define FRAME_GROWS_DOWNWARD (flag_stack_protect != 0 \ - || (flag_sanitize & SANITIZE_ADDRESS) != 0) +/* Growing the frame downwards allows us to put spills closest to + the stack pointer which is good as they are likely to be accessed + frequently. We can also arrange for normal stack usage to place + scalars last so that they too are close to the stack pointer. */ +#define FRAME_GROWS_DOWNWARD ((TARGET_MIPS16 \ + && TARGET_FRAME_GROWS_DOWNWARDS) \ + || (flag_stack_protect != 0 \ + || (flag_sanitize & SANITIZE_ADDRESS) != 0)) /* Size of the area allocated in the frame to save the GP. 
*/ diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt index e24565469d99..f07db5ad7f4a 100644 --- a/gcc/config/mips/mips.opt +++ b/gcc/config/mips/mips.opt @@ -473,6 +473,10 @@ mframe-header-opt Target Var(flag_frame_header_optimization) Optimization Optimize frame header. +mgrow-frame-downwards +Target Var(TARGET_FRAME_GROWS_DOWNWARDS) Init(1) Undocumented +Change the behaviour to grow the frame downwards. + noasmopt Driver diff --git a/gcc/config/pru/pru.cc b/gcc/config/pru/pru.cc index 322e3196a6e4..3fdc56e373b7 100644 --- a/gcc/config/pru/pru.cc +++ b/gcc/config/pru/pru.cc @@ -941,10 +941,19 @@ pru_init_libfuncs (void) /* Long long. */ set_optab_libfunc (ashr_optab, DImode, "__pruabi_asrll"); - set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll"); set_optab_libfunc (ashl_optab, DImode, "__pruabi_lslll"); set_optab_libfunc (lshr_optab, DImode, "__pruabi_lsrll"); + if (TARGET_OPT_MUL) + { + set_optab_libfunc (smul_optab, DImode, "__pruabi_mpyll"); + } + else + { + set_optab_libfunc (smul_optab, DImode, "__pruabi_softmpyll"); + set_optab_libfunc (smul_optab, SImode, "__pruabi_softmpyi"); + } + set_optab_libfunc (sdiv_optab, SImode, "__pruabi_divi"); set_optab_libfunc (udiv_optab, SImode, "__pruabi_divu"); set_optab_libfunc (smod_optab, SImode, "__pruabi_remi"); diff --git a/gcc/config/pru/pru.h b/gcc/config/pru/pru.h index 6c0719b5c1dc..9d547ed3bad0 100644 --- a/gcc/config/pru/pru.h +++ b/gcc/config/pru/pru.h @@ -65,6 +65,9 @@ #undef ENDFILE_SPEC #define ENDFILE_SPEC "%{!mabi=ti:-lgloss} " +#undef MULTILIB_DEFAULTS +#define MULTILIB_DEFAULTS { "mloop", "mmul", "mfillzero" } + /* TI ABI mandates that ELF symbols do not start with any prefix. */ #undef USER_LABEL_PREFIX #define USER_LABEL_PREFIX "" diff --git a/gcc/config/pru/pru.md b/gcc/config/pru/pru.md index 3504e42e9002..b8ef55b98f93 100644 --- a/gcc/config/pru/pru.md +++ b/gcc/config/pru/pru.md @@ -215,7 +215,7 @@ mov\\t%0, %1 ldi\\t%0, %%pmem(%1) ldi\\t%0, %1 - fill\\t%0, 4 + * return TARGET_OPT_FILLZERO ? \"fill\\t%0, 4\" : \"ldi32\\t%0, 0xffffffff\"; ldi32\\t%0, %1" [(set_attr "type" "st,ld,alu,alu,alu,alu,alu") (set_attr "length" "4,4,4,4,4,4,8")]) @@ -259,9 +259,11 @@ case 1: return "lb%B1o\\t%b0, %1, %S1"; case 2: - return "zero\\t%F0, 8"; + return TARGET_OPT_FILLZERO ? "zero\\t%F0, 8" + : "ldi\\t%F0, 0\;ldi\\t%N0, 0"; case 3: - return "fill\\t%F0, 8"; + return TARGET_OPT_FILLZERO ? "fill\\t%F0, 8" + : "ldi32\\t%F0, 0xffffffff\;mov\\t%N0, %F0"; case 4: /* careful with overlapping source and destination regs. 
*/ gcc_assert (GP_REG_P (REGNO (operands[0]))); @@ -502,7 +504,7 @@ (define_insn "zero_extendqidi2" [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (match_operand:QI 1 "register_operand" "0,r")))] - "" + "TARGET_OPT_FILLZERO" "@ zero\\t%F0.b1, 7 mov\\t%F0.b0, %1\;zero\\t%F0.b1, 7" @@ -512,7 +514,7 @@ (define_insn "zero_extendhidi2" [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (match_operand:HI 1 "register_operand" "0,r")))] - "" + "TARGET_OPT_FILLZERO" "@ zero\\t%F0.b2, 6 mov\\t%F0.w0, %1\;zero\\t%F0.b2, 6" @@ -522,7 +524,7 @@ (define_insn "zero_extendsidi2" [(set (match_operand:DI 0 "register_operand" "=r,r") (zero_extend:DI (match_operand:SI 1 "register_operand" "0,r")))] - "" + "TARGET_OPT_FILLZERO" "@ zero\\t%N0, 4 mov\\t%F0, %1\;zero\\t%N0, 4" @@ -535,7 +537,7 @@ (define_expand "extend2" [(set (match_operand:EQDHIDI 0 "register_operand" "=r") (sign_extend:EQDHIDI (match_operand:EQS0 1 "register_operand" "r")))] - "" + "TARGET_OPT_FILLZERO" { rtx_code_label *skip_hiset_label; @@ -744,7 +746,7 @@ (ior:HIDI (match_operand:HIDI 1 "register_operand" "0") (match_operand:HIDI 2 "const_fillbytes_operand" "Uf")))] - "" + "TARGET_OPT_FILLZERO" { static char line[64]; pru_byterange r; @@ -767,7 +769,7 @@ (and:HIDI (match_operand:HIDI 1 "register_operand" "0") (match_operand:HIDI 2 "const_zerobytes_operand" "Uz")))] - "" + "TARGET_OPT_FILLZERO" { static char line[64]; pru_byterange r; @@ -1114,7 +1116,8 @@ /* Try with the more efficient zero/fill patterns first. */ if ( == IOR && CONST_INT_P (operands[2]) - && const_fillbytes_operand (operands[2], DImode)) + && const_fillbytes_operand (operands[2], DImode) + && TARGET_OPT_FILLZERO) { rtx insn = maybe_gen_pru_ior_fillbytes (DImode, operands[0], @@ -1130,7 +1133,8 @@ } if ( == AND && CONST_INT_P (operands[2]) - && const_zerobytes_operand (operands[2], DImode)) + && const_zerobytes_operand (operands[2], DImode) + && TARGET_OPT_FILLZERO) { rtx insn = maybe_gen_pru_and_zerobytes (DImode, operands[0], @@ -1212,7 +1216,7 @@ [(set (match_operand:SI 0 "pru_muldst_operand" "=Rmd0") (mult:SI (match_operand:SI 1 "pru_mulsrc0_operand" "%Rms0") (match_operand:SI 2 "pru_mulsrc1_operand" "Rms1")))] - "" + "TARGET_OPT_MUL" "nop\;xin\\t0, %0, 4" [(set_attr "type" "alu") (set_attr "length" "8")]) diff --git a/gcc/config/pru/pru.opt b/gcc/config/pru/pru.opt index 8385beba567e..5206b2aec820 100644 --- a/gcc/config/pru/pru.opt +++ b/gcc/config/pru/pru.opt @@ -39,6 +39,14 @@ mloop Target Mask(OPT_LOOP) Allow (or do not allow) gcc to use the LOOP instruction. +mmul +Target Mask(OPT_MUL) +Allow (or do not allow) gcc to use the PRU multiplier unit. + +mfillzero +Target Mask(OPT_FILLZERO) +Allow (or do not allow) gcc to use the FILL and ZERO instructions. + mabi= Target RejectNegative Joined Enum(pru_abi_t) Var(pru_current_abi) Init(PRU_ABI_GNU) Save Select target ABI variant. 
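The new -mmul and -mfillzero masks above gate use of the PRU multiplier unit and of the FILL/ZERO instructions. As a rough illustration of the -mno-mul path (a sketch, not part of the patch; mul64 and mul32 are made-up names): pru_init_libfuncs now registers soft-multiply libcalls when the multiplier unit is disabled, so plain C multiplications like the ones below are expected to become calls to __pruabi_softmpyll and __pruabi_softmpyi instead of using the xin-based hardware multiply pattern.

/* Sketch only, not part of the patch: expected lowering with -mno-mul.  */

unsigned long long
mul64 (unsigned long long a, unsigned long long b)
{
  return a * b;	/* Expected to become a call to __pruabi_softmpyll.  */
}

unsigned int
mul32 (unsigned int a, unsigned int b)
{
  return a * b;	/* Expected to become a call to __pruabi_softmpyi.  */
}
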
diff --git a/gcc/config/pru/pru.opt.urls b/gcc/config/pru/pru.opt.urls
index c87affb112b8..5c5789214bd5 100644
--- a/gcc/config/pru/pru.opt.urls
+++ b/gcc/config/pru/pru.opt.urls
@@ -12,6 +12,12 @@ UrlSuffix(gcc/PRU-Options.html#index-mno-relax-1)
 mloop
 UrlSuffix(gcc/PRU-Options.html#index-mloop)
 
+mmul
+UrlSuffix(gcc/PRU-Options.html#index-mmul)
+
+mfillzero
+UrlSuffix(gcc/PRU-Options.html#index-mfillzero)
+
 mabi=
 UrlSuffix(gcc/PRU-Options.html#index-mabi-4)
 
diff --git a/gcc/config/pru/t-multilib b/gcc/config/pru/t-multilib
new file mode 100644
index 000000000000..1e3c2b8abaa7
--- /dev/null
+++ b/gcc/config/pru/t-multilib
@@ -0,0 +1,29 @@
+# Copyright (C) 2025 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it under
+# the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 3, or (at your option) any later
+# version.
+#
+# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+MULTILIB_OPTIONS =
+MULTILIB_OPTIONS += mloop/mno-loop
+MULTILIB_OPTIONS += mmul/mno-mul
+MULTILIB_OPTIONS += mfillzero/mno-fillzero
+
+# Build two variants:
+#  - Newer PRU core versions, present in AM335x and later.
+#  - Older PRU core versions, present in AM18xx.
+MULTILIB_REQUIRED =
+MULTILIB_REQUIRED += mloop/mmul/mfillzero
+MULTILIB_REQUIRED += mno-loop/mno-mul/mno-fillzero
diff --git a/gcc/config/riscv/arch-canonicalize b/gcc/config/riscv/arch-canonicalize
index 34dad45233ae..15a398502b38 100755
--- a/gcc/config/riscv/arch-canonicalize
+++ b/gcc/config/riscv/arch-canonicalize
@@ -20,77 +20,326 @@
 # along with GCC; see the file COPYING3.  If not see
 # <http://www.gnu.org/licenses/>.
 
-# TODO: Extract riscv_subset_t from riscv-common.cc and make it can be compiled
-# standalone to replace this script, that also prevents us implementing
-# that twice and keep sync again and again.
- from __future__ import print_function import sys import argparse import collections import itertools +import re +import os from functools import reduce SUPPORTED_ISA_SPEC = ["2.2", "20190608", "20191213"] CANONICAL_ORDER = "imafdqlcbkjtpvnh" LONG_EXT_PREFIXES = ['z', 's', 'h', 'x'] +def parse_define_riscv_ext(content): + """Parse DEFINE_RISCV_EXT macros using position-based parsing.""" + extensions = [] + + # Find all DEFINE_RISCV_EXT blocks + pattern = r'DEFINE_RISCV_EXT\s*\(' + matches = [] + + pos = 0 + while True: + match = re.search(pattern, content[pos:]) + if not match: + break + + start_pos = pos + match.start() + paren_count = 0 + current_pos = pos + match.end() - 1 # Start at the opening parenthesis + + # Find the matching closing parenthesis + while current_pos < len(content): + if content[current_pos] == '(': + paren_count += 1 + elif content[current_pos] == ')': + paren_count -= 1 + if paren_count == 0: + break + current_pos += 1 + + if paren_count == 0: + # Extract the content inside parentheses + macro_content = content[pos + match.end():current_pos] + ext_data = parse_macro_arguments(macro_content) + if ext_data: + extensions.append(ext_data) + + pos = current_pos + 1 + + return extensions + +def parse_macro_arguments(macro_content): + """Parse the arguments of a DEFINE_RISCV_EXT macro.""" + # Remove comments /* ... */ + cleaned_content = re.sub(r'/\*[^*]*\*/', '', macro_content) + + # Split arguments by comma, but respect nested structures + args = [] + current_arg = "" + paren_count = 0 + brace_count = 0 + in_string = False + escape_next = False + + for char in cleaned_content: + if escape_next: + current_arg += char + escape_next = False + continue + + if char == '\\': + escape_next = True + current_arg += char + continue + + if char == '"' and not escape_next: + in_string = not in_string + current_arg += char + continue + + if in_string: + current_arg += char + continue + + if char == '(': + paren_count += 1 + elif char == ')': + paren_count -= 1 + elif char == '{': + brace_count += 1 + elif char == '}': + brace_count -= 1 + elif char == ',' and paren_count == 0 and brace_count == 0: + args.append(current_arg.strip()) + current_arg = "" + continue + + current_arg += char + + # Add the last argument + if current_arg.strip(): + args.append(current_arg.strip()) + + # We need at least 6 arguments to get DEP_EXTS (position 5) + if len(args) < 6: + return None + + ext_name = args[0].strip() + dep_exts_arg = args[5].strip() # DEP_EXTS is at position 5 + + # Parse dependency extensions from the DEP_EXTS argument + deps = parse_dep_exts(dep_exts_arg) + + return { + 'name': ext_name, + 'dep_exts': deps + } + +def parse_dep_exts(dep_exts_str): + """Parse the DEP_EXTS argument to extract dependency list with conditions.""" + # Remove outer parentheses if present + dep_exts_str = dep_exts_str.strip() + if dep_exts_str.startswith('(') and dep_exts_str.endswith(')'): + dep_exts_str = dep_exts_str[1:-1].strip() + + # Remove outer braces if present + if dep_exts_str.startswith('{') and dep_exts_str.endswith('}'): + dep_exts_str = dep_exts_str[1:-1].strip() + + if not dep_exts_str: + return [] + + deps = [] + + # First, find and process conditional dependencies + conditional_pattern = r'\{\s*"([^"]+)"\s*,\s*(\[.*?\]\s*\([^)]*\)\s*->\s*bool.*?)\}' + conditional_matches = [] + + for match in re.finditer(conditional_pattern, dep_exts_str, re.DOTALL): + ext_name = match.group(1) + condition_code = match.group(2) + deps.append({'ext': ext_name, 'type': 'conditional', 'condition': 
condition_code}) + # The conditional_pattern RE matches only the first code block enclosed + # in braces. + # + # Extend the match to the condition block's closing brace, encompassing + # all code blocks, by simply trying to match the numbers of opening + # and closing braces. While crude, this avoids writing a complicated + # parse here. + closing_braces_left = condition_code.count('{') - condition_code.count('}') + condition_end = match.end() + while closing_braces_left > 0: + condition_end = dep_exts_str.find('}', condition_end) + closing_braces_left -= 1 + conditional_matches.append((match.start(), condition_end)) + + # Remove conditional dependency blocks from the string + remaining_str = dep_exts_str + for start, end in reversed(conditional_matches): # Reverse order to maintain indices + remaining_str = remaining_str[:start] + remaining_str[end:] + + # Now handle simple quoted strings in the remaining text + for match in re.finditer(r'"([^"]+)"', remaining_str): + deps.append({'ext': match.group(1), 'type': 'simple'}) + + # Remove duplicates while preserving order + seen = set() + unique_deps = [] + for dep in deps: + key = (dep['ext'], dep['type']) + if key not in seen: + seen.add(key) + unique_deps.append(dep) + + return unique_deps + +def evaluate_conditional_dependency(ext, dep, xlen, current_exts): + """Evaluate whether a conditional dependency should be included.""" + ext_name = dep['ext'] + condition = dep['condition'] + # Parse the condition based on known patterns + if ext_name == 'zcf' and ext in ['zca', 'c', 'zce']: + # zcf depends on RV32 and F extension + return xlen == 32 and 'f' in current_exts + elif ext_name == 'zcd' and ext in ['zca', 'c']: + # zcd depends on D extension + return 'd' in current_exts + elif ext_name == 'c' and ext in ['zca']: + # Special case for zca -> c conditional dependency + if xlen == 32: + if 'd' in current_exts: + return 'zcf' in current_exts and 'zcd' in current_exts + elif 'f' in current_exts: + return 'zcf' in current_exts + else: + return True + elif xlen == 64: + if 'd' in current_exts: + return 'zcd' in current_exts + else: + return True + return False + else: + # Report error for unhandled conditional dependencies + import sys + print(f"ERROR: Unhandled conditional dependency: '{ext_name}' with condition:", file=sys.stderr) + print(f" Condition code: {condition[:100]}...", file=sys.stderr) + print(f" Current context: xlen={xlen}, exts={sorted(current_exts)}", file=sys.stderr) + # For now, return False to be safe + return False + +def resolve_dependencies(arch_parts, xlen): + """Resolve all dependencies including conditional ones.""" + current_exts = set(arch_parts) + implied_deps = set() + + # Keep resolving until no new dependencies are found + changed = True + while changed: + changed = False + new_deps = set() + + for ext in current_exts | implied_deps: + if ext in IMPLIED_EXT: + for dep in IMPLIED_EXT[ext]: + if dep['type'] == 'simple': + if dep['ext'] not in current_exts and dep['ext'] not in implied_deps: + new_deps.add(dep['ext']) + changed = True + elif dep['type'] == 'conditional': + should_include = evaluate_conditional_dependency(ext, dep, xlen, current_exts | implied_deps) + if should_include: + if dep['ext'] not in current_exts and dep['ext'] not in implied_deps: + new_deps.add(dep['ext']) + changed = True + + implied_deps.update(new_deps) + + return implied_deps + +def parse_def_file(file_path, script_dir, processed_files=None, collect_all=False): + """Parse a single .def file and recursively process #include 
directives.""" + if processed_files is None: + processed_files = set() + + # Avoid infinite recursion + if file_path in processed_files: + return ({}, set()) if collect_all else {} + processed_files.add(file_path) + + implied_ext = {} + all_extensions = set() if collect_all else None + + if not os.path.exists(file_path): + return (implied_ext, all_extensions) if collect_all else implied_ext + + with open(file_path, 'r') as f: + content = f.read() + + # Process #include directives first + include_pattern = r'#include\s+"([^"]+)"' + includes = re.findall(include_pattern, content) + + for include_file in includes: + include_path = os.path.join(script_dir, include_file) + if collect_all: + included_ext, included_all = parse_def_file(include_path, script_dir, processed_files, collect_all) + implied_ext.update(included_ext) + all_extensions.update(included_all) + else: + included_ext = parse_def_file(include_path, script_dir, processed_files, collect_all) + implied_ext.update(included_ext) + + # Parse DEFINE_RISCV_EXT blocks using position-based parsing + parsed_exts = parse_define_riscv_ext(content) + + for ext_data in parsed_exts: + ext_name = ext_data['name'] + deps = ext_data['dep_exts'] + + if collect_all: + all_extensions.add(ext_name) + + if deps: + implied_ext[ext_name] = deps + + return (implied_ext, all_extensions) if collect_all else implied_ext + +def parse_def_files(): + """Parse RISC-V extension definition files starting from riscv-ext.def.""" + # Get directory containing this script + try: + script_dir = os.path.dirname(os.path.abspath(__file__)) + except NameError: + # When __file__ is not defined (e.g., interactive mode) + script_dir = os.getcwd() + + # Start with the main definition file + main_def_file = os.path.join(script_dir, 'riscv-ext.def') + return parse_def_file(main_def_file, script_dir) + +def get_all_extensions(): + """Get all supported extensions and their implied extensions.""" + # Get directory containing this script + try: + script_dir = os.path.dirname(os.path.abspath(__file__)) + except NameError: + # When __file__ is not defined (e.g., interactive mode) + script_dir = os.getcwd() + + # Start with the main definition file + main_def_file = os.path.join(script_dir, 'riscv-ext.def') + return parse_def_file(main_def_file, script_dir, collect_all=True) + # # IMPLIED_EXT(ext) -> implied extension list. 
+# This is loaded dynamically from .def files
 #
-IMPLIED_EXT = {
-  "d" : ["f", "zicsr"],
-
-  "a" : ["zaamo", "zalrsc"],
-  "zabha" : ["zaamo"],
-  "zacas" : ["zaamo"],
-
-  "f" : ["zicsr"],
-  "b" : ["zba", "zbb", "zbs"],
-  "zdinx" : ["zfinx", "zicsr"],
-  "zfinx" : ["zicsr"],
-  "zhinx" : ["zhinxmin", "zfinx", "zicsr"],
-  "zhinxmin" : ["zfinx", "zicsr"],
-
-  "zk" : ["zkn", "zkr", "zkt"],
-  "zkn" : ["zbkb", "zbkc", "zbkx", "zkne", "zknd", "zknh"],
-  "zks" : ["zbkb", "zbkc", "zbkx", "zksed", "zksh"],
-
-  "v" : ["zvl128b", "zve64d"],
-  "zve32x" : ["zvl32b"],
-  "zve64x" : ["zve32x", "zvl64b"],
-  "zve32f" : ["f", "zve32x"],
-  "zve64f" : ["f", "zve32f", "zve64x"],
-  "zve64d" : ["d", "zve64f"],
-
-  "zvl64b" : ["zvl32b"],
-  "zvl128b" : ["zvl64b"],
-  "zvl256b" : ["zvl128b"],
-  "zvl512b" : ["zvl256b"],
-  "zvl1024b" : ["zvl512b"],
-  "zvl2048b" : ["zvl1024b"],
-  "zvl4096b" : ["zvl2048b"],
-  "zvl8192b" : ["zvl4096b"],
-  "zvl16384b" : ["zvl8192b"],
-  "zvl32768b" : ["zvl16384b"],
-  "zvl65536b" : ["zvl32768b"],
-
-  "zvkn" : ["zvkned", "zvknhb", "zvkb", "zvkt"],
-  "zvknc" : ["zvkn", "zvbc"],
-  "zvkng" : ["zvkn", "zvkg"],
-  "zvks" : ["zvksed", "zvksh", "zvkb", "zvkt"],
-  "zvksc" : ["zvks", "zvbc"],
-  "zvksg" : ["zvks", "zvkg"],
-  "zvbb" : ["zvkb"],
-  "zvbc" : ["zve64x"],
-  "zvkb" : ["zve32x"],
-  "zvkg" : ["zve32x"],
-  "zvkned" : ["zve32x"],
-  "zvknha" : ["zve32x"],
-  "zvknhb" : ["zve64x"],
-  "zvksed" : ["zve32x"],
-  "zvksh" : ["zve32x"],
-}
+IMPLIED_EXT = parse_def_files()
 
 def arch_canonicalize(arch, isa_spec):
   # TODO: Support extension version.
@@ -123,21 +372,31 @@ def arch_canonicalize(arch, isa_spec):
   long_exts += extra_long_ext
 
   #
-  # Handle implied extensions.
+  # Handle implied extensions using new conditional logic.
   #
-  any_change = True
-  while any_change:
-    any_change = False
-    for ext in std_exts + long_exts:
-      if ext in IMPLIED_EXT:
-        implied_exts = IMPLIED_EXT[ext]
-        for implied_ext in implied_exts:
-          if implied_ext == 'zicsr' and is_isa_spec_2p2:
-            continue
+  # Extract xlen from architecture string
+  # TODO: We should support profile here.
+  if arch.startswith('rv32'):
+    xlen = 32
+  elif arch.startswith('rv64'):
+    xlen = 64
+  else:
+    raise Exception("Unsupported prefix `%s`" % arch)
 
-          if implied_ext not in std_exts + long_exts:
-            long_exts.append(implied_ext)
-            any_change = True
+  # Get all current extensions
+  current_exts = std_exts + long_exts
+
+  # Resolve dependencies
+  implied_deps = resolve_dependencies(current_exts, xlen)
+
+  # Filter out zicsr for ISA spec 2.2
+  if is_isa_spec_2p2:
+    implied_deps.discard('zicsr')
+
+  # Add implied dependencies to long_exts
+  for dep in implied_deps:
+    if dep not in current_exts:
+      long_exts.append(dep)
 
   # Single letter extension might appear in the long_exts list,
   # because we just append extensions list to the arch string.
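The removed table above now comes from riscv-ext.def at run time, and resolve_dependencies closes over it iteratively. A minimal self-contained sketch of that fixed-point closure (illustrative names, not the script itself; it covers only the simple, unconditional dependency kind, whereas the real code additionally evaluates conditional entries against xlen and the current extension set):

# Sketch of the fixed-point closure behind resolve_dependencies, using two
# entries from the old IMPLIED_EXT table and only 'simple' dependencies.
IMPLIED = {
    "d": ["f", "zicsr"],
    "f": ["zicsr"],
}

def closure(exts):
    result = set(exts)
    changed = True
    while changed:
        changed = False
        for ext in list(result):
            for dep in IMPLIED.get(ext, []):
                if dep not in result:
                    result.add(dep)
                    changed = True
    return result

assert closure({"d"}) == {"d", "f", "zicsr"}
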
@@ -179,17 +438,177 @@ def arch_canonicalize(arch, isa_spec): return new_arch -if len(sys.argv) < 2: - print ("Usage: %s [*]" % sys.argv) - sys.exit(1) +def dump_all_extensions(): + """Dump all extensions and their implied extensions.""" + implied_ext, all_extensions = get_all_extensions() + + print("All supported RISC-V extensions:") + print("=" * 60) + + if not all_extensions: + print("No extensions found.") + return -parser = argparse.ArgumentParser() -parser.add_argument('-misa-spec', type=str, - default='20191213', - choices=SUPPORTED_ISA_SPEC) -parser.add_argument('arch_strs', nargs=argparse.REMAINDER) + # Sort all extensions for consistent output + sorted_all = sorted(all_extensions) -args = parser.parse_args() + # Print all extensions with their dependencies (if any) + for ext_name in sorted_all: + if ext_name in implied_ext: + deps = implied_ext[ext_name] + dep_strs = [] + for dep in deps: + if dep['type'] == 'simple': + dep_strs.append(dep['ext']) + else: + dep_strs.append(f"{dep['ext']}*") # Mark conditional deps with * + print(f"{ext_name:15} -> {', '.join(dep_strs)}") + else: + print(f"{ext_name:15} -> (no dependencies)") + + print(f"\nTotal extensions: {len(all_extensions)}") + print(f"Extensions with dependencies: {len(implied_ext)}") + print(f"Extensions without dependencies: {len(all_extensions) - len(implied_ext)}") + +def run_unit_tests(): + """Run unit tests using pytest dynamically imported.""" + try: + import pytest + except ImportError: + print("Error: pytest is required for running unit tests.") + print("Please install pytest: pip install pytest") + return 1 + + # Define test functions + def test_basic_arch_parsing(): + """Test basic architecture string parsing.""" + result = arch_canonicalize("rv64i", "20191213") + assert result == "rv64i" + + def test_simple_extensions(): + """Test simple extension handling.""" + result = arch_canonicalize("rv64im", "20191213") + assert "zmmul" in result + + def test_implied_extensions(): + """Test implied extension resolution.""" + result = arch_canonicalize("rv64imaf", "20191213") + assert "zicsr" in result + + def test_conditional_dependencies(): + """Test conditional dependency evaluation.""" + # Test RV32 with F extension should include zcf when c is present + result = arch_canonicalize("rv32ifc", "20191213") + parts = result.split("_") + if "c" in parts: + assert "zca" in parts + if "f" in parts: + assert "zcf" in parts + + def test_parse_dep_exts(): + """Test dependency parsing function.""" + # Test simple dependency + deps = parse_dep_exts('{"ext1", "ext2"}') + assert len(deps) == 2 + assert deps[0]['ext'] == 'ext1' + assert deps[0]['type'] == 'simple' + + def test_evaluate_conditional_dependency(): + """Test conditional dependency evaluation.""" + # Test zcf condition for RV32 with F + dep = {'ext': 'zcf', 'type': 'conditional', 'condition': 'test'} + result = evaluate_conditional_dependency('zce', dep, 32, {'f'}) + assert result == True + + # Test zcf condition for RV64 with F (should be False) + result = evaluate_conditional_dependency('zce', dep, 64, {'f'}) + assert result == False + + def test_parse_define_riscv_ext(): + """Test DEFINE_RISCV_EXT parsing.""" + content = ''' + DEFINE_RISCV_EXT( + /* NAME */ test, + /* UPPERCASE_NAME */ TEST, + /* FULL_NAME */ "Test extension", + /* DESC */ "", + /* URL */ , + /* DEP_EXTS */ ({"dep1", "dep2"}), + /* SUPPORTED_VERSIONS */ ({{1, 0}}), + /* FLAG_GROUP */ test, + /* BITMASK_GROUP_ID */ 0, + /* BITMASK_BIT_POSITION*/ 0, + /* EXTRA_EXTENSION_FLAGS */ 0) + ''' + + extensions = 
parse_define_riscv_ext(content) + assert len(extensions) == 1 + assert extensions[0]['name'] == 'test' + assert len(extensions[0]['dep_exts']) == 2 -for arch in args.arch_strs: - print (arch_canonicalize(arch, args.misa_spec)) + def test_parse_long_condition_block(): + """Test condition block containing several code blocks.""" + result = arch_canonicalize("rv32ec", "20191213") + assert "rv32ec_zca" in result + + # Collect test functions + test_functions = [ + test_basic_arch_parsing, + test_simple_extensions, + test_implied_extensions, + test_conditional_dependencies, + test_parse_dep_exts, + test_evaluate_conditional_dependency, + test_parse_define_riscv_ext, + test_parse_long_condition_block + ] + + # Run tests manually first, then optionally with pytest + print("Running unit tests...") + + passed = 0 + failed = 0 + + for i, test_func in enumerate(test_functions): + try: + print(f" Running {test_func.__name__}...", end=" ") + test_func() + print("PASSED") + passed += 1 + except Exception as e: + print(f"FAILED: {e}") + failed += 1 + + print(f"\nTest Summary: {passed} passed, {failed} failed") + + if failed == 0: + print("\nAll tests passed!") + return 0 + else: + print(f"\n{failed} test(s) failed!") + return 1 + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('-misa-spec', type=str, + default='20191213', + choices=SUPPORTED_ISA_SPEC) + parser.add_argument('--dump-all', action='store_true', + help='Dump all extensions and their implied extensions') + parser.add_argument('--selftest', action='store_true', + help='Run unit tests using pytest') + parser.add_argument('arch_strs', nargs='*', + help='Architecture strings to canonicalize') + + args = parser.parse_args() + + if args.dump_all: + dump_all_extensions() + elif args.selftest: + sys.exit(run_unit_tests()) + elif args.arch_strs: + for arch in args.arch_strs: + print (arch_canonicalize(arch, args.misa_spec)) + else: + parser.print_help() + sys.exit(1) diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 65319960bc01..29bdfc3d90fc 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -1679,6 +1679,26 @@ ;; Combine vec_duplicate + op.vv to op.vx ;; Include ;; - vadd.vx +;; - vsub.vx +;; - vrsub.vx +;; - vand.vx +;; - vor.vx +;; - vmul.vx +;; - vdiv.vx +;; - vdivu.vx +;; - vrem.vx +;; - vremu.vx +;; - vmax.vx +;; - vmaxu.vx +;; - vmin.vx +;; - vminu.vx +;; - vsadd.vx +;; - vsaddu.vx +;; - vssub.vx +;; - vssubu.vx +;; - vaadd.vx +;; - vaaddu.vx +;; - vmerge.vxm ;; ============================================================================= (define_insn_and_split "*_vx_" [(set (match_operand:V_VLSI 0 "register_operand") @@ -1694,6 +1714,8 @@ riscv_vector::expand_vx_binary_vec_dup_vec (operands[0], operands[2], operands[1], , mode); + + DONE; } [(set_attr "type" "vialu")]) @@ -1711,6 +1733,8 @@ riscv_vector::expand_vx_binary_vec_vec_dup (operands[0], operands[1], operands[2], , mode); + + DONE; } [(set_attr "type" "vialu")]) @@ -1782,6 +1806,47 @@ } [(set_attr "type" "vaalu")]) +(define_insn_and_split "*merge_vx_" + [(set (match_operand:V_VLSI 0 "register_operand") + (if_then_else:V_VLSI + (match_operand: 3 "vector_mask_operand") + (vec_duplicate:V_VLSI + (match_operand: 2 "reg_or_int_operand")) + (match_operand:V_VLSI 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_merge_scalar (mode); + riscv_vector::emit_vlmax_insn (icode, riscv_vector::MERGE_OP, 
operands); + DONE; + } + [(set_attr "type" "vimerge")]) + +(define_insn_and_split "*vmacc_vx_" + [(set (match_operand:V_VLSI 0 "register_operand") + (plus:V_VLSI + (mult:V_VLSI + (vec_duplicate:V_VLSI + (match_operand: 1 "register_operand")) + (match_operand:V_VLSI 2 "register_operand")) + (match_operand:V_VLSI 3 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + insn_code icode = code_for_pred_mul_plus_vx (mode); + rtx ops[] = {operands[0], operands[1], operands[2], operands[3], + RVV_VUNDEF(mode)}; + riscv_vector::emit_vlmax_insn (icode, riscv_vector::TERNARY_OP, ops); + + DONE; + } + [(set_attr "type" "vimuladd")]) + + ;; ============================================================================= ;; Combine vec_duplicate + op.vv to op.vf ;; Include @@ -1962,3 +2027,41 @@ } [(set_attr "type" "vfwmuladd")] ) + +;; vfmul.vf +(define_insn_and_split "*vfmul_vf_" + [(set (match_operand:V_VLSF 0 "register_operand") + (mult:V_VLSF + (vec_duplicate:V_VLSF + (match_operand: 2 "register_operand")) + (match_operand:V_VLSF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_scalar (MULT, mode), + riscv_vector::BINARY_OP_FRM_DYN, operands); + DONE; + } + [(set_attr "type" "vfmuladd")] +) + +;; vfrdiv.vf +(define_insn_and_split "*vfrdiv_vf_" + [(set (match_operand:V_VLSF 0 "register_operand") + (div:V_VLSF + (vec_duplicate:V_VLSF + (match_operand: 2 "register_operand")) + (match_operand:V_VLSF 1 "register_operand")))] + "TARGET_VECTOR && can_create_pseudo_p ()" + "#" + "&& 1" + [(const_int 0)] + { + riscv_vector::emit_vlmax_insn (code_for_pred_reverse_scalar (DIV, mode), + riscv_vector::BINARY_OP_FRM_DYN, operands); + DONE; + } + [(set_attr "type" "vfmuladd")] +) diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md index 5ecaa19eb014..979e0df06f10 100644 --- a/gcc/config/riscv/constraints.md +++ b/gcc/config/riscv/constraints.md @@ -330,3 +330,7 @@ (define_constraint "Q" "An address operand that is valid for a prefetch instruction" (match_operand 0 "prefetch_operand")) + +(define_address_constraint "ZD" + "An address operand that is valid for a mips prefetch instruction" + (match_test "riscv_prefetch_offset_address_p (op, mode)")) diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md index 381f96c3e725..bdb3d22258d9 100644 --- a/gcc/config/riscv/predicates.md +++ b/gcc/config/riscv/predicates.md @@ -27,10 +27,14 @@ (ior (match_operand 0 "const_arith_operand") (match_operand 0 "register_operand"))) +(define_predicate "prefetch_const_operand" + (and (match_code "const_int") + (match_test "(IN_RANGE (INTVAL (op), 0, 511))"))) + ;; REG or REG+D where D fits in a simm12 and has the low 5 bits ;; off. The REG+D form can be reloaded into a temporary if needed ;; after FP elimination if that exposes an invalid offset. 
-(define_predicate "prefetch_operand" +(define_predicate "zicbop_prefetch_operand" (ior (match_operand 0 "register_operand") (and (match_test "const_arith_operand (op, VOIDmode)") (match_test "(INTVAL (op) & 0x1f) == 0")) @@ -39,6 +43,20 @@ (match_test "const_arith_operand (XEXP (op, 1), VOIDmode)") (match_test "(INTVAL (XEXP (op, 1)) & 0x1f) == 0")))) +;; REG or REG+D where D fits in a uimm9 +(define_predicate "mips_prefetch_operand" + (ior (match_operand 0 "register_operand") + (match_test "prefetch_const_operand (op, VOIDmode)") + (and (match_code "plus") + (match_test "register_operand (XEXP (op, 0), word_mode)") + (match_test "prefetch_const_operand (XEXP (op, 1), VOIDmode)")))) + +;; MIPS specific or Standard RISCV Extension +(define_predicate "prefetch_operand" + (if_then_else (match_test "TARGET_XMIPSCBOP") + (match_operand 0 "mips_prefetch_operand") + (match_operand 0 "zicbop_prefetch_operand"))) + (define_predicate "lui_operand" (and (match_code "const_int") (match_test "LUI_OPERAND (INTVAL (op))"))) diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def index 98f347034fbb..8f0f63078ffc 100644 --- a/gcc/config/riscv/riscv-cores.def +++ b/gcc/config/riscv/riscv-cores.def @@ -113,7 +113,7 @@ RISCV_CORE("xt-c908v", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicsr_" "zvfh_sstc_svinval_svnapot_svpbmt__xtheadba_" "xtheadbb_xtheadbs_xtheadcmo_xtheadcondmov_" "xtheadfmemidx_xtheadmac_xtheadmemidx_" - "xtheadmempair_xtheadsync_xtheadvdot", + "xtheadmempair_xtheadsync", "xt-c908") RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" @@ -121,7 +121,7 @@ RISCV_CORE("xt-c910", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" "xtheadmemidx_xtheadmempair_xtheadsync", "xt-c910") RISCV_CORE("xt-c910v2", "rv64imafdc_zicbom_zicbop_zicboz_zicntr_zicond_" - "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zicsr_zifencei_zihintntl_zihintpause_zihpm_" "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" "zbs_sscofpmf_sstc_svinval_svnapot_svpbmt_" "xtheadba_xtheadbb_xtheadbs_xtheadcmo_" @@ -135,13 +135,13 @@ RISCV_CORE("xt-c920", "rv64imafdc_zicntr_zicsr_zifencei_zihpm_zfh_" "xtheadvector", "xt-c910") RISCV_CORE("xt-c920v2", "rv64imafdcv_zicbom_zicbop_zicboz_zicntr_zicond_" - "zicsr_zifencei _zihintntl_zihintpause_zihpm_" + "zicsr_zifencei_zihintntl_zihintpause_zihpm_" "zawrs_zfa_zfbfmin_zfh_zca_zcb_zcd_zba_zbb_zbc_" "zbs_zvfbfmin_zvfbfwma_zvfh_sscofpmf_sstc_" "svinval_svnapot_svpbmt_xtheadba_xtheadbb_" "xtheadbs_xtheadcmo_xtheadcondmov_xtheadfmemidx_" "xtheadmac_xtheadmemidx_xtheadmempair_" - "xtheadsync_xtheadvdot", + "xtheadsync", "xt-c920v2") RISCV_CORE("tt-ascalon-d8", "rv64imafdcv_zic64b_zicbom_zicbop_zicboz_" diff --git a/gcc/config/riscv/riscv-ext-mips.def b/gcc/config/riscv/riscv-ext-mips.def index 5d7836d59998..132f6c1060d5 100644 --- a/gcc/config/riscv/riscv-ext-mips.def +++ b/gcc/config/riscv/riscv-ext-mips.def @@ -33,3 +33,16 @@ DEFINE_RISCV_EXT ( /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED, /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED, /* EXTRA_EXTENSION_FLAGS. */ 0) + +DEFINE_RISCV_EXT ( + /* NAME. */ xmipscbop, + /* UPPERCASE_NAME. */ XMIPSCBOP, + /* FULL_NAME. */ "Mips Prefetch extension", + /* DESC. */ "", + /* URL. */ , + /* DEP_EXTS. */ ({}), + /* SUPPORTED_VERSIONS. */ ({{1, 0}}), + /* FLAG_GROUP. */ xmips, + /* BITMASK_GROUP_ID. */ BITMASK_NOT_YET_ALLOCATED, + /* BITMASK_BIT_POSITION. */ BITMASK_NOT_YET_ALLOCATED, + /* EXTRA_EXTENSION_FLAGS. 
*/ 0) diff --git a/gcc/config/riscv/riscv-ext.opt b/gcc/config/riscv/riscv-ext.opt index 26d6e683acd6..ced05d22311e 100644 --- a/gcc/config/riscv/riscv-ext.opt +++ b/gcc/config/riscv/riscv-ext.opt @@ -449,3 +449,5 @@ Mask(XTHEADVECTOR) Var(riscv_xthead_subext) Mask(XVENTANACONDOPS) Var(riscv_xventana_subext) Mask(XMIPSCMOV) Var(riscv_xmips_subext) + +Mask(XMIPSCBOP) Var(riscv_xmips_subext) diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index 539321ff95b8..2d60a0ad44b3 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -143,6 +143,7 @@ extern void riscv_expand_sstrunc (rtx, rtx); extern int riscv_register_move_cost (machine_mode, reg_class_t, reg_class_t); extern bool synthesize_ior_xor (rtx_code, rtx [3]); extern bool synthesize_and (rtx [3]); +extern bool synthesize_add (rtx [3]); #ifdef RTX_CODE extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0); @@ -830,16 +831,18 @@ extern bool th_print_operand_address (FILE *, machine_mode, rtx); extern bool strided_load_broadcast_p (void); extern bool riscv_use_divmod_expander (void); -void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); +void riscv_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, tree, int); extern bool riscv_option_valid_attribute_p (tree, tree, tree, int); extern bool riscv_option_valid_version_attribute_p (tree, tree, tree, int); extern bool -riscv_process_target_version_attr (tree, location_t); +riscv_process_target_version_attr (tree, location_t *); extern void riscv_override_options_internal (struct gcc_options *); extern void riscv_option_override (void); +extern rtx riscv_prefetch_cookie (rtx, rtx); +extern bool riscv_prefetch_offset_address_p (rtx, machine_mode); struct riscv_tune_param; /* Information about one micro-arch we know about. */ diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h index a35537d7754a..4cd860fee59b 100644 --- a/gcc/config/riscv/riscv-subset.h +++ b/gcc/config/riscv/riscv-subset.h @@ -52,8 +52,9 @@ class riscv_subset_list /* Original arch string. */ const char *m_arch; - /* Location of arch string, used for report error. */ - location_t m_loc; + /* A pointer to the location that should be used for diagnostics, + or null if diagnostics should be suppressed. */ + location_t *m_loc; /* Head of subset info list. */ riscv_subset_t *m_head; @@ -70,7 +71,7 @@ class riscv_subset_list /* Allow adding the same extension more than once. 
*/ bool m_allow_adding_dup; - riscv_subset_list (const char *, location_t); + riscv_subset_list (const char *, location_t *); const char *parsing_subset_version (const char *, const char *, unsigned *, unsigned *, bool, bool *); @@ -106,12 +107,12 @@ class riscv_subset_list riscv_subset_list *clone () const; - static riscv_subset_list *parse (const char *, location_t); + static riscv_subset_list *parse (const char *, location_t *); const char *parse_single_ext (const char *, bool exact_single_p = true); int match_score (riscv_subset_list *) const; - void set_loc (location_t); + void set_loc (location_t *); void set_allow_adding_dup (bool v) { m_allow_adding_dup = v; } @@ -182,7 +183,7 @@ extern void riscv_set_arch_by_subset_list (riscv_subset_list *, struct gcc_options *); extern bool riscv_minimal_hwprobe_feature_bits (const char *, struct riscv_feature_bits *, - location_t); + location_t *); extern bool riscv_ext_is_subset (struct cl_target_option *, struct cl_target_option *); diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc index 8ad3025579b2..5e01c9214548 100644 --- a/gcc/config/riscv/riscv-target-attr.cc +++ b/gcc/config/riscv/riscv-target-attr.cc @@ -34,7 +34,7 @@ namespace { class riscv_target_attr_parser { public: - riscv_target_attr_parser (location_t loc) + riscv_target_attr_parser (location_t *loc) : m_found_arch_p (false) , m_found_tune_p (false) , m_found_cpu_p (false) @@ -62,7 +62,7 @@ class riscv_target_attr_parser bool m_found_cpu_p; bool m_found_priority_p; riscv_subset_list *m_subset_list; - location_t m_loc; + location_t *m_loc; const riscv_cpu_info *m_cpu_info; const char *m_tune; int m_priority; @@ -102,15 +102,17 @@ riscv_target_attr_parser::parse_arch (const char *str) { if (TARGET_64BIT && strncmp ("32", str + 2, strlen ("32")) == 0) { - error_at (m_loc, "unexpected arch for % attribute: " - "must start with rv64 but found %qs", str); + if (m_loc) + error_at (*m_loc, "unexpected arch for % attribute: " + "must start with rv64 but found %qs", str); goto fail; } if (!TARGET_64BIT && strncmp ("64", str + 2, strlen ("64")) == 0) { - error_at (m_loc, "unexpected arch for % attribute: " - "must start with rv32 but found %qs", str); + if (m_loc) + error_at (*m_loc, "unexpected arch for % attribute: " + "must start with rv32 but found %qs", str); goto fail; } @@ -140,10 +142,9 @@ riscv_target_attr_parser::parse_arch (const char *str) { if (token[0] != '+') { - error_at ( - m_loc, - "unexpected arch for % attribute: must start " - "with + or rv"); + if (*m_loc) + error_at (*m_loc, "unexpected arch for % " + "attribute: must start with + or rv"); goto fail; } @@ -151,10 +152,9 @@ riscv_target_attr_parser::parse_arch (const char *str) /* Check parse_single_ext has consume all string. 
*/ if (*result != '\0') { - error_at ( - m_loc, - "unexpected arch for % attribute: bad " - "string found %qs", token); + if (m_loc) + error_at (*m_loc, "unexpected arch for % " + "attribute: bad string found %qs", token); goto fail; } @@ -179,8 +179,8 @@ riscv_target_attr_parser::parse_arch (const char *str) bool riscv_target_attr_parser::handle_arch (const char *str) { - if (m_found_arch_p) - error_at (m_loc, "% attribute: arch appears more than once"); + if (m_found_arch_p && m_loc) + error_at (*m_loc, "% attribute: arch appears more than once"); m_found_arch_p = true; return parse_arch (str); } @@ -190,15 +190,16 @@ riscv_target_attr_parser::handle_arch (const char *str) bool riscv_target_attr_parser::handle_cpu (const char *str) { - if (m_found_cpu_p) - error_at (m_loc, "% attribute: cpu appears more than once"); + if (m_found_cpu_p && m_loc) + error_at (*m_loc, "% attribute: cpu appears more than once"); m_found_cpu_p = true; const riscv_cpu_info *cpu_info = riscv_find_cpu (str); if (!cpu_info) { - error_at (m_loc, "% attribute: unknown CPU %qs", str); + if (m_loc) + error_at (*m_loc, "% attribute: unknown CPU %qs", str); return false; } @@ -218,14 +219,15 @@ riscv_target_attr_parser::handle_cpu (const char *str) bool riscv_target_attr_parser::handle_tune (const char *str) { - if (m_found_tune_p) - error_at (m_loc, "% attribute: tune appears more than once"); + if (m_found_tune_p && m_loc) + error_at (*m_loc, "% attribute: tune appears more than once"); m_found_tune_p = true; const struct riscv_tune_info *tune = riscv_parse_tune (str, true); if (tune == nullptr) { - error_at (m_loc, "% attribute: unknown TUNE %qs", str); + if (m_loc) + error_at (*m_loc, "% attribute: unknown TUNE %qs", str); return false; } @@ -237,13 +239,15 @@ riscv_target_attr_parser::handle_tune (const char *str) bool riscv_target_attr_parser::handle_priority (const char *str) { - if (m_found_priority_p) - error_at (m_loc, "% attribute: priority appears more than once"); + if (m_found_priority_p && m_loc) + error_at (*m_loc, "% attribute: priority appears " + "more than once"); m_found_priority_p = true; if (sscanf (str, "%d", &m_priority) != 1) { - error_at (m_loc, "% attribute: invalid priority %qs", str); + if (m_loc) + error_at (*m_loc, "% attribute: invalid priority %qs", str); return false; } @@ -282,7 +286,7 @@ riscv_target_attr_parser::update_settings (struct gcc_options *opts) const static bool riscv_process_one_target_attr (char *arg_str, - location_t loc, + location_t *loc, riscv_target_attr_parser &attr_parser, const struct riscv_attribute_info *attrs) { @@ -290,7 +294,8 @@ riscv_process_one_target_attr (char *arg_str, if (len == 0) { - error_at (loc, "malformed % attribute"); + if (loc) + error_at (*loc, "malformed % attribute"); return false; } @@ -302,10 +307,9 @@ riscv_process_one_target_attr (char *arg_str, if (!arg) { - error_at ( - loc, - "attribute % does not accept an argument", - str_to_check); + if (loc) + error_at (*loc, "attribute % does not " + "accept an argument", str_to_check); return false; } @@ -324,7 +328,8 @@ riscv_process_one_target_attr (char *arg_str, return (&attr_parser->*attr->handler) (arg); } - error_at (loc, "Got unknown attribute %", str_to_check); + if (loc) + error_at (*loc, "Got unknown attribute %", str_to_check); return false; } @@ -347,11 +352,12 @@ num_occurrences_in_str (char c, char *str) } /* Parse the string in ARGS that contains the target attribute information - and update the global target options space. */ + and update the global target options space. 
If LOC is nonnull, report + diagnostics against location *LOC, otherwise remain silent. */ bool riscv_process_target_attr (const char *args, - location_t loc, + location_t *loc, const struct riscv_attribute_info *attrs) { size_t len = strlen (args); @@ -387,8 +393,8 @@ riscv_process_target_attr (const char *args, if (num_attrs != num_semicolons + 1) { - error_at (loc, "malformed % attribute", - args); + if (loc) + error_at (*loc, "malformed % attribute", args); return false; } @@ -399,11 +405,12 @@ riscv_process_target_attr (const char *args, } /* Parse the tree in ARGS that contains the target attribute information - and update the global target options space. */ + and update the global target options space. If LOC is nonnull, report + diagnostics against *LOC, otherwise remain silent. */ static bool riscv_process_target_attr (tree args, - location_t loc, + location_t *loc, const struct riscv_attribute_info *attrs) { if (TREE_CODE (args) == TREE_LIST) @@ -424,7 +431,8 @@ riscv_process_target_attr (tree args, if (TREE_CODE (args) != STRING_CST) { - error_at (loc, "attribute % argument not a string"); + if (loc) + error_at (*loc, "attribute % argument not a string"); return false; } @@ -466,7 +474,7 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int) TREE_TARGET_OPTION (target_option_default_node)); /* Now we can parse the attributes and set &global_options accordingly. */ - ret = riscv_process_target_attr (args, loc, riscv_target_attrs); + ret = riscv_process_target_attr (args, &loc, riscv_target_attrs); if (ret) { riscv_override_options_internal (&global_options); @@ -481,16 +489,19 @@ riscv_option_valid_attribute_p (tree fndecl, tree, tree args, int) } /* Parse the tree in ARGS that contains the target_version attribute - information and update the global target options space. */ + information and update the global target options space. If LOC is nonnull, + report diagnostics against *LOC, otherwise remain silent. */ bool -riscv_process_target_version_attr (tree args, location_t loc) +riscv_process_target_version_attr (tree args, location_t *loc) { if (TREE_CODE (args) == TREE_LIST) { if (TREE_CHAIN (args)) { - error ("attribute % has multiple values"); + if (loc) + error_at (*loc, "attribute % " + "has multiple values"); return false; } args = TREE_VALUE (args); @@ -498,7 +509,8 @@ riscv_process_target_version_attr (tree args, location_t loc) if (!args || TREE_CODE (args) != STRING_CST) { - error ("attribute % argument not a string"); + if (loc) + error_at (*loc, "attribute % argument not a string"); return false; } @@ -541,7 +553,7 @@ riscv_option_valid_version_attribute_p (tree fndecl, tree, tree args, int) cl_target_option_restore (&global_options, &global_options_set, TREE_TARGET_OPTION (target_option_current_node)); - ret = riscv_process_target_version_attr (args, loc); + ret = riscv_process_target_version_attr (args, &loc); /* Set up any additional state. */ if (ret) diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index c9c832827325..edfb4ff4ba60 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -954,6 +954,26 @@ emit_vlmax_masked_gather_mu_insn (rtx target, rtx op, rtx sel, rtx mask) emit_vlmax_insn (icode, BINARY_OP_TAMU, ops); } +/* Function to emit a vslide1up instruction of mode MODE with destination + DEST and slideup element ELT. */ + +rtx +expand_slide1up (machine_mode mode, rtx dest, rtx elt) +{ + unsigned int unspec + = FLOAT_MODE_P (mode) ? 
UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
+ insn_code icode = code_for_pred_slide (unspec, mode);
+ /* RVV Spec 16.3.1
+ The destination vector register group for vslideup cannot overlap the
+ source vector register group, otherwise the instruction encoding
+ is reserved. Thus, we need a new register. */
+ rtx tmp = gen_reg_rtx (mode);
+ rtx ops[] = {tmp, dest, elt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ return tmp;
+}
+
+
/* According to RVV ISA spec (16.5.1. Synthesizing vdecompress):
https://github.com/riscv/riscv-v-spec/blob/master/v-spec.adoc
@@ -1175,16 +1195,7 @@ expand_vector_init_trailing_same_elem (rtx target,
{
rtx dup = expand_vector_broadcast (mode, builder.elt (nelts_reqd - 1));
for (int i = nelts_reqd - trailing_ndups - 1; i >= 0; i--)
- {
- unsigned int unspec
- = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, mode);
- rtx tmp = gen_reg_rtx (mode);
- rtx ops[] = {tmp, dup, builder.elt (i)};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- /* slide1up need source and dest to be different REG. */
- dup = tmp;
- }
+ dup = expand_slide1up (mode, dup, builder.elt (i));
emit_move_insn (target, dup);
return true;
@@ -1717,6 +1728,77 @@ expand_const_vector_stepped (rtx target, rtx src, rvv_builder *builder)
gcc_unreachable ();
}
+/* We don't actually allow this case in legitimate_constant_p but
+ the middle-end still expects us to handle it in an expander
+ (see PR121334). This is assumed to happen very rarely so the
+ implementation is not very efficient, particularly
+ for short vectors.
+*/
+
+static void
+expand_const_vector_onestep (rtx target, rvv_builder &builder)
+{
+ machine_mode mode = GET_MODE (target);
+ gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
+ gcc_assert (builder.nelts_per_pattern () == 2);
+
+ /* We have n encoded patterns
+ {csta_0, cstb_0},
+ {csta_1, cstb_1},
+ ...
+ {csta_{n-1}, cstb_{n-1}}
+ which should become one vector:
+ {csta_0, csta_1, ..., csta_{n-1},
+ cstb_0, cstb_1, ..., cstb_{n-1},
+ ...
+ cstb_0, cstb_1, ..., cstb_{n-1}}.
+
+ In order to achieve this we create a permute/gather constant
+ sel = {0, 1, ..., n - 1, 0, 1, ..., n - 1, ...}
+ and two vectors
+ va = {csta_0, csta_1, ..., csta_{n-1}},
+ vb = {cstb_0, cstb_1, ..., cstb_{n-1}}.
+
+ Then we use a VLMAX gather to "broadcast" vb and afterwards
+ overwrite the first n elements with va. */
+
+ int n = builder.npatterns ();
+ /* { 0, 1, 2, ..., n - 1 }. */
+ rtx vid = gen_reg_rtx (mode);
+ expand_vec_series (vid, const0_rtx, const1_rtx);
+
+ /* { 0, 1, ..., n - 1, 0, 1, ..., n - 1, ... }. */
+ rtx sel = gen_reg_rtx (mode);
+ rtx and_ops[] = {sel, vid, GEN_INT (n - 1)};
+ emit_vlmax_insn (code_for_pred_scalar (AND, mode), BINARY_OP, and_ops);
+
+ /* va = { ELT (0), ELT (1), ... ELT (n - 1) }. */
+ rtx tmp1 = gen_reg_rtx (mode);
+ rtx ops1[] = {tmp1, builder.elt (n - 1)};
+ expand_broadcast (mode, ops1);
+ for (int i = n - 2; i >= 0; i--)
+ tmp1 = expand_slide1up (mode, tmp1, builder.elt (i));
+
+ /* vb = { ELT (n), ELT (n + 1), ... ELT (2 * n - 1) }. */
+ rtx tmp2 = gen_reg_rtx (mode);
+ rtx ops2[] = {tmp2, builder.elt (2 * n - 1)};
+ expand_broadcast (mode, ops2);
+ for (int i = n - 2; i >= 0; i--)
+ tmp2 = expand_slide1up (mode, tmp2, builder.elt (n + i));
+
+ /* Duplicate vb. */
+ rtx tmp3 = gen_reg_rtx (mode);
+ emit_vlmax_gather_insn (tmp3, tmp2, sel);
+
+ /* Overwrite the first n elements with va.
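As a worked example with n = 2: the constant {3, 7, 13, 17, 13, 17, ...} gives va = {3, 7, 7, ...}, vb = {13, 17, 17, ...} and sel = {0, 1, 0, 1, ...}; the gather replicates vb into {13, 17, 13, 17, ...} and the vl = n move below then overwrites lanes 0 and 1 with va, yielding the desired vector.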
*/
+ rtx dest = gen_reg_rtx (mode);
+ insn_code icode = code_for_pred_mov (mode);
+ rtx ops3[] = {dest, tmp3, tmp1};
+ emit_nonvlmax_insn (icode, __MASK_OP_TUMA | UNARY_OP_P, ops3, GEN_INT (n));
+
+ emit_move_insn (target, dest);
+}
+
static void
expand_const_vector (rtx target, rtx src)
{
@@ -1744,6 +1826,8 @@ expand_const_vector (rtx target, rtx src)
if (CONST_VECTOR_DUPLICATE_P (src))
return expand_const_vector_duplicate (target, &builder);
+ else if (CONST_VECTOR_NELTS_PER_PATTERN (src) == 2)
+ return expand_const_vector_onestep (target, builder);
else if (CONST_VECTOR_STEPPED_P (src))
return expand_const_vector_stepped (target, src, &builder);
@@ -2648,8 +2732,14 @@ expand_vector_init_merge_repeating_sequence (rtx target,
= get_repeating_sequence_dup_machine_mode (builder, mask_bit_mode);
uint64_t full_nelts = builder.full_nelts ().to_constant ();
+ gcc_assert (builder.nelts_per_pattern () == 1
+ || builder.nelts_per_pattern () == 2);
+
+ rtx first
+ = builder.nelts_per_pattern () == 1 ? builder.elt (0) : builder.elt (1);
+
/* Step 1: Broadcast the first pattern. */
- rtx ops[] = {target, force_reg (builder.inner_mode (), builder.elt (0))};
+ rtx ops[] = {target, force_reg (builder.inner_mode (), first)};
expand_broadcast (builder.mode (), ops);
/* Step 2: Merge the rest iteration of pattern. */
for (unsigned int i = 1; i < builder.npatterns (); i++)
@@ -2677,7 +2767,10 @@ expand_vector_init_merge_repeating_sequence (rtx target,
emit_move_insn (mask, gen_lowpart (mask_bit_mode, dup));
/* Step 2-2: Merge pattern according to the mask. */
- rtx ops[] = {target, target, builder.elt (i), mask};
+ unsigned int which = i;
+ if (builder.nelts_per_pattern () == 2)
+ which = 2 * which + 1;
+ rtx ops[] = {target, target, builder.elt (which), mask};
emit_vlmax_insn (code_for_pred_merge_scalar (GET_MODE (target)),
MERGE_OP, ops);
}
@@ -4078,11 +4171,8 @@ shuffle_off_by_one_patterns (struct expand_vec_perm_d *d)
emit_vec_extract (tmp, d->op0, gen_int_mode (nunits - 1, Pmode));
/* Insert the scalar into element 0. */
- unsigned int unspec
- = FLOAT_MODE_P (d->vmode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, d->vmode);
- rtx ops[] = {d->target, d->op1, tmp};
- emit_vlmax_insn (icode, BINARY_OP, ops);
+ rtx slid = expand_slide1up (d->vmode, d->op1, tmp);
+ emit_move_insn (d->target, slid);
}
return true;
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index 44ef44a14353..5e6cb671490c 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -607,7 +607,7 @@ costs::need_additional_vector_vars_p (stmt_vec_info stmt_info,
if (type == load_vec_info_type || type == store_vec_info_type)
{
if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
- && SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_GATHER_SCATTER)
+ && mat_gather_scatter_p (SLP_TREE_MEMORY_ACCESS_TYPE (node)))
return true;
machine_mode mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index e0d8904c1bf8..985fe67f8227 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -3685,7 +3685,8 @@ riscv_legitimize_move (machine_mode mode, rtx dest, rtx src)
/* This test can fail if (for example) we want a HF and Z[v]fh is not
enabled. In that case we just want to let the standard expansion path run.
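gen_lowpart_common returns NULL_RTX when the inner register cannot be viewed in VMODE, which is what the extra check below tests for.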
*/ - if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode)) + if (riscv_vector::get_vector_mode (smode, nunits).exists (&vmode) + && gen_lowpart_common (vmode, SUBREG_REG (src))) { rtx v = gen_lowpart (vmode, SUBREG_REG (src)); rtx int_reg = dest; @@ -3958,41 +3959,6 @@ riscv_extend_cost (rtx op, bool unsigned_p) return COSTS_N_INSNS (2); } -/* Return the cost of the vector binary rtx like add, minus, mult. - The cost of scalar2vr_cost will be appended if there one of the - op comes from the VEC_DUPLICATE. */ - -static int -get_vector_binary_rtx_cost (rtx x, int scalar2vr_cost) -{ - gcc_assert (riscv_v_ext_mode_p (GET_MODE (x))); - - rtx neg; - rtx op_0; - rtx op_1; - - if (GET_CODE (x) == UNSPEC) - { - op_0 = XVECEXP (x, 0, 0); - op_1 = XVECEXP (x, 0, 1); - } - else - { - op_0 = XEXP (x, 0); - op_1 = XEXP (x, 1); - } - - if (GET_CODE (op_0) == VEC_DUPLICATE - || GET_CODE (op_1) == VEC_DUPLICATE) - return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); - else if (GET_CODE (neg = op_0) == NEG - && (GET_CODE (op_1) == VEC_DUPLICATE - || GET_CODE (XEXP (neg, 0)) == VEC_DUPLICATE)) - return (scalar2vr_cost + 1) * COSTS_N_INSNS (1); - else - return COSTS_N_INSNS (1); -} - /* Implement TARGET_RTX_COSTS. */ #define SINGLE_SHIFT_COST 1 @@ -4014,73 +3980,20 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN { case SET: { - switch (GET_CODE (x)) + if (GET_CODE (x) == VEC_DUPLICATE) + *total = (scalar2vr_cost + 1) * COSTS_N_INSNS (1); + else { - case VEC_DUPLICATE: - *total = gr2vr_cost * COSTS_N_INSNS (1); - break; - case IF_THEN_ELSE: - { - rtx op = XEXP (x, 1); + int vec_dup_count = 0; + subrtx_var_iterator::array_type array; - switch (GET_CODE (op)) - { - case DIV: - case UDIV: - case MOD: - case UMOD: - case US_PLUS: - case US_MINUS: - case SS_PLUS: - case SS_MINUS: - *total = get_vector_binary_rtx_cost (op, scalar2vr_cost); - break; - case UNSPEC: - { - switch (XINT (op, 1)) - { - case UNSPEC_VAADDU: - case UNSPEC_VAADD: - *total - = get_vector_binary_rtx_cost (op, scalar2vr_cost); - break; - default: - *total = COSTS_N_INSNS (1); - break; - } - } - break; - default: - *total = COSTS_N_INSNS (1); - break; - } - } - break; - case PLUS: - case MINUS: - case AND: - case IOR: - case XOR: - case MULT: - case SMAX: - case UMAX: - case SMIN: - case UMIN: - { - rtx op; - rtx op_0 = XEXP (x, 0); - rtx op_1 = XEXP (x, 1); + FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) + if (GET_CODE (*iter) == VEC_DUPLICATE) + vec_dup_count++; - if (GET_CODE (op = op_0) == MULT - || GET_CODE (op = op_1) == MULT) - *total = get_vector_binary_rtx_cost (op, scalar2vr_cost); - else - *total = get_vector_binary_rtx_cost (x, scalar2vr_cost); - } - break; - default: - *total = COSTS_N_INSNS (1); - break; + int total_vec_dup_cost = vec_dup_count * scalar2vr_cost; + + *total = COSTS_N_INSNS (1) * (total_vec_dup_cost + 1); } } break; @@ -5532,9 +5445,9 @@ canonicalize_comparands (rtx_code code, rtx *op0, rtx *op1) /* We might have been handed back a SUBREG. Just to make things easy, force it into a REG. */ - if (!REG_P (*op0) && !CONST_INT_P (*op0)) + if (!REG_P (*op0) && !CONST_INT_P (*op0) && INTEGRAL_MODE_P (GET_MODE (*op0))) *op0 = force_reg (word_mode, *op0); - if (!REG_P (*op1) && !CONST_INT_P (*op1)) + if (!REG_P (*op1) && !CONST_INT_P (*op1) && INTEGRAL_MODE_P (GET_MODE (*op1))) *op1 = force_reg (word_mode, *op1); } @@ -6213,7 +6126,8 @@ riscv_pass_vls_aggregate_in_gpr (struct riscv_arg_info *info, machine_mode mode, For a library call, FNTYPE is 0. 
*/
void
-riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx, tree, int)
+riscv_init_cumulative_args (CUMULATIVE_ARGS *cum, const_tree fntype,
+ rtx, tree, int)
{
memset (cum, 0, sizeof (*cum));
@@ -6494,30 +6408,44 @@ riscv_arg_partial_bytes (cumulative_args_t cum,
return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0;
}
-/* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls,
- VALTYPE is the return type and MODE is VOIDmode. For libcalls,
- VALTYPE is null and MODE is the mode of the return value. */
+/* Implements hook TARGET_FUNCTION_VALUE. */
rtx
-riscv_function_value (const_tree type, const_tree func, machine_mode mode)
+riscv_function_value (const_tree ret_type, const_tree fn_decl_or_type,
+ bool)
{
struct riscv_arg_info info;
CUMULATIVE_ARGS args;
- if (type)
+ if (fn_decl_or_type)
{
- int unsigned_p = TYPE_UNSIGNED (type);
+ const_tree fntype = TREE_CODE (fn_decl_or_type) == FUNCTION_DECL ?
+ TREE_TYPE (fn_decl_or_type) : fn_decl_or_type;
+ riscv_init_cumulative_args (&args, fntype, NULL_RTX, NULL_TREE, 0);
+ }
+ else
+ memset (&args, 0, sizeof args);
- mode = TYPE_MODE (type);
+ int unsigned_p = TYPE_UNSIGNED (ret_type);
- /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes,
- return values, promote the mode here too. */
- mode = promote_function_mode (type, mode, &unsigned_p, func, 1);
- }
+ machine_mode mode = TYPE_MODE (ret_type);
- memset (&args, 0, sizeof args);
+ /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes
+ return values, promote the mode here too. */
+ mode = promote_function_mode (ret_type, mode, &unsigned_p, fn_decl_or_type, 1);
- return riscv_get_arg_info (&info, &args, mode, type, true, true);
+ return riscv_get_arg_info (&info, &args, mode, ret_type, true, true);
+}
+
+/* Implements hook TARGET_LIBCALL_VALUE. */
+
+rtx
+riscv_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
+{
+ struct riscv_arg_info info;
+ CUMULATIVE_ARGS args;
+ memset (&args, 0, sizeof args);
+ return riscv_get_arg_info (&info, &args, mode, NULL_TREE, true, true);
}
/* Implement TARGET_PASS_BY_REFERENCE. */
@@ -14037,10 +13965,13 @@ riscv_c_mode_for_floating_type (enum tree_index ti)
return default_mode_for_floating_type (ti);
}
-/* This parses the attribute arguments to target_version in DECL and modifies
- the feature mask and priority required to select those targets. */
+/* Parse the attribute arguments to target_version in DECL and modify
+ the feature mask and priority required to select those targets.
+ If LOC is nonnull, report diagnostics against *LOC, otherwise
+ remain silent.
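+ Both modes are used below: riscv_compare_version_priority parses its
+ two decls with a null LOC, while dispatch_function_versions passes each
+ version decl's source location.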
*/
static void
parse_features_for_version (tree decl,
+ location_t *loc,
struct riscv_feature_bits &res,
int &priority)
{
@@ -14071,14 +14002,12 @@ parse_features_for_version (tree decl,
cl_target_option_restore (&global_options, &global_options_set,
default_opts);
- riscv_process_target_version_attr (TREE_VALUE (version_attr),
- DECL_SOURCE_LOCATION (decl));
+ riscv_process_target_version_attr (TREE_VALUE (version_attr), loc);
priority = global_options.x_riscv_fmv_priority;
const char *arch_string = global_options.x_riscv_arch_string;
bool parse_res
- = riscv_minimal_hwprobe_feature_bits (arch_string, &res,
- DECL_SOURCE_LOCATION (decl));
+ = riscv_minimal_hwprobe_feature_bits (arch_string, &res, loc);
gcc_assert (parse_res);
cl_target_option_restore (&global_options, &global_options_set,
@@ -14135,8 +14064,8 @@ riscv_compare_version_priority (tree decl1, tree decl2)
struct riscv_feature_bits mask1, mask2;
int prio1, prio2;
- parse_features_for_version (decl1, mask1, prio1);
- parse_features_for_version (decl2, mask2, prio2);
+ parse_features_for_version (decl1, nullptr, mask1, prio1);
+ parse_features_for_version (decl2, nullptr, mask2, prio2);
return compare_fmv_features (mask1, mask2, prio1, prio2);
}
@@ -14439,6 +14368,7 @@ dispatch_function_versions (tree dispatch_decl,
version_info.version_decl = version_decl;
// Get attribute string, parse it and find the right features.
parse_features_for_version (version_decl,
+ &DECL_SOURCE_LOCATION (version_decl),
version_info.features,
version_info.prio);
function_versions.push_back (version_info);
@@ -15441,6 +15371,131 @@ synthesize_and (rtx operands[3])
return true;
}
+/* Synthesize OPERANDS[0] = OPERANDS[1] + OPERANDS[2].
+
+ OPERANDS[0] and OPERANDS[1] will be a REG and may be the same
+ REG.
+
+ OPERANDS[2] is a CONST_INT.
+
+ Return TRUE if the operation was fully synthesized and the caller
+ need not generate additional code. Return FALSE if the operation
+ was not synthesized and the caller is responsible for emitting the
+ proper sequence. */
+
+bool
+synthesize_add (rtx operands[3])
+{
+ /* Trivial cases that don't need synthesis. */
+ if (SMALL_OPERAND (INTVAL (operands[2])))
+ return false;
+
+ int budget1 = riscv_const_insns (operands[2], true);
+ int budget2 = riscv_const_insns (GEN_INT (-INTVAL (operands[2])), true);
+
+ HOST_WIDE_INT ival = INTVAL (operands[2]);
+
+ /* If we can emit two addi insns then that's better than synthesizing
+ the constant into a temporary, then adding the temporary to the
+ other input. The exception is when the constant can be loaded
+ in a single instruction, which can issue whenever it is convenient. */
+ if (SUM_OF_TWO_S12 (ival) && budget1 >= 2)
+ {
+ HOST_WIDE_INT saturated = HOST_WIDE_INT_M1U << (IMM_BITS - 1);
+
+ if (ival >= 0)
+ saturated = ~saturated;
+
+ ival -= saturated;
+
+ rtx x = gen_rtx_PLUS (word_mode, operands[1], GEN_INT (saturated));
+ emit_insn (gen_rtx_SET (operands[0], x));
+ rtx output = gen_rtx_PLUS (word_mode, operands[0], GEN_INT (ival));
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If we can shift the constant by 1, 2, or 3 bit positions
+ and the result is a cheaper constant, then do so.
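For example, adding 6144 would need lui+addi to synthesize the constant, but 6144 >> 3 == 768 is a single li, so with Zba this becomes roughly li tmp,768 followed by sh3add dest,tmp,src1.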
*/
+ ival = INTVAL (operands[2]);
+ if (TARGET_ZBA
+ && (((ival % 2) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 1), true))
+ || ((ival % 4) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 2), true))
+ || ((ival % 8) == 0 && budget1
+ > riscv_const_insns (GEN_INT (ival >> 3), true))))
+ {
+ // Load the shifted constant into a temporary
+ int shct = ctz_hwi (ival);
+
+ /* We can handle shifting up to 3 bit positions via shNadd. */
+ if (shct > 3)
+ shct = 3;
+
+ /* The adjusted constant may still need synthesis, so do not copy
+ it directly into a register. Let the expander handle it. */
+ rtx tmp = force_reg (word_mode, GEN_INT (ival >> shct));
+
+ /* Generate shift-add of temporary and operands[1]
+ into the final destination. */
+ rtx x = gen_rtx_ASHIFT (word_mode, tmp, GEN_INT (shct));
+ rtx output = gen_rtx_PLUS (word_mode, x, operands[1]);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* If the negated constant is cheaper than the original, then negate
+ the constant and use sub. */
+ if (budget2 < budget1)
+ {
+ // load -INTVAL (operands[2]) into a temporary
+ rtx tmp = force_reg (word_mode, GEN_INT (-INTVAL (operands[2])));
+
+ // subtract the negated operands[2] from operands[1]
+ rtx output = gen_rtx_MINUS (word_mode, operands[1], tmp);
+ emit_insn (gen_rtx_SET (operands[0], output));
+ return true;
+ }
+
+ /* No add synthesis was found. Synthesize the constant into
+ a temporary and use that. */
+ rtx x = force_reg (word_mode, operands[2]);
+ x = gen_rtx_PLUS (word_mode, operands[1], x);
+ emit_insn (gen_rtx_SET (operands[0], x));
+ return true;
+}
+
+/*
+ HINT : the argument specifies the target cache.
+
+ TODO : LOCALITY is unused.
+
+ Return the first operand of the associated PREF or PREFX insn. */
+rtx
+riscv_prefetch_cookie (rtx hint, rtx locality)
+{
+ return (GEN_INT (INTVAL (hint)
+ + CacheHint::DCACHE_HINT + INTVAL (locality) * 0));
+}
+
+/* Return true if X is a legitimate address with offset for prefetch.
+ MODE is the mode of the value being accessed. */
+bool
+riscv_prefetch_offset_address_p (rtx x, machine_mode mode)
+{
+ struct riscv_address_info addr;
+
+ if (riscv_classify_address (&addr, x, mode, false)
+ && addr.type == ADDRESS_REG)
+ {
+ if (TARGET_XMIPSCBOP)
+ return (CONST_INT_P (addr.offset)
+ && MIPS_RISCV_9BIT_OFFSET_P (INTVAL (addr.offset)));
+ }
+
+ return true;
+}
+
/* Initialize the GCC target structure.
*/
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -15804,6 +15859,12 @@ synthesize_and (rtx operands[3])
#undef TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P
#define TARGET_VECTOR_MODE_SUPPORTED_ANY_TARGET_P riscv_vector_mode_supported_any_target_p
+#undef TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE riscv_function_value
+
+#undef TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE riscv_libcall_value
+
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P riscv_function_value_regno_p
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index 45fa521f219f..9146571908f5 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -71,7 +71,7 @@ extern const char *riscv_arch_help (int argc, const char **argv);
{"tune", "%{!mtune=*:" \
" %{!mcpu=*:-mtune=%(VALUE)}" \
" %{mcpu=*:-mtune=%:riscv_default_mtune(%* %(VALUE))}}" }, \
- {"arch", "%{!march=*:" \
+ {"arch", "%{!march=*|march=unset:" \
" %{!mcpu=*:-march=%(VALUE)}" \
" %{mcpu=*:%:riscv_expand_arch_from_cpu(%* %(VALUE))}}" }, \
{"abi", "%{!mabi=*:-mabi=%(VALUE)}" }, \
@@ -111,13 +111,19 @@ extern const char *riscv_arch_help (int argc, const char **argv);
%(subtarget_asm_spec)" \
ASM_MISA_SPEC
+/* Drop all -march=* options before -march=unset. */
+#define ARCH_UNSET_CLEANUP_SPECS \
+ "%{march=unset:%mode); rtx coeff_reg = gen_reg_rtx (mode); rtx tmp_reg = gen_reg_rtx (mode);
+ rtx fflags = gen_reg_rtx (SImode);
riscv_emit_move (tmp_reg, operands[1]);
riscv_emit_move (coeff_reg, riscv_vector::get_fp_rounding_coefficient (mode));
emit_insn (gen_abs2 (abs_reg, operands[1]));
+ /* fp compare can set invalid flag for NaN, so back up fflags. */
+ if (flag_trapping_math)
+ emit_insn (gen_riscv_frflags (fflags));
riscv_expand_conditional_branch (label, LT, abs_reg, coeff_reg);
emit_jump_insn (gen_jump (end_label));
@@ -2324,6 +2348,14 @@
emit_insn (gen_copysign3 (tmp_reg, abs_reg, operands[1]));
emit_label (end_label);
+
+ /* Restore fflags, but after the label. This is slightly different
+ from the glibc implementation, which only needs to restore under
+ the label: it checks for NaN first, so the following fp compare
+ cannot raise fp exceptions and thus cannot clobber fflags. */
+ if (flag_trapping_math)
+ emit_insn (gen_riscv_fsflags (fflags));
+
riscv_emit_move (operands[0], tmp_reg);
}
@@ -4402,11 +4434,21 @@
)
(define_insn "prefetch"
- [(prefetch (match_operand 0 "prefetch_operand" "Qr")
- (match_operand 1 "imm5_operand" "i")
- (match_operand 2 "const_int_operand" "n"))]
- "TARGET_ZICBOP"
+ [(prefetch (match_operand 0 "prefetch_operand" "Qr,ZD")
+ (match_operand 1 "imm5_operand" "i,i")
+ (match_operand 2 "const_int_operand" "n,n"))]
+ "TARGET_ZICBOP || TARGET_XMIPSCBOP"
{
+ if (TARGET_XMIPSCBOP)
+ {
+ /* The MIPS prefetch-write hint is a nop for the p8700. */
+ if (operands[1] != CONST0_RTX (GET_MODE (operands[1])))
+ return "nop";
+
+ operands[1] = riscv_prefetch_cookie (operands[1], operands[2]);
+ return "mips.pref\t%1,%a0";
+ }
+
switch (INTVAL (operands[1]))
{
case 0:
diff --git a/gcc/config/riscv/sifive-p400.md b/gcc/config/riscv/sifive-p400.md
index ed8b8ec9da7b..0acdbdab31e5 100644
--- a/gcc/config/riscv/sifive-p400.md
+++ b/gcc/config/riscv/sifive-p400.md
@@ -153,10 +153,13 @@
(eq_attr "type" "fmove,fcvt"))
"p400_float_pipe,sifive_p400_fpu")
+;; We need something for HF so that we don't abort during
+;; scheduling if someone asks for p400 scheduling but
+;; enables the various HF mode extensions.
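;; (Without the HF entries added below, an HF-mode fdiv or fsqrt would
;; match no reservation at all and the scheduler would abort.)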
(define_insn_reservation "sifive_p400_fdiv_s" 18 (and (eq_attr "tune" "sifive_p400") (eq_attr "type" "fdiv,fsqrt") - (eq_attr "mode" "SF")) + (eq_attr "mode" "HF,SF")) "sifive_p400_FM, sifive_p400_fdiv*5") (define_insn_reservation "sifive_p400_fdiv_d" 31 @@ -178,3 +181,18 @@ (define_bypass 1 "sifive_p400_f2i" "sifive_p400_branch,sifive_p400_sfb_alu,sifive_p400_mul, sifive_p400_div,sifive_p400_alu,sifive_p400_cpop") + + +;; Someone familiar with the p400 uarch needs to put +;; these into the right reservations. This is just a placeholder +;; for everything I found that had no mapping to a reservation. +;; +;; Note that even if the processor does not implementat a particular +;; instruction it should still have suitable reservations, even if +;; they are just dummies like this one. +(define_insn_reservation "sifive_p400_unknown" 1 + (and (eq_attr "tune" "sifive_p400") + (eq_attr "type" "ghost,vfrecp,vclmul,vldm,vmffs,vclmulh,vlsegde,vfcvtitof,vsm4k,vfcvtftoi,vfdiv,vsm3c,vsm4r,viwmuladd,vfwredu,vcpop,vfwmuladd,vstux,vsshift,vfwcvtftof,vfncvtftof,vfwmaccbf16,vext,vssegte,rdvl,vaeskf1,vfslide1up,vmov,vimovvx,vaesef,vfsqrt,viminmax,vfwcvtftoi,vssegtox,vfclass,viwmul,vector,vgmul,vsm3me,vfcmp,vstm,vfredo,vfwmul,vaeskf2,vstox,vfncvtbf16,vislide1up,vgather,vldox,viwred,vctz,vghsh,vsts,vslidedown,vfmerge,vicmp,vsmul,vlsegdff,vfalu,vfmov,vislide1down,vfminmax,vcompress,vldr,vldff,vlsegdux,vimuladd,vsalu,vidiv,sf_vqmacc,vfslide1down,vaesem,vimerge,vfncvtftoi,vfwcvtitof,vicalu,vaesz,sf_vc_se,vsha2cl,vmsfs,vldux,vmidx,vslideup,vired,vlde,vfwredo,vfmovfv,vbrev,vfncvtitof,rdfrm,vsetvl,vssegts,vimul,vialu,vbrev8,vfwalu,rdvlenb,sf_vfnrclip,vclz,vnclip,sf_vc,vimov,vste,vfmuladd,vfmovvf,vwsll,vsetvl_pre,vlds,vlsegds,vmiota,vmalu,wrvxrm,wrfrm,viwalu,vaesdm,vssegtux,vaesdf,vimovxv,vror,vnshift,vstr,vaalu,vsha2ms,crypto,vfwcvtbf16,vlsegdox,vrol,vandn,vfsgnj,vmpop,vfredu,vsha2ch,vshift,vrev8,vfmul")) + "p400_int_pipe+sifive_p400_ialu") + + diff --git a/gcc/config/riscv/sifive-p600.md b/gcc/config/riscv/sifive-p600.md index 240134976fa1..ccd006d16ed7 100644 --- a/gcc/config/riscv/sifive-p600.md +++ b/gcc/config/riscv/sifive-p600.md @@ -157,10 +157,13 @@ (eq_attr "type" "fmove,fcvt")) "float_pipe,sifive_p600_fpu") +;; We need something for HF so that we don't abort during +;; scheduling if someone was to ask for p600 scheduling, but +;; enable the various HF mode extensions. (define_insn_reservation "sifive_p600_fdiv_s" 11 (and (eq_attr "tune" "sifive_p600") (eq_attr "type" "fdiv,fsqrt") - (eq_attr "mode" "SF")) + (eq_attr "mode" "HF,SF")) "sifive_p600_FM, sifive_p600_fdiv*5") (define_insn_reservation "sifive_p600_fdiv_d" 19 @@ -182,3 +185,15 @@ (define_bypass 1 "sifive_p600_f2i" "sifive_p600_branch,sifive_p600_sfb_alu,sifive_p600_mul, sifive_p600_div,sifive_p600_alu,sifive_p600_cpop") + +;; Someone familiar with the p600 uarch needs to put +;; these into the right reservations. This is just a placeholder +;; for everything I found that had no mapping to a reservation. +;; +;; Note that even if the processor does not implementat a particular +;; instruction it should still have suitable reservations, even if +;; they are just dummies like this one. 
+(define_insn_reservation "sifive_p600_unknown" 1 + (and (eq_attr "tune" "sifive_p600") + (eq_attr "type" "vicmp,vssegte,vbrev8,vfwalu,vimov,vmpop,vaesdf,vislide1up,vror,vsha2cl,vrol,vslideup,vimuladd,vclmul,vaesef,vext,vlsegdff,vfmuladd,vfclass,vmsfs,vfcmp,vsmul,vsm3me,vmalu,vshift,viwmuladd,vfslide1up,vlsegde,vsm4k,wrvxrm,vislide1down,vsm3c,vfwmuladd,vaesdm,vclmulh,vfwcvtftof,vfwredu,vfredo,sf_vfnrclip,vaesz,vwsll,vmiota,vctz,vsetvl_pre,vstm,vidiv,vssegtux,vfwmul,vcompress,vste,vired,vlsegds,vaesem,vfminmax,ghost,vandn,crypto,vfmul,vialu,vfmovvf,rdfrm,vldff,vfmerge,vsshift,vnclip,sf_vqmacc,vnshift,vfdiv,vfslide1down,vfncvtitof,vfsqrt,vimovxv,vstr,vfwcvtbf16,vfwcvtitof,vbrev,vssegtox,vssegts,vcpop,vmffs,viwmul,vldr,vmidx,rdvlenb,vfalu,vslidedown,vlde,vfsgnj,vfmov,viwalu,vsha2ch,vfncvtbf16,vfcvtitof,rdvl,vsetvl,vsha2ms,vector,vstux,vimerge,vclz,sf_vc,vfcvtftoi,viminmax,vsm4r,sf_vc_se,wrfrm,vstox,vfmovfv,vfncvtftoi,vimul,vsalu,vmov,vgmul,vgather,vldux,vlsegdox,vfncvtftof,vimovvx,vghsh,vldm,vldox,vfwcvtftoi,vlds,vfrecp,vaeskf2,vsts,vfredu,vicalu,vaalu,vfwmaccbf16,vrev8,vfwredo,vlsegdux,viwred,vaeskf1")) + "int_pipe+sifive_p600_ialu") diff --git a/gcc/config/riscv/sync.md b/gcc/config/riscv/sync.md index 50ec8b38f723..e47bb41adcc2 100644 --- a/gcc/config/riscv/sync.md +++ b/gcc/config/riscv/sync.md @@ -386,13 +386,13 @@ }) (define_insn "amo_atomic_exchange" - [(set (match_operand:GPR 0 "register_operand" "=&r") + [(set (match_operand:GPR 0 "register_operand" "=r") (unspec_volatile:GPR [(match_operand:GPR 1 "memory_operand" "+A") (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_EXCHANGE)) (set (match_dup 1) - (match_operand:GPR 2 "register_operand" "0"))] + (match_operand:GPR 2 "reg_or_0_operand" "rJ"))] "TARGET_ZAAMO" "amoswap.%A3\t%0,%z2,%1" [(set_attr "type" "atomic") @@ -434,13 +434,13 @@ }) (define_insn "zabha_atomic_exchange" - [(set (match_operand:SHORT 0 "register_operand" "=&r") + [(set (match_operand:SHORT 0 "register_operand" "=r") (unspec_volatile:SHORT [(match_operand:SHORT 1 "memory_operand" "+A") (match_operand:SI 3 "const_int_operand")] ;; model UNSPEC_SYNC_EXCHANGE_ZABHA)) (set (match_dup 1) - (match_operand:SHORT 2 "register_operand" "0"))] + (match_operand:SHORT 2 "reg_or_0_operand" "rJ"))] "TARGET_ZABHA" "amoswap.%A3\t%0,%z2,%1" [(set_attr "type" "atomic") diff --git a/gcc/config/riscv/t-rtems b/gcc/config/riscv/t-rtems index f596e764f9d0..a4d2d03f5602 100644 --- a/gcc/config/riscv/t-rtems +++ b/gcc/config/riscv/t-rtems @@ -1,8 +1,8 @@ MULTILIB_OPTIONS = MULTILIB_DIRNAMES = -MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc -MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc +MULTILIB_OPTIONS += march=rv32i/march=rv32iac/march=rv32im/march=rv32imf/march=rv32ima/march=rv32imac/march=rv32imaf/march=rv32imafc/march=rv32imafd/march=rv32imafdc/march=rv64ima/march=rv64imac/march=rv64imafd/march=rv64imafdc/march=rv64imc +MULTILIB_DIRNAMES += rv32i rv32iac rv32im rv32imf rv32ima rv32imac rv32imaf rv32imafc rv32imafd rv32imafdc rv64ima rv64imac rv64imafd rv64imafdc rv64imc MULTILIB_OPTIONS += mabi=ilp32/mabi=ilp32f/mabi=ilp32d/mabi=lp64/mabi=lp64d MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d @@ -10,6 +10,9 @@ MULTILIB_DIRNAMES += ilp32 ilp32f ilp32d lp64 lp64d MULTILIB_OPTIONS += mcmodel=medany 
MULTILIB_DIRNAMES += medany +MULTILIB_OPTIONS += mstrict-align +MULTILIB_DIRNAMES += strict-align + MULTILIB_REQUIRED = MULTILIB_REQUIRED += march=rv32i/mabi=ilp32 MULTILIB_REQUIRED += march=rv32iac/mabi=ilp32 @@ -25,3 +28,5 @@ MULTILIB_REQUIRED += march=rv64ima/mabi=lp64/mcmodel=medany MULTILIB_REQUIRED += march=rv64imac/mabi=lp64/mcmodel=medany MULTILIB_REQUIRED += march=rv64imafd/mabi=lp64d/mcmodel=medany MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany +MULTILIB_REQUIRED += march=rv64imafdc/mabi=lp64d/mcmodel=medany/mstrict-align +MULTILIB_REQUIRED += march=rv64imc/mabi=lp64/mcmodel=medany/mstrict-align diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index 66b76701f5a8..e7ffeeb9cd64 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -5490,6 +5490,52 @@ "TARGET_VECTOR" {}) +(define_expand "@pred_mul_plus_vx_" + [(set (match_operand:V_VLSI_QHS 0 "register_operand") + (if_then_else:V_VLSI_QHS + (unspec: + [(match_operand: 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_QHS + (mult:V_VLSI_QHS + (vec_duplicate:V_VLSI_QHS + (match_operand: 2 "register_operand")) + (match_operand:V_VLSI_QHS 3 "register_operand")) + (match_operand:V_VLSI_QHS 4 "register_operand")) + (match_operand:V_VLSI_QHS 5 "vector_merge_operand")))] + "TARGET_VECTOR" +{ + riscv_vector::prepare_ternary_operands (operands); +}) + +(define_expand "@pred_mul_plus_vx_" + [(set (match_operand:V_VLSI_D 0 "register_operand") + (if_then_else:V_VLSI_D + (unspec: + [(match_operand: 1 "vector_mask_operand") + (match_operand 6 "vector_length_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (match_operand 9 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_D + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand: 2 "register_operand")) + (match_operand:V_VLSI_D 3 "register_operand")) + (match_operand:V_VLSI_D 4 "register_operand")) + (match_operand:V_VLSI_D 5 "vector_merge_operand")))] + "TARGET_VECTOR && TARGET_64BIT" +{ + riscv_vector::prepare_ternary_operands (operands); +}) + (define_insn "*pred_madd_scalar" [(set (match_operand:V_VLSI 0 "register_operand" "=vd, vr") (if_then_else:V_VLSI @@ -6324,8 +6370,8 @@ (set_attr "mode" "")]) (define_insn "@pred__scalar" - [(set (match_operand:VF 0 "register_operand" "=vd, vd, vr, vr") - (if_then_else:VF + [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr") + (if_then_else:V_VLSF (unspec: [(match_operand: 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") @@ -6336,11 +6382,11 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (commutative_float_binop:VF - (vec_duplicate:VF + (commutative_float_binop:V_VLSF + (vec_duplicate:V_VLSF (match_operand: 4 "register_operand" " f, f, f, f")) - (match_operand:VF 3 "register_operand" " vr, vr, vr, vr")) - (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vf.vf\t%0,%3,%4%p1" [(set_attr "type" "") @@ -6417,8 +6463,8 @@ (symbol_ref "riscv_vector::get_frm_mode (operands[9])"))]) (define_insn "@pred__reverse_scalar" - [(set 
(match_operand:VF 0 "register_operand" "=vd, vd, vr, vr") - (if_then_else:VF + [(set (match_operand:V_VLSF 0 "register_operand" "=vd, vd, vr, vr") + (if_then_else:V_VLSF (unspec: [(match_operand: 1 "vector_mask_operand" " vm, vm,Wc1,Wc1") (match_operand 5 "vector_length_operand" "rvl,rvl,rvl,rvl") @@ -6429,11 +6475,11 @@ (reg:SI VL_REGNUM) (reg:SI VTYPE_REGNUM) (reg:SI FRM_REGNUM)] UNSPEC_VPREDICATE) - (non_commutative_float_binop:VF - (vec_duplicate:VF + (non_commutative_float_binop:V_VLSF + (vec_duplicate:V_VLSF (match_operand: 4 "register_operand" " f, f, f, f")) - (match_operand:VF 3 "register_operand" " vr, vr, vr, vr")) - (match_operand:VF 2 "vector_merge_operand" " vu, 0, vu, 0")))] + (match_operand:V_VLSF 3 "register_operand" " vr, vr, vr, vr")) + (match_operand:V_VLSF 2 "vector_merge_operand" " vu, 0, vu, 0")))] "TARGET_VECTOR" "vfr.vf\t%0,%3,%4%p1" [(set_attr "type" "") @@ -8839,6 +8885,56 @@ [(set_attr "type" "vssegtx") (set_attr "mode" "")]) +(define_insn "*pred_macc__scalar_undef" + [(set (match_operand:V_VLSI_QHS 0 "register_operand" "=vd, vr") + (if_then_else:V_VLSI_QHS + (unspec: + [(match_operand: 1 "vector_mask_operand" " vm, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_QHS + (mult:V_VLSI_QHS + (vec_duplicate:V_VLSI_QHS + (match_operand: 3 "reg_or_0_operand" " rJ, rJ")) + (match_operand:V_VLSI_QHS 4 "register_operand" " vr, vr")) + (match_operand:V_VLSI_QHS 5 "register_operand" " 0, 0")) + (match_operand:V_VLSI_QHS 2 "vector_undef_operand")))] + "TARGET_VECTOR" + "@ + vmacc.vx\t%0,%z3,%4%p1 + vmacc.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "")]) + +(define_insn "*pred_macc__scalar_undef" + [(set (match_operand:V_VLSI_D 0 "register_operand" "=vd, vr") + (if_then_else:V_VLSI_D + (unspec: + [(match_operand: 1 "vector_mask_operand" " vm, Wc1") + (match_operand 6 "vector_length_operand" "rvl, rvl") + (match_operand 7 "const_int_operand" " i, i") + (match_operand 8 "const_int_operand" " i, i") + (match_operand 9 "const_int_operand" " i, i") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (plus:V_VLSI_D + (mult:V_VLSI_D + (vec_duplicate:V_VLSI_D + (match_operand: 3 "reg_or_0_operand" " rJ, rJ")) + (match_operand:V_VLSI_D 4 "register_operand" " vr, vr")) + (match_operand:V_VLSI_D 5 "register_operand" " 0, 0")) + (match_operand:V_VLSI_D 2 "vector_undef_operand")))] + "TARGET_VECTOR && TARGET_64BIT" + "@ + vmacc.vx\t%0,%z3,%4%p1 + vmacc.vx\t%0,%z3,%4%p1" + [(set_attr "type" "vimuladd") + (set_attr "mode" "")]) + (include "autovec.md") (include "autovec-opt.md") (include "sifive-vector.md") diff --git a/gcc/config/riscv/xiangshan.md b/gcc/config/riscv/xiangshan.md index 34b4a8f1f3fc..617914087815 100644 --- a/gcc/config/riscv/xiangshan.md +++ b/gcc/config/riscv/xiangshan.md @@ -144,13 +144,13 @@ (define_insn_reservation "xiangshan_sfdiv" 11 (and (eq_attr "tune" "xiangshan") (eq_attr "type" "fdiv") - (eq_attr "mode" "SF")) + (eq_attr "mode" "HF,SF")) "xs_fmisc_rs") (define_insn_reservation "xiangshan_sfsqrt" 17 (and (eq_attr "tune" "xiangshan") (eq_attr "type" "fsqrt") - (eq_attr "mode" "SF")) + (eq_attr "mode" "HF,SF")) "xs_fmisc_rs") (define_insn_reservation "xiangshan_dfdiv" 21 diff --git a/gcc/config/rl78/rl78.opt.urls b/gcc/config/rl78/rl78.opt.urls index 96eff5f72041..66e874be589b 100644 --- 
a/gcc/config/rl78/rl78.opt.urls +++ b/gcc/config/rl78/rl78.opt.urls @@ -4,7 +4,7 @@ msim UrlSuffix(gcc/RL78-Options.html#index-msim-6) mmul= -UrlSuffix(gcc/RL78-Options.html#index-mmul) +UrlSuffix(gcc/RL78-Options.html#index-mmul-1) mallregs UrlSuffix(gcc/RL78-Options.html#index-mallregs) diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 764b4992fb56..8dd23f8619cd 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -10322,7 +10322,7 @@ can_be_rotated_to_negative_lis (HOST_WIDE_INT c, int *rot) rotated over the highest bit. */ unsigned HOST_WIDE_INT uc = c; int pos_one = clz_hwi ((HOST_WIDE_INT) (uc << 16) >> 16); - if (pos_one != 0) + if (pos_one > 0 && pos_one < HOST_BITS_PER_WIDE_INT) { middle_zeros = ctz_hwi (c >> (HOST_BITS_PER_WIDE_INT - pos_one)); int middle_ones = clz_hwi (~(uc << pos_one)); @@ -10585,7 +10585,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c, int *num_insns) { /* li/lis; rldicX */ unsigned HOST_WIDE_INT imm = (c | ~mask); - if (shift != 0) + if (shift > 0 && shift < HOST_BITS_PER_WIDE_INT) imm = (imm >> shift) | (imm << (HOST_BITS_PER_WIDE_INT - shift)); count_or_emit_insn (temp, GEN_INT (imm)); diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index e31ee40aa870..04a6c0f7461d 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -15665,10 +15665,10 @@ (if_then_else:SI (lt (match_dup 3) (const_int 0)) (const_int -1) - (if_then_else (gt (match_dup 3) - (const_int 0)) - (const_int 1) - (const_int 0))))] + (if_then_else:SI (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] "TARGET_P9_MISC" { operands[3] = gen_reg_rtx (CCmode); @@ -15703,10 +15703,10 @@ (if_then_else:SI (lt (match_operand:CC 1 "cc_reg_operand" "y") (const_int 0)) (const_int -1) - (if_then_else (gt (match_dup 1) - (const_int 0)) - (const_int 1) - (const_int 0))))] + (if_then_else:SI (gt (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] "TARGET_P9_MISC" "setb %0,%1" [(set_attr "type" "logical")]) @@ -15716,10 +15716,10 @@ (if_then_else:SI (ltu (match_operand:CCUNS 1 "cc_reg_operand" "y") (const_int 0)) (const_int -1) - (if_then_else (gtu (match_dup 1) - (const_int 0)) - (const_int 1) - (const_int 0))))] + (if_then_else:SI (gtu (match_dup 1) + (const_int 0)) + (const_int 1) + (const_int 0))))] "TARGET_P9_MISC" "setb %0,%1" [(set_attr "type" "logical")]) @@ -15751,10 +15751,10 @@ (if_then_else:SI (lt (match_dup 3) (const_int 0)) (const_int -1) - (if_then_else (gt (match_dup 3) - (const_int 0)) - (const_int 1) - (const_int 0))))] + (if_then_else:SI (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] "TARGET_P9_MISC" { operands[3] = gen_reg_rtx (CCmode); @@ -15807,10 +15807,10 @@ (if_then_else:SI (lt (match_dup 3) (const_int 0)) (const_int -1) - (if_then_else (gt (match_dup 3) - (const_int 0)) - (const_int 1) - (const_int 0))))] + (if_then_else:SI (gt (match_dup 3) + (const_int 0)) + (const_int 1) + (const_int 0))))] "TARGET_P9_MISC && TARGET_64BIT" { operands[3] = gen_reg_rtx (CCmode); diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc index 012b6dbb6e00..d044f9a010ca 100644 --- a/gcc/config/s390/s390.cc +++ b/gcc/config/s390/s390.cc @@ -9239,15 +9239,12 @@ print_operand (FILE *file, rtx x, int code) else if (code == 'h') fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000); + /* Support arbitrary _BitInt constants in asm statements. 
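For instance, an "i" operand holding a large unsigned _BitInt (70) value is a CONST_WIDE_INT; with no output modifier it is now printed via output_addr_const instead of being rejected.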
*/ + else if (code == 0) + output_addr_const (file, x); else - { - if (code == 0) - output_operand_lossage ("invalid constant - try using " - "an output modifier"); - else - output_operand_lossage ("invalid constant for output modifier '%c'", - code); - } + output_operand_lossage ("invalid constant for output modifier '%c'", + code); break; case CONST_VECTOR: switch (code) @@ -18768,6 +18765,27 @@ s390_c_mode_for_floating_type (enum tree_index ti) return default_mode_for_floating_type (ti); } +/* Return true if _BitInt(N) is supported and fill its details into *INFO. */ + +bool +s390_bitint_type_info (int n, struct bitint_info *info) +{ + if (!TARGET_64BIT) + return false; + if (n <= 8) + info->limb_mode = QImode; + else if (n <= 16) + info->limb_mode = HImode; + else if (n <= 32) + info->limb_mode = SImode; + else + info->limb_mode = DImode; + info->abi_limb_mode = info->limb_mode; + info->big_endian = true; + info->extended = true; + return true; +} + /* Initialize GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP @@ -19089,6 +19107,9 @@ s390_c_mode_for_floating_type (enum tree_index ti) #undef TARGET_DOCUMENTATION_NAME #define TARGET_DOCUMENTATION_NAME "S/390" +#undef TARGET_C_BITINT_TYPE_INFO +#define TARGET_C_BITINT_TYPE_INFO s390_bitint_type_info + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h" diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md index 8cc48b075be2..858387cd85c5 100644 --- a/gcc/config/s390/s390.md +++ b/gcc/config/s390/s390.md @@ -5248,18 +5248,19 @@ }) (define_insn "*zero_extendsidi2" - [(set (match_operand:DI 0 "register_operand" "=d,d,d") - (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,T,b")))] + [(set (match_operand:DI 0 "register_operand" "=d,d,d,d") + (zero_extend:DI (match_operand:SI 1 "nonimmediate_operand" "d,T,b,v")))] "TARGET_ZARCH" "@ llgfr\t%0,%1 llgf\t%0,%1 - llgfrl\t%0,%1" - [(set_attr "op_type" "RRE,RXY,RIL") - (set_attr "type" "*,*,larl") - (set_attr "cpu_facility" "*,*,z10") - (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3") - (set_attr "relative_long" "*,*,yes")]) + llgfrl\t%0,%1 + vlgvf\t%0,%v1,0" + [(set_attr "op_type" "RRE,RXY,RIL,VRS") + (set_attr "type" "*,*,larl,*") + (set_attr "cpu_facility" "*,*,z10,vx") + (set_attr "z10prop" "z10_fwd_E1,z10_fwd_A3,z10_fwd_A3,*") + (set_attr "relative_long" "*,*,yes,*")]) ; ; LLGT-type instructions (zero-extend from 31 bit to 64 bit). 
@@ -5362,29 +5363,32 @@ ; llhrl, llghrl (define_insn "*zero_extendhi2_z10" - [(set (match_operand:GPR 0 "register_operand" "=d,d,d") - (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,T,b")))] + [(set (match_operand:GPR 0 "register_operand" "=d,d,d,d") + (zero_extend:GPR (match_operand:HI 1 "nonimmediate_operand" "d,T,b,v")))] "TARGET_Z10" "@ llhr\t%0,%1 llh\t%0,%1 - llhrl\t%0,%1" - [(set_attr "op_type" "RXY,RRE,RIL") - (set_attr "type" "*,*,larl") - (set_attr "cpu_facility" "*,*,z10") - (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3") - (set_attr "relative_long" "*,*,yes")]) + llhrl\t%0,%1 + vlgvh\t%0,%v1,0" + [(set_attr "op_type" "RXY,RRE,RIL,VRS") + (set_attr "type" "*,*,larl,*") + (set_attr "cpu_facility" "*,*,z10,vx") + (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,z10_fwd_A3,*") + (set_attr "relative_long" "*,*,yes,*")]) ; llhr, llcr, llghr, llgcr, llh, llc, llgh, llgc (define_insn "*zero_extend2_extimm" - [(set (match_operand:GPR 0 "register_operand" "=d,d") - (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,T")))] + [(set (match_operand:GPR 0 "register_operand" "=d,d,d") + (zero_extend:GPR (match_operand:HQI 1 "nonimmediate_operand" "d,T,v")))] "TARGET_EXTIMM" "@ llr\t%0,%1 - ll\t%0,%1" - [(set_attr "op_type" "RRE,RXY") - (set_attr "z10prop" "z10_super_E1,z10_fwd_A3")]) + ll\t%0,%1 + vlgv\t%0,%v1,0" + [(set_attr "op_type" "RRE,RXY,VRS") + (set_attr "cpu_facility" "*,*,vx") + (set_attr "z10prop" "z10_super_E1,z10_fwd_A3,*")]) ; llgh, llgc (define_insn "*zero_extend2" diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md index 12bbeb640723..745634edf57b 100644 --- a/gcc/config/s390/vector.md +++ b/gcc/config/s390/vector.md @@ -501,54 +501,6 @@ SIL,SIL,RI,RI,RRE,RRE,RIL,RR,RXY,RXY,RIL")]) -; Instructions vlgvb, vlgvh, vlgvf zero all remaining bits of a GPR, i.e., -; an implicit zero extend is done. - -(define_insn "*movdi_zero_extend_A" - [(set (match_operand:DI 0 "register_operand" "=d") - (zero_extend:DI (match_operand:SINT 1 "register_operand" "v")))] - "TARGET_VX" - "vlgv\t%0,%v1,0" - [(set_attr "op_type" "VRS")]) - -(define_insn "*movsi_zero_extend_A" - [(set (match_operand:SI 0 "register_operand" "=d") - (zero_extend:SI (match_operand:HQI 1 "register_operand" "v")))] - "TARGET_VX" - "vlgv\t%0,%v1,0" - [(set_attr "op_type" "VRS")]) - -(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI - V1HI V2HI V4HI V8HI - V1SI V2SI V4SI]) -(define_insn "*movdi_zero_extend_B" - [(set (match_operand:DI 0 "register_operand" "=d") - (zero_extend:DI (vec_select: - (match_operand:VLGV_DI 1 "register_operand" "v") - (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))] - "TARGET_VX" -{ - operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (mode) - 1)); - return "vlgv\t%0,%v1,%Y2"; -} - [(set_attr "op_type" "VRS") - (set_attr "mnemonic" "vlgv")]) - -(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI - V1HI V2HI V4HI V8HI]) -(define_insn "*movsi_zero_extend_B" - [(set (match_operand:SI 0 "register_operand" "=d") - (zero_extend:SI (vec_select: - (match_operand:VLGV_SI 1 "register_operand" "v") - (parallel [(match_operand:SI 2 "const_int_operand" "n")]))))] - "TARGET_VX" -{ - operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (mode) - 1)); - return "vlgv\t%0,%v1,%Y2"; -} - [(set_attr "op_type" "VRS") - (set_attr "mnemonic" "vlgv")]) - ; vec_load_lanes? ; vec_store_lanes? @@ -763,6 +715,42 @@ DONE; }) +; Instructions vlgvb, vlgvh, vlgvf zero all remaining bits of a GPR, i.e., +; an implicit zero extend is done. 
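; For example, element 1 of a V4SI vector in %v24 can be fetched and
; zero extended into %r2 with a single vlgvf %r2,%v24,1.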
+ +(define_mode_iterator VLGV_DI [V1QI V2QI V4QI V8QI V16QI + V1HI V2HI V4HI V8HI + V1SI V2SI V4SI]) +(define_insn "*vec_extract_zero_extend" + [(set (match_operand:DI 0 "register_operand" "=d") + (zero_extend:DI (vec_select: + (match_operand:VLGV_DI 1 "register_operand" "v") + (parallel [(match_operand:SI 2 "nonmemory_operand" "an")]))))] + "TARGET_VX" +{ + if (CONST_INT_P (operands[2])) + operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (mode) - 1)); + return "vlgv\t%0,%v1,%Y2"; +} + [(set_attr "op_type" "VRS") + (set_attr "mnemonic" "vlgv")]) + +(define_mode_iterator VLGV_SI [V1QI V2QI V4QI V8QI V16QI + V1HI V2HI V4HI V8HI]) +(define_insn "*vec_extract_zero_extend" + [(set (match_operand:SI 0 "register_operand" "=d") + (zero_extend:SI (vec_select: + (match_operand:VLGV_SI 1 "register_operand" "v") + (parallel [(match_operand:SI 2 "nonmemory_operand" "an")]))))] + "TARGET_VX" +{ + if (CONST_INT_P (operands[2])) + operands[2] = GEN_INT (UINTVAL (operands[2]) & (GET_MODE_NUNITS (mode) - 1)); + return "vlgv\t%0,%v1,%Y2"; +} + [(set_attr "op_type" "VRS") + (set_attr "mnemonic" "vlgv")]) + (define_insn "*vec_vllezlf" [(set (match_operand:V_HW_4 0 "register_operand" "=v") (vec_concat:V_HW_4 diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md index 77c95713085c..727ec1e2c202 100644 --- a/gcc/config/xtensa/constraints.md +++ b/gcc/config/xtensa/constraints.md @@ -130,7 +130,7 @@ (and (match_code "mem") (match_test "smalloffset_mem_p (op)"))) -(define_memory_constraint "T" +(define_special_memory_constraint "T" "Memory in a literal pool (addressable with an L32R instruction)." (and (match_code "mem") (match_test "!TARGET_CONST16 && constantpool_mem_p (op)"))) diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md index 629dfdde33cb..ab0403d9ea86 100644 --- a/gcc/config/xtensa/xtensa.md +++ b/gcc/config/xtensa/xtensa.md @@ -88,6 +88,7 @@ ;; This mode iterator allows the HI and QI patterns to be defined from ;; the same template. (define_mode_iterator HQI [HI QI]) +(define_mode_attr mode_bits [(HI "16") (QI "8")]) ;; This mode iterator allows the SI and HI patterns to be defined from ;; the same template. @@ -176,19 +177,18 @@ ;; Addition. 
(define_insn "addsi3" - [(set (match_operand:SI 0 "register_operand" "=D,D,a,a,a") - (plus:SI (match_operand:SI 1 "register_operand" "%d,d,r,r,r") - (match_operand:SI 2 "add_operand" "d,O,r,J,N")))] - "" - "@ - add.n\t%0, %1, %2 - addi.n\t%0, %1, %d2 - add\t%0, %1, %2 - addi\t%0, %1, %d2 - addmi\t%0, %1, %x2" - [(set_attr "type" "arith,arith,arith,arith,arith") - (set_attr "mode" "SI") - (set_attr "length" "2,2,3,3,3")]) + [(set (match_operand:SI 0 "register_operand") + (plus:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "add_operand")))] + "" + {@ [cons: =0, %1, 2; attrs: type, length] + [D, d, d; arith, 2] add.n\t%0, %1, %2 + [D, d, O; arith, 2] addi.n\t%0, %1, %d2 + [a, r, r; arith, 3] add\t%0, %1, %2 + [a, r, J; arith, 3] addi\t%0, %1, %d2 + [a, r, N; arith, 3] addmi\t%0, %1, %x2 + } + [(set_attr "mode" "SI")]) (define_insn "*addsubx" [(set (match_operand:SI 0 "register_operand" "=a") @@ -392,18 +392,15 @@ (set_attr "length" "3")]) (define_insn "mulhisi3" - [(set (match_operand:SI 0 "register_operand" "=C,A") - (mult:SI (any_extend:SI - (match_operand:HI 1 "register_operand" "%r,r")) - (any_extend:SI - (match_operand:HI 2 "register_operand" "r,r"))))] + [(set (match_operand:SI 0 "register_operand") + (mult:SI (any_extend:SI (match_operand:HI 1 "register_operand")) + (any_extend:SI (match_operand:HI 2 "register_operand"))))] "TARGET_MUL16 || TARGET_MAC16" - "@ - mul16\t%0, %1, %2 - mul.aa.ll\t%1, %2" - [(set_attr "type" "mul16,mac16") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) + {@ [cons: =0, %1, 2; attrs: type, length] + [C, r, r; mul16, 3] mul16\t%0, %1, %2 + [A, r, r; mac16, 3] mul.aa.ll\t%1, %2 + } + [(set_attr "mode" "SI")]) (define_insn "muladdhisi" [(set (match_operand:SI 0 "register_operand" "=A") @@ -742,16 +739,15 @@ ;; Logical instructions. (define_insn "andsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (and:SI (match_operand:SI 1 "register_operand" "%r,r") - (match_operand:SI 2 "mask_operand" "P,r")))] + [(set (match_operand:SI 0 "register_operand") + (and:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "mask_operand")))] "" - "@ - extui\t%0, %1, 0, %K2 - and\t%0, %1, %2" - [(set_attr "type" "arith,arith") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) + {@ [cons: =0, %1, 2; attrs: type, length] + [a, r, P; arith, 3] extui\t%0, %1, 0, %K2 + [a, r, r; arith, 3] and\t%0, %1, %2 + } + [(set_attr "mode" "SI")]) (define_insn_and_split "*andsi3_bitcmpl" [(set (match_operand:SI 0 "register_operand" "=a") @@ -944,27 +940,15 @@ ;; Zero-extend instructions. -(define_insn "zero_extendhisi2" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (zero_extend:SI (match_operand:HI 1 "nonimmed_operand" "r,U")))] - "" - "@ - extui\t%0, %1, 0, 16 - %v1l16ui\t%0, %1" - [(set_attr "type" "arith,load") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) - -(define_insn "zero_extendqisi2" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (zero_extend:SI (match_operand:QI 1 "nonimmed_operand" "r,U")))] +(define_insn "zero_extendsi2" + [(set (match_operand:SI 0 "register_operand") + (zero_extend:SI (match_operand:HQI 1 "nonimmed_operand")))] "" - "@ - extui\t%0, %1, 0, 8 - %v1l8ui\t%0, %1" - [(set_attr "type" "arith,load") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) + {@ [cons: =0, 1; attrs: type, length] + [a, r; arith, 3] extui\t%0, %1, 0, + [a, U; load , 3] %v1lui\t%0, %1 + } + [(set_attr "mode" "SI")]) ;; Sign-extend instructions. 
@@ -982,15 +966,14 @@ }) (define_insn "extendhisi2_internal" - [(set (match_operand:SI 0 "register_operand" "=B,a") - (sign_extend:SI (match_operand:HI 1 "sext_operand" "r,U")))] + [(set (match_operand:SI 0 "register_operand") + (sign_extend:SI (match_operand:HI 1 "sext_operand")))] "" - "@ - sext\t%0, %1, 15 - %v1l16si\t%0, %1" - [(set_attr "type" "arith,load") - (set_attr "mode" "SI") - (set_attr "length" "3,3")]) + {@ [cons: =0, 1; attrs: type, length] + [B, r; arith, 3] sext\t%0, %1, 15 + [a, U; load , 3] %v1l16si\t%0, %1 + } + [(set_attr "mode" "SI")]) (define_expand "extendqisi2" [(set (match_operand:SI 0 "register_operand" "") @@ -1327,29 +1310,28 @@ }) (define_insn "movsi_internal" - [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,a,W,a,a,U,*a,*A") - (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,Y,i,T,U,r,*A,*r"))] + [(set (match_operand:SI 0 "nonimmed_operand") + (match_operand:SI 1 "move_operand"))] "xtensa_valid_move (SImode, operands)" - "@ - movi.n\t%0, %x1 - mov.n\t%0, %1 - mov.n\t%0, %1 - %v1l32i.n\t%0, %1 - %v0s32i.n\t%1, %0 - %v0s32i.n\t%1, %0 - mov\t%0, %1 - movsp\t%0, %1 - movi\t%0, %x1 - movi\t%0, %1 - const16\t%0, %t1\;const16\t%0, %b1 - %v1l32r\t%0, %1 - %v1l32i\t%0, %1 - %v0s32i\t%1, %0 - rsr\t%0, ACCLO - wsr\t%1, ACCLO" - [(set_attr "type" "move,move,move,load,store,store,move,move,move,load,move,load,load,store,rsr,wsr") - (set_attr "mode" "SI") - (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")]) + {@ [cons: =0, 1; attrs: type, length] + [ D, M; move , 2] movi.n\t%0, %x1 + [ D, D; move , 2] mov.n\t%0, %1 + [ D, d; move , 2] ^ + [ D, R; load , 2] %v1l32i.n\t%0, %1 + [ R, D; store, 2] %v0s32i.n\t%1, %0 + [ R, d; store, 2] ^ + [ a, r; move , 3] mov\t%0, %1 + [ q, r; move , 3] movsp\t%0, %1 + [ a, I; move , 3] movi\t%0, %x1 + [ a, Y; load , 3] movi\t%0, %1 + [ W, i; move , 6] const16\t%0, %t1\;const16\t%0, %b1 + [ a, T; load , 3] %v1l32r\t%0, %1 + [ a, U; load , 3] %v1l32i\t%0, %1 + [ U, r; store, 3] %v0s32i\t%1, %0 + [*a, *A; rsr , 3] rsr\t%0, ACCLO + [*A, *r; wsr , 3] wsr\t%1, ACCLO + } + [(set_attr "mode" "SI")]) (define_split [(set (match_operand:SHI 0 "register_operand") @@ -1399,23 +1381,22 @@ }) (define_insn "movhi_internal" - [(set (match_operand:HI 0 "nonimmed_operand" "=D,D,a,a,a,a,a,U,*a,*A") - (match_operand:HI 1 "move_operand" "M,d,r,I,Y,T,U,r,*A,*r"))] + [(set (match_operand:HI 0 "nonimmed_operand") + (match_operand:HI 1 "move_operand"))] "xtensa_valid_move (HImode, operands)" - "@ - movi.n\t%0, %x1 - mov.n\t%0, %1 - mov\t%0, %1 - movi\t%0, %x1 - movi\t%0, %1 - %v1l32r\t%0, %1 - %v1l16ui\t%0, %1 - %v0s16i\t%1, %0 - rsr\t%0, ACCLO - wsr\t%1, ACCLO" - [(set_attr "type" "move,move,move,move,load,load,load,store,rsr,wsr") - (set_attr "mode" "HI") - (set_attr "length" "2,2,3,3,3,3,3,3,3,3")]) + {@ [cons: =0, 1; attrs: type, length] + [ D, M; move , 2] movi.n\t%0, %x1 + [ D, d; move , 2] mov.n\t%0, %1 + [ a, r; move , 3] mov\t%0, %1 + [ a, I; move , 3] movi\t%0, %x1 + [ a, Y; load , 3] movi\t%0, %1 + [ a, T; load , 3] %v1l32r\t%0, %1 + [ a, U; load , 3] %v1l16ui\t%0, %1 + [ U, r; store, 3] %v0s16i\t%1, %0 + [*a, *A; rsr , 3] rsr\t%0, ACCLO + [*A, *r; wsr , 3] wsr\t%1, ACCLO + } + [(set_attr "mode" "HI")]) ;; 8-bit Integer moves @@ -1429,21 +1410,20 @@ }) (define_insn "movqi_internal" - [(set (match_operand:QI 0 "nonimmed_operand" "=D,D,a,a,a,U,*a,*A") - (match_operand:QI 1 "move_operand" "M,d,r,I,U,r,*A,*r"))] + [(set (match_operand:QI 0 "nonimmed_operand") + (match_operand:QI 1 "move_operand"))] "xtensa_valid_move (QImode, 
operands)" - "@ - movi.n\t%0, %x1 - mov.n\t%0, %1 - mov\t%0, %1 - movi\t%0, %x1 - %v1l8ui\t%0, %1 - %v0s8i\t%1, %0 - rsr\t%0, ACCLO - wsr\t%1, ACCLO" - [(set_attr "type" "move,move,move,move,load,store,rsr,wsr") - (set_attr "mode" "QI") - (set_attr "length" "2,2,3,3,3,3,3,3")]) + {@ [cons: =0, 1; attrs: type, length] + [ D, M; move , 2] movi.n\t%0, %x1 + [ D, d; move , 2] mov.n\t%0, %1 + [ a, r; move , 3] mov\t%0, %1 + [ a, I; move , 3] movi\t%0, %x1 + [ a, U; load , 3] %v1l8ui\t%0, %1 + [ U, r; store, 3] %v0s8i\t%1, %0 + [*a, *A; rsr , 3] rsr\t%0, ACCLO + [*A, *r; wsr , 3] wsr\t%1, ACCLO + } + [(set_attr "mode" "QI")]) ;; Sub-word reloads from the constant pool. @@ -1501,30 +1481,29 @@ }) (define_insn "movsf_internal" - [(set (match_operand:SF 0 "nonimmed_operand" "=f,f,U,D,a,D,R,a,f,a,a,W,a,U") - (match_operand:SF 1 "move_operand" "f,^U,f,d,T,R,d,r,r,f,Y,iF,U,r"))] + [(set (match_operand:SF 0 "nonimmed_operand") + (match_operand:SF 1 "move_operand"))] "((register_operand (operands[0], SFmode) || register_operand (operands[1], SFmode)) && !(FP_REG_P (xt_true_regnum (operands[0])) && (constantpool_mem_p (operands[1]) || CONSTANT_P (operands[1]))))" - "@ - mov.s\t%0, %1 - %v1lsi\t%0, %1 - %v0ssi\t%1, %0 - mov.n\t%0, %1 - %v1l32r\t%0, %1 - %v1l32i.n\t%0, %1 - %v0s32i.n\t%1, %0 - mov\t%0, %1 - wfr\t%0, %1 - rfr\t%0, %1 - movi\t%0, %y1 - const16\t%0, %t1\;const16\t%0, %b1 - %v1l32i\t%0, %1 - %v0s32i\t%1, %0" - [(set_attr "type" "farith,fload,fstore,move,load,load,store,move,farith,farith,load,move,load,store") - (set_attr "mode" "SF") - (set_attr "length" "3,3,3,2,3,2,2,3,3,3,3,6,3,3")]) + {@ [cons: =0, 1; attrs: type, length] + [f, f; farith, 3] mov.s\t%0, %1 + [f, ^U; fload , 3] %v1lsi\t%0, %1 + [U, f; fstore, 3] %v0ssi\t%1, %0 + [D, d; move , 2] mov.n\t%0, %1 + [a, T; load , 3] %v1l32r\t%0, %1 + [D, R; load , 2] %v1l32i.n\t%0, %1 + [R, d; store , 2] %v0s32i.n\t%1, %0 + [a, r; move , 3] mov\t%0, %1 + [f, r; farith, 3] wfr\t%0, %1 + [a, f; farith, 3] rfr\t%0, %1 + [a, Y; load , 3] movi\t%0, %y1 + [W, iF; move , 6] const16\t%0, %t1\;const16\t%0, %b1 + [a, U; load , 3] %v1l32i\t%0, %1 + [U, r; store , 3] %v0s32i\t%1, %0 + } + [(set_attr "mode" "SF")]) (define_insn "*lsiu" [(set (match_operand:SF 0 "register_operand" "=f") @@ -1692,16 +1671,15 @@ }) (define_insn "ashlsi3_internal" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (ashift:SI (match_operand:SI 1 "register_operand" "r,r") - (match_operand:SI 2 "arith_operand" "J,r")))] + [(set (match_operand:SI 0 "register_operand") + (ashift:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "arith_operand")))] "" - "@ - slli\t%0, %1, %R2 - ssl\t%2\;sll\t%0, %1" - [(set_attr "type" "arith,arith") - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) + {@ [cons: =0, 1, 2; attrs: type, length] + [a, r, J; arith, 3] slli\t%0, %1, %R2 + [a, r, r; arith, 6] ssl\t%2\;sll\t%0, %1 + } + [(set_attr "mode" "SI")]) (define_split [(set (match_operand:SI 0 "register_operand") @@ -1713,35 +1691,26 @@ (match_dup 1)))]) (define_insn "ashrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (ashiftrt:SI (match_operand:SI 1 "register_operand" "r,r") - (match_operand:SI 2 "arith_operand" "J,r")))] + [(set (match_operand:SI 0 "register_operand") + (ashiftrt:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "arith_operand")))] "" - "@ - srai\t%0, %1, %R2 - ssr\t%2\;sra\t%0, %1" - [(set_attr "type" "arith,arith") - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) + {@ [cons: =0, 1, 2; attrs: type, length] + [a, r, J; 
arith, 3] srai\t%0, %1, %R2 + [a, r, r; arith, 6] ssr\t%2\;sra\t%0, %1 + } + [(set_attr "mode" "SI")]) (define_insn "lshrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (lshiftrt:SI (match_operand:SI 1 "register_operand" "r,r") - (match_operand:SI 2 "arith_operand" "J,r")))] + [(set (match_operand:SI 0 "register_operand") + (lshiftrt:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "arith_operand")))] "" -{ - if (which_alternative == 0) - { - if ((INTVAL (operands[2]) & 0x1f) < 16) - return "srli\t%0, %1, %R2"; - else - return "extui\t%0, %1, %R2, %L2"; - } - return "ssr\t%2\;srl\t%0, %1"; -} - [(set_attr "type" "arith,arith") - (set_attr "mode" "SI") - (set_attr "length" "3,6")]) + {@ [cons: =0, 1, 2; attrs: type, length] + [a, r, J; arith, 3] << (INTVAL (operands[2]) & 0x1f) < 16 ? \"srli\t%0, %1, %R2\" : \"extui\t%0, %1, %R2, %L2\"; + [a, r, r; arith, 6] ssr\t%2\;srl\t%0, %1 + } + [(set_attr "mode" "SI")]) (define_insn "*shift_per_byte" [(set (match_operand:SI 0 "register_operand" "=a") @@ -1944,28 +1913,26 @@ (set_attr "length" "6")]) (define_insn "rotlsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (rotate:SI (match_operand:SI 1 "register_operand" "r,r") - (match_operand:SI 2 "arith_operand" "J,r")))] + [(set (match_operand:SI 0 "register_operand") + (rotate:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "arith_operand")))] "" - "@ - ssai\t%L2\;src\t%0, %1, %1 - ssl\t%2\;src\t%0, %1, %1" - [(set_attr "type" "multi,multi") - (set_attr "mode" "SI") - (set_attr "length" "6,6")]) + {@ [cons: =0, 1, 2; attrs: type, length] + [a, r, J; multi, 6] ssai\t%L2\;src\t%0, %1, %1 + [a, r, r; multi, 6] ssl\t%2\;src\t%0, %1, %1 + } + [(set_attr "mode" "SI")]) (define_insn "rotrsi3" - [(set (match_operand:SI 0 "register_operand" "=a,a") - (rotatert:SI (match_operand:SI 1 "register_operand" "r,r") - (match_operand:SI 2 "arith_operand" "J,r")))] + [(set (match_operand:SI 0 "register_operand") + (rotatert:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "arith_operand")))] "" - "@ - ssai\t%R2\;src\t%0, %1, %1 - ssr\t%2\;src\t%0, %1, %1" - [(set_attr "type" "multi,multi") - (set_attr "mode" "SI") - (set_attr "length" "6,6")]) + {@ [cons: =0, 1, 2; attrs: type, length] + [a, r, J; multi, 6] ssai\t%R2\;src\t%0, %1, %1 + [a, r, r; multi, 6] ssr\t%2\;src\t%0, %1, %1 + } + [(set_attr "mode" "SI")]) ;; Comparisons. @@ -2024,26 +1991,23 @@ [(match_operand:SI 0 "register_operand" "r") (const_int -2147483648)]) (label_ref (match_operand 1 "")) - (pc)))] + (pc))) + (clobber (match_scratch:SI 3 "=a"))] "TARGET_ABS" "#" - "&& can_create_pseudo_p ()" + "&& 1" [(set (match_dup 3) (abs:SI (match_dup 0))) (set (pc) (if_then_else (match_op_dup 2 - [(zero_extract:SI (match_dup 3) - (const_int 1) - (match_dup 4)) + [(match_dup 3) (const_int 0)]) (label_ref (match_dup 1)) (pc)))] { - operands[3] = gen_reg_rtx (SImode); - operands[4] = GEN_INT (BITS_BIG_ENDIAN ? 0 : 31); - operands[2] = gen_rtx_fmt_ee (reverse_condition (GET_CODE (operands[2])), - VOIDmode, XEXP (operands[2], 0), - const0_rtx); + if (GET_CODE (operands[3]) == SCRATCH) + operands[3] = gen_reg_rtx (SImode); + PUT_CODE (operands[2], GET_CODE (operands[2]) == EQ ? 
LT : GE); } [(set_attr "type" "jump") (set_attr "mode" "none") @@ -2190,7 +2154,7 @@ (label_ref (match_dup 1)) (pc)))] { - operands[3] = GEN_INT ((1 << GET_MODE_BITSIZE (GET_MODE (operands[3]))) - 1); + operands[3] = GEN_INT (GET_MODE_MASK (GET_MODE (operands[3]))); }) (define_insn_and_split "*masktrue_const_pow2_minus_one" diff --git a/gcc/configure b/gcc/configure index bacdd29da69a..4a751d969bab 100755 --- a/gcc/configure +++ b/gcc/configure @@ -872,7 +872,6 @@ c_strict_warn strict_warn c_loose_warn loose_warn -aliasing_flags CPP EGREP GREP @@ -7126,45 +7125,6 @@ $as_echo "#define HAVE_SWAP_IN_UTILITY 1" >>confdefs.h fi -# Check whether compiler is affected by placement new aliasing bug (PR 29286). -# If the host compiler is affected by the bug, and we build with optimization -# enabled (which happens e.g. when cross-compiling), the pool allocator may -# get miscompiled. Use -fno-strict-aliasing to work around this problem. -# Since there is no reliable feature check for the presence of this bug, -# we simply use a GCC version number check. (This should never trigger for -# stages 2 or 3 of a native bootstrap.) -aliasing_flags= -if test "$GCC" = yes; then - saved_CXXFLAGS="$CXXFLAGS" - - # The following test compilation will succeed if and only if $CXX accepts - # -fno-strict-aliasing *and* is older than GCC 4.3. - CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CXX is affected by placement new aliasing bug" >&5 -$as_echo_n "checking whether $CXX is affected by placement new aliasing bug... " >&6; } - cat confdefs.h - <<_ACEOF >conftest.$ac_ext -/* end confdefs.h. */ - -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) -#error compiler not affected by placement new aliasing bug -#endif - -_ACEOF -if ac_fn_cxx_try_compile "$LINENO"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; }; aliasing_flags='-fno-strict-aliasing' -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - - CXXFLAGS="$saved_CXXFLAGS" -fi - - - - # --------------------- # Warnings and checking # --------------------- @@ -11308,6 +11268,7 @@ fi + use_additional=yes acl_save_prefix="$prefix" @@ -14236,6 +14197,7 @@ $as_echo "$LIBICONV" >&6; } + use_additional=yes acl_save_prefix="$prefix" @@ -21522,7 +21484,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 21525 "configure" +#line 21487 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -21628,7 +21590,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 21631 "configure" +#line 21593 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -32760,6 +32722,7 @@ if test x"$ld64_flag" = x"yes"; then gcc_cv_ld64_platform_version=0 gcc_cv_ld64_macos_version_min=0 gcc_cv_ld64_demangle=0 + gcc_cv_ld64_no_deduplicate=0 if test "$build" = "$host"; then darwin_try_test=1 @@ -32789,6 +32752,9 @@ $as_echo "$gcc_cv_ld64_major" >&6; } if test "$gcc_cv_ld64_major" -ge 236; then gcc_cv_ld64_export_dynamic=1 fi + if test "$gcc_cv_ld64_major" -ge 262; then + gcc_cv_ld64_no_deduplicate=1 + fi if test "$gcc_cv_ld64_major" -ge 512; then gcc_cv_ld64_platform_version=1 gcc_cv_ld64_macos_version_min=1 @@ -32823,6 +32789,15 @@ $as_echo_n "checking linker for -export_dynamic support... 
" >&6; } { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_ld64_export_dynamic" >&5 $as_echo "$gcc_cv_ld64_export_dynamic" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking linker for -no_deduplicate support" >&5 +$as_echo_n "checking linker for -no_deduplicate support... " >&6; } + gcc_cv_ld64_no_deduplicate=1 + if $gcc_cv_ld -no_deduplicate < /dev/null 2>&1 | grep 'unknown option' > /dev/null; then + gcc_cv_ld64_no_deduplicate=0 + fi + { $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_ld64_no_deduplicate" >&5 +$as_echo "$gcc_cv_ld64_no_deduplicate" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking linker for -platform_version support" >&5 $as_echo_n "checking linker for -platform_version support... " >&6; } gcc_cv_ld64_platform_version=1 @@ -32863,6 +32838,12 @@ _ACEOF +cat >>confdefs.h <<_ACEOF +#define LD64_HAS_NO_DEDUPLICATE $gcc_cv_ld64_no_deduplicate +_ACEOF + + + cat >>confdefs.h <<_ACEOF #define LD64_HAS_PLATFORM_VERSION $gcc_cv_ld64_platform_version _ACEOF diff --git a/gcc/configure.ac b/gcc/configure.ac index 2c43b38178c7..4532c5c22fe5 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -563,35 +563,6 @@ if test $ac_cv_std_swap_in_utility = yes; then [Define if defines std::swap.]) fi -# Check whether compiler is affected by placement new aliasing bug (PR 29286). -# If the host compiler is affected by the bug, and we build with optimization -# enabled (which happens e.g. when cross-compiling), the pool allocator may -# get miscompiled. Use -fno-strict-aliasing to work around this problem. -# Since there is no reliable feature check for the presence of this bug, -# we simply use a GCC version number check. (This should never trigger for -# stages 2 or 3 of a native bootstrap.) -aliasing_flags= -if test "$GCC" = yes; then - saved_CXXFLAGS="$CXXFLAGS" - - # The following test compilation will succeed if and only if $CXX accepts - # -fno-strict-aliasing *and* is older than GCC 4.3. 
- CXXFLAGS="$CXXFLAGS -fno-strict-aliasing" - AC_MSG_CHECKING([whether $CXX is affected by placement new aliasing bug]) - AC_COMPILE_IFELSE([AC_LANG_SOURCE([ -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3) -#error compiler not affected by placement new aliasing bug -#endif -])], - [AC_MSG_RESULT([yes]); aliasing_flags='-fno-strict-aliasing'], - [AC_MSG_RESULT([no])]) - - CXXFLAGS="$saved_CXXFLAGS" -fi -AC_SUBST(aliasing_flags) - - - # --------------------- # Warnings and checking # --------------------- @@ -6421,6 +6392,7 @@ if test x"$ld64_flag" = x"yes"; then gcc_cv_ld64_platform_version=0 gcc_cv_ld64_macos_version_min=0 gcc_cv_ld64_demangle=0 + gcc_cv_ld64_no_deduplicate=0 if test "$build" = "$host"; then darwin_try_test=1 @@ -6448,6 +6420,9 @@ if test x"$ld64_flag" = x"yes"; then if test "$gcc_cv_ld64_major" -ge 236; then gcc_cv_ld64_export_dynamic=1 fi + if test "$gcc_cv_ld64_major" -ge 262; then + gcc_cv_ld64_no_deduplicate=1 + fi if test "$gcc_cv_ld64_major" -ge 512; then gcc_cv_ld64_platform_version=1 gcc_cv_ld64_macos_version_min=1 @@ -6476,6 +6451,13 @@ if test x"$ld64_flag" = x"yes"; then fi AC_MSG_RESULT($gcc_cv_ld64_export_dynamic) + AC_MSG_CHECKING(linker for -no_deduplicate support) + gcc_cv_ld64_no_deduplicate=1 + if $gcc_cv_ld -no_deduplicate < /dev/null 2>&1 | grep 'unknown option' > /dev/null; then + gcc_cv_ld64_no_deduplicate=0 + fi + AC_MSG_RESULT($gcc_cv_ld64_no_deduplicate) + AC_MSG_CHECKING(linker for -platform_version support) gcc_cv_ld64_platform_version=1 if $gcc_cv_ld -platform_version macos 10.5 0.0 < /dev/null 2>&1 | grep 'unknown option' > /dev/null; then @@ -6502,6 +6484,9 @@ if test x"$ld64_flag" = x"yes"; then AC_DEFINE_UNQUOTED(LD64_HAS_EXPORT_DYNAMIC, $gcc_cv_ld64_export_dynamic, [Define to 1 if ld64 supports '-export_dynamic'.]) + AC_DEFINE_UNQUOTED(LD64_HAS_NO_DEDUPLICATE, $gcc_cv_ld64_no_deduplicate, + [Define to 1 if ld64 supports '-no_deduplicate'.]) + AC_DEFINE_UNQUOTED(LD64_HAS_PLATFORM_VERSION, $gcc_cv_ld64_platform_version, [Define to 1 if ld64 supports '-platform_version'.]) diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog index 3ab14f066476..056ee4047b52 100644 --- a/gcc/cp/ChangeLog +++ b/gcc/cp/ChangeLog @@ -1,3 +1,369 @@ +2025-08-26 Sandra Loosemore + + PR middle-end/118839 + * decl.cc (omp_declare_variant_finalize_one): Error if variant + is the same as base. + +2025-08-26 Sandra Loosemore + + * parser.cc (cp_finish_omp_declare_variant): Structure diagnostic + code similarly to C front end. Make check for a missing "match" + clause unconditional. + +2025-08-25 Jakub Jelinek + + * pt.cc (finish_expansion_stmt): Implement C++ CWG3048 + - Empty destructuring expansion statements. Don't error for + destructuring expansion stmts if sz is 0, don't call + fit_decomposition_lang_decl if n is 0 and pass NULL rather than + this_decomp to cp_finish_decl. + +2025-08-25 Jakub Jelinek + + PR c++/121601 + * constexpr.cc (cxx_bind_parameters_in_call): Move break + if *jump_target before the check for null this object pointer. + +2025-08-23 Eczbek + + PR c++/116928 + * parser.cc (cp_parser_braced_list): Set greater_than_is_operator_p. + +2025-08-23 Nathaniel Shead + + PR c++/120499 + * method.cc (synthesize_method): Set the instantiating module. + +2025-08-21 Jason Merrill + + PR c++/121068 + * constexpr.cc (cxx_eval_store_expression): Allow clobber of a const + object. + +2025-08-21 Jason Merrill + + PR c++/120757 + * pt.cc (tsubst_expr) [OFFSET_REF]: Don't tsubst the type. 
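Both PR c++/121068 entries in this file (2025-08-21 above, 2025-08-05 below) allow a constant expression to end the lifetime of a const object: the end-of-lifetime clobber emitted for a destructor is no longer treated as a modification of the const object. A minimal sketch of code plausibly exercising this (illustrative only, not taken from the patch; the actual reproducer is in the PR):

	struct S { int i; constexpr ~S () {} };
	constexpr int f () { const S s{42}; return s.i; }  // s's destructor clobbers a const object
	static_assert (f () == 42);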
+ +2025-08-20 Marek Polacek + + PR c++/121553 + * name-lookup.cc (check_local_shadow): Check !is_normal_capture_proxy. + +2025-08-19 Ben Wu + + PR c++/120618 + * parser.cc (cp_parser_compound_requirement): Set type to + NULL_TREE for invalid type-constraint. + +2025-08-19 Patrick Palka + + PR c++/121351 + * class.cc (add_method): Use outer_template_args when + substituting outer template arguments into constraints. + +2025-08-19 Nathaniel Shead + + PR c++/120195 + * name-lookup.cc (do_nonmember_using_decl): Also handle change + in exportedness of a function. + +2025-08-18 Indu Bhagat + + * typeck.cc (get_member_function_from_ptrfunc): Use + 'sanitize_code_type' instead of 'unsigned int'. + +2025-08-17 Nathaniel Shead + + PR c++/120503 + PR c++/120824 + * cp-tree.h (TYPE_UNNAMED_P): Adjust for enums with enumerators + for linkage purposes. + (enum_with_enumerator_for_linkage_p): Declare. + * decl.cc (name_unnamed_type): Adjust assertions to handle enums + with enumerators for linkage purposes. + (grokdeclarator): Use a typedef name for enums with enumerators + for linkage purposes. + (enum_with_enumerator_for_linkage_p): New function. + (finish_enum_value_list): Reset type linkage for enums with + enumerators for linkage purposes. + * mangle.cc (write_unnamed_enum_name): New function. + (write_unqualified_name): Handle enums with enumerators for + linkage purposes. + * tree.cc (decl_linkage): Fixup unnamed enums. + +2025-08-15 Jakub Jelinek + + * cp-trait.def (STRUCTURED_BINDING_SIZE): New unary trait. + * cp-tree.h (finish_structured_binding_size): Declare. + * semantics.cc (trait_expr_value): Handle + CPTK_STRUCTURED_BINDING_SIZE. + (finish_structured_binding_size): New function. + (finish_trait_expr): Handle CPTK_RANK and CPTK_TYPE_ORDER + in the switch instead of just doing break; for those and + ifs at the end to handle them. Handle CPTK_STRUCTURED_BINDING_SIZE. + * pt.cc (tsubst_expr): Likewise. + * constraint.cc (diagnose_trait_expr): Likewise. + * decl.cc (get_tuple_size): Use mce_true for maybe_const_value. + (cp_decomp_size): Diagnose incomplete types not just if + processing_template_decl, and use error_at instead of pedwarn. + If btype is NULL, just return 0 instead of diagnosing an error. + +2025-08-15 Jakub Jelinek + + PR c++/121552 + * decl.cc: Implement C++20 P1766R1 - Mitigating minor modules maladies. + (diagnose_non_c_class_typedef_for_linkage, + maybe_diagnose_non_c_class_typedef_for_linkage): New functions. + (name_unnamed_type): Call + maybe_diagnose_non_c_class_typedef_for_linkage. + +2025-08-15 Jakub Jelinek + + PR c++/121539 + * parser.cc (cp_parser_cache_defarg): Set done to true for + CPP_ELLIPSIS followed by CPP_CLOSE_PAREN in !nsdmi at depth 0. + +2025-08-15 Jakub Jelinek + + PR preprocessor/120778 + PR target/121520 + * lex.cc (cxx_init): Remove warn_on lambda. Use cpp_warn instead of + cpp_lookup and NODE_WARN bit setting or warn_on. + +2025-08-14 Jakub Jelinek + + PR c++/121524 + * tree.cc (build_cplus_array_type): Don't reuse variant type + if it has TREE_DEPRECATED or TREE_UNAVAILABLE flags set or, + unless elt_type has TYPE_USER_ALIGN set and TYPE_ALIGN is + TYPE_ALIGN of elt_type, TYPE_USER_ALIGN is not set. + +2025-08-13 Marek Polacek + + PR c++/102610 + * cp-tree.h (LAMBDA_EXPR_CONST_QUAL_P): Define. + (maybe_add_dummy_lambda_op): Declare. + (remove_dummy_lambda_op): Declare. + (push_capture_proxies): Adjust. + * lambda.cc (build_capture_proxy): No longer static. New early_p + parameter. Use it. + (add_capture): Adjust the call to build_capture_proxy. 
(resolvable_dummy_lambda): Check DECL_LAMBDA_FUNCTION_P. + (push_capture_proxies): New. + (start_lambda_function): Use it. + * name-lookup.cc (check_local_shadow): Give an error for + is_capture_proxy. + (cp_binding_level_descriptor): Add lambda-scope. + (begin_scope) <sk_lambda>: New case. + * name-lookup.h (enum scope_kind): Add sk_lambda. + (struct cp_binding_level): Widen kind. + * parser.cc (cp_parser_lambda_expression): Create a new (lambda) scope + after the lambda-introducer. + (cp_parser_lambda_declarator_opt): Set LAMBDA_EXPR_CONST_QUAL_P. + Create a dummy operator() if needed. Inject the captures into the + lambda scope. Remove the dummy operator(). + (make_dummy_lambda_op): New. + (maybe_add_dummy_lambda_op): New. + (remove_dummy_lambda_op): New. + * pt.cc (tsubst_lambda_expr): Begin/end a lambda scope. Push the + capture proxies. Build/remove a dummy operator() if needed. Set + LAMBDA_EXPR_CONST_QUAL_P. + * semantics.cc (parsing_lambda_declarator): New. + (outer_var_p): Also consider captures as outer variables if in a lambda + declarator. + (process_outer_var_ref): Reset containing_function when + parsing_lambda_declarator. + (finish_decltype_type): Process decls in the lambda-declarator as well. + Look at LAMBDA_EXPR_CONST_QUAL_P unless we have an xobj function. + +2025-08-13 Jakub Jelinek + + PR c++/120776 + * cp-tree.def: Implement C++26 P1306R5 - Expansion statements. + (TEMPLATE_FOR_STMT): New tree code. + * cp-tree.h (struct saved_scope): Add expansion_stmt. + (in_expansion_stmt): Define. + (TEMPLATE_FOR_DECL, TEMPLATE_FOR_EXPR, TEMPLATE_FOR_BODY, + TEMPLATE_FOR_SCOPE, TEMPLATE_FOR_INIT_STMT): Define. + (struct tinst_level): Adjust comment. + (cp_decomp_size, finish_expansion_stmt, do_pushlevel, + cp_build_range_for_decls, build_range_temp, + cp_perform_range_for_lookup, begin_template_for_scope): Declare. + (finish_range_for_stmt): Remove declaration. + * cp-objcp-common.cc (cp_common_init_ts): Handle TEMPLATE_FOR_STMT. + * name-lookup.h (enum scope_kind): Add sk_template_for enumerator. + (struct cp_binding_level): Enlarge kind bitfield from 4 to 5 bits. + Adjust comment with remaining space bits. + * name-lookup.cc (check_local_shadow): Handle sk_template_for like + sk_for. + (cp_binding_level_descriptor): Add entry for sk_template_for. + (begin_scope): Handle sk_template_for. + * parser.h (IN_EXPANSION_STMT): Define. + * parser.cc (cp_debug_parser): Print IN_EXPANSION_STMT bit. + (cp_parser_lambda_expression): Temporarily clear in_expansion_stmt. + (cp_parser_statement): Handle RID_TEMPLATE followed by RID_FOR for + C++11. + (cp_parser_label_for_labeled_statement): Complain about named labels + inside of expansion stmt body. + (cp_hide_range_decl): New function. + (cp_parser_range_for): Use it. Adjust do_range_for_auto_deduction + caller. Remove second template argument from auto_vecs bindings and + names. + (build_range_temp): No longer static. + (do_range_for_auto_deduction): Add expansion_stmt argument. + (cp_build_range_for_decls): New function. + (cp_convert_range_for): Use it. Call cp_perform_range_for_lookup + rather than cp_parser_perform_range_for_lookup. + (cp_parser_perform_range_for_lookup): Rename to ... + (cp_perform_range_for_lookup): ... this. No longer static. Add + complain argument and handle it. + (cp_parser_range_for_member_function): Rename to ... + (cp_range_for_member_function): ... this. + (cp_parser_expansion_statement): New function. + (cp_parser_jump_statement): Handle IN_EXPANSION_STMT.
+ (cp_convert_omp_range_for): Adjust do_range_for_auto_deduction caller. + Call cp_perform_range_for_lookup rather than + cp_parser_perform_range_for_lookup. + * error.cc (print_instantiation_full_context): Handle tldcl being + TEMPLATE_FOR_STMT. + (print_instantiation_partial_context_line): Likewise. + * constexpr.cc (potential_constant_expression_1): Handle + TEMPLATE_FOR_STMT. + * decl.cc (poplevel_named_label_1): Use obl instead of bl->level_chain. + (finish_case_label): Diagnose case labels inside of template for. + (find_decomp_class_base): Add complain argument, don't diagnose + anything and just return error_mark_node if tf_none, adjust recursive + call. + (cp_decomp_size): New function. + (cp_finish_decomp): Adjust find_decomp_class_base caller. + * semantics.cc (do_pushlevel): No longer static. + (begin_template_for_scope): New function. + * pt.cc (push_tinst_level_loc): Handle TEMPLATE_FOR_STMT. + (reopen_tinst_level): Likewise. + (tsubst_stmt): Handle TEMPLATE_FOR_STMT. + (struct expansion_stmt_bc): New type. + (expansion_stmt_find_bc_r, finish_expansion_stmt): New functions. + * decl2.cc (decl_dependent_p): Return true for current function's decl + if in_expansion_stmt. + * call.cc (extend_ref_init_temps): Don't extend_all_temps if + TREE_STATIC (decl). + * cxx-pretty-print.cc (cxx_pretty_printer::statement): Handle + TEMPLATE_FOR_STMT. + +2025-08-13 Benjamin Wu + + * lex.cc (init_operators): Fix typo. + +2025-08-11 Nicolas Werner + + * mapper-client.cc (spawn_mapper_program): change argv parsing + +2025-08-11 Jakub Jelinek + + PR c++/117783 + * decl.cc (set_sb_pack_name): For name independent decls + just clear DECL_NAME instead of appending #i to it. + +2025-08-11 Jakub Jelinek + + PR c++/117783 + * decl.cc (cp_finish_decomp): Don't sorry on tuple static + structured bindings with a pack, instead temporarily reset + DECL_NAME of the individual vars in the pack to the name + of the pack for cp_finish_decl time and force mangling. + +2025-08-11 Jakub Jelinek + + PR c++/121442 + * parser.cc (cp_parser_decomposition_declaration): Don't copy + DECL_DECLARED_CONST{EXPR,INIT}_P bits from decl to decl2 if + decl is error_mark_node. + +2025-08-08 David Malcolm + + * error.cc (cp_adjust_diagnostic_info): Convert "context" arg from + ptr to const &. + +2025-08-07 Patrick Palka + + * call.cc (extract_call_expr): Remove handling of C++20 + rewritten comparison operators. + +2025-08-07 Jakub Jelinek + + PR c++/117783 + * parser.cc: Implement C++26 P1061R10 - Structured Bindings can + introduce a Pack. + (cp_parser_range_for): Also handle TREE_VEC as DECL_VALUE_EXPR + instead of ARRAY_REF. + (cp_parser_decomposition_declaration): Use sb-identifier-list instead + of identifier-list in comments. Parse structured bindings with + structured binding pack. Don't emit pedwarn about structured + binding attributes in structured bindings inside of a condition. + (cp_convert_omp_range_for): Also handle TREE_VEC as DECL_VALUE_EXPR + instead of ARRAY_REF. + * decl.cc (get_tuple_element_type): Change i argument type from + unsigned to unsigned HOST_WIDE_INT. + (get_tuple_decomp_init): Likewise. + (set_sb_pack_name): New function. + (cp_finish_decomp): Handle structured binding packs. + * pt.cc (tsubst_pack_expansion): Handle structured binding packs + and capture proxies for them. Formatting fixes. + (tsubst_decl): For structured binding packs don't tsubst TREE_TYPE + first, instead recreate the type after r is created. 
(tsubst_omp_for_iterator): Also handle TREE_VEC as DECL_VALUE_EXPR + instead of ARRAY_REF. + (tsubst_expr): Handle sizeof... on non-dependent structured binding + packs. + (value_dependent_expression_p): Return false for sizeof... on + non-dependent structured binding packs. + (instantiation_dependent_r): Don't recurse on sizeof... on + non-dependent structured binding packs. + * constexpr.cc (potential_constant_expression_1): Also handle + TREE_VEC on DECL_VALUE_EXPR of structured binding packs. + +2025-08-07 Jakub Jelinek + + PR preprocessor/120778 + * lex.cc (cxx_init): Mark cpp nodes corresponding + to keywords, identifiers with special meaning and standard + attribute identifiers as NODE_WARN if warn_keyword_macro. + +2025-08-06 Patrick Palka + + PR c++/121231 + PR c++/119688 + PR c++/94511 + * mangle.cc (write_expression): Write out implicit non-trailing + zeroes of a CONSTRUCTOR when the ABI version is at least 21. + +2025-08-06 Jason Merrill + + * constexpr.cc (cxx_eval_indirect_ref): Improve diagnostic. + +2025-08-06 Kwok Cheung Yeung + + * parser.cc (cp_parser_omp_clause_from_to): Parse 'iterator' modifier. + * semantics.cc (finish_omp_clauses): Finish iterators for to/from + clauses. + +2025-08-06 Kwok Cheung Yeung + Andrew Stubbs + + * parser.cc (cp_parser_omp_clause_map): Parse 'iterator' modifier. + * semantics.cc (finish_omp_clauses): Finish iterators. Apply + iterators to generated clauses. + +2025-08-05 Jason Merrill + + PR c++/121068 + * constexpr.cc (cxx_eval_store_expression): Handle clobbers. + (potential_constant_expression_1): Handle clobbers more. + * decl.cc (build_clobber_this): Use INIT_EXPR for initial clobber. + * init.cc (build_new_1): Clobber on placement new. + (build_vec_init): Don't clean up after clobber. + 2025-08-04 Patrick Palka PR c++/121351 diff --git a/gcc/cp/call.cc b/gcc/cp/call.cc index 9283d974966c..02cef63a4538 100644 --- a/gcc/cp/call.cc +++ b/gcc/cp/call.cc @@ -7904,28 +7904,6 @@ extract_call_expr (tree call) call = TREE_OPERAND (call, 0); if (TREE_CODE (call) == TARGET_EXPR) call = TARGET_EXPR_INITIAL (call); - if (cxx_dialect >= cxx20) - switch (TREE_CODE (call)) - { - /* C++20 rewritten comparison operators. */ - case TRUTH_NOT_EXPR: - call = TREE_OPERAND (call, 0); - break; - case LT_EXPR: - case LE_EXPR: - case GT_EXPR: - case GE_EXPR: - case SPACESHIP_EXPR: - { - tree op0 = TREE_OPERAND (call, 0); - if (integer_zerop (op0)) - call = TREE_OPERAND (call, 1); - else - call = op0; - } - break; - default:; - } if (TREE_CODE (call) != CALL_EXPR && TREE_CODE (call) != AGGR_INIT_EXPR @@ -15054,7 +15032,10 @@ extend_ref_init_temps (tree decl, tree init, vec<tree, va_gc> **cleanups, /* P2718R0 - in C++23 for-range-initializer, extend all temps. */ if (DECL_NAME (decl) == for_range__identifier - && flag_range_for_ext_temps) + && flag_range_for_ext_temps + /* Iterating expansion statement decl is static right now, but that + could change depending on CWG3044 and CWG3043. 
*/ + && !TREE_STATIC (decl)) { gcc_checking_assert (!cond_guard); return extend_all_temps (decl, init, cleanups); diff --git a/gcc/cp/class.cc b/gcc/cp/class.cc index 14acb9c23c01..cf58f652fc1d 100644 --- a/gcc/cp/class.cc +++ b/gcc/cp/class.cc @@ -1365,14 +1365,14 @@ add_method (tree type, tree method, bool via_using) { if (TREE_CODE (fn) == TEMPLATE_DECL) ++processing_template_decl; - if (tree ti = CLASSTYPE_TEMPLATE_INFO (DECL_CONTEXT (fn))) + if (tree outer_args = outer_template_args (fn)) fn_constraints = tsubst_constraint_info (fn_constraints, - TI_ARGS (ti), + outer_args, tf_warning_or_error, fn); - if (tree ti = CLASSTYPE_TEMPLATE_INFO (DECL_CONTEXT (method))) + if (tree outer_args = outer_template_args (method)) method_constraints = tsubst_constraint_info (method_constraints, - TI_ARGS (ti), + outer_args, tf_warning_or_error, method); if (TREE_CODE (fn) == TEMPLATE_DECL) diff --git a/gcc/cp/constexpr.cc b/gcc/cp/constexpr.cc index 142579a91029..701420ca8ec0 100644 --- a/gcc/cp/constexpr.cc +++ b/gcc/cp/constexpr.cc @@ -2694,6 +2694,8 @@ cxx_bind_parameters_in_call (const constexpr_ctx *ctx, tree t, tree fun, arg = cxx_eval_constant_expression (ctx, x, vc_prvalue, non_constant_p, overflow_p, jump_target); + if (*jump_target) + break; /* Check we aren't dereferencing a null pointer when calling a non-static member function, which is undefined behaviour. */ if (i == 0 && DECL_OBJECT_MEMBER_FUNCTION_P (fun) @@ -2711,8 +2713,6 @@ cxx_bind_parameters_in_call (const constexpr_ctx *ctx, tree t, tree fun, /* Don't VERIFY_CONSTANT here. */ if (*non_constant_p && ctx->quiet) break; - if (*jump_target) - break; /* Just discard ellipsis args after checking their constantitude. */ if (!parms) continue; @@ -7179,10 +7179,23 @@ cxx_eval_indirect_ref (const constexpr_ctx *ctx, tree t, (TREE_TYPE (TREE_TYPE (sub)), TREE_TYPE (t))); /* DR 1188 says we don't have to deal with this. */ if (!ctx->quiet) - error_at (cp_expr_loc_or_input_loc (t), - "accessing value of %qE through a %qT glvalue in a " - "constant expression", build_fold_indirect_ref (sub), - TREE_TYPE (t)); + { + auto_diagnostic_group d; + error_at (cp_expr_loc_or_input_loc (t), + "accessing value of %qT object through a %qT " + "glvalue in a constant expression", + TREE_TYPE (TREE_TYPE (sub)), TREE_TYPE (t)); + tree ob = build_fold_indirect_ref (sub); + if (DECL_P (ob)) + { + if (DECL_ARTIFICIAL (ob)) + inform (DECL_SOURCE_LOCATION (ob), + "%qT object created here", TREE_TYPE (ob)); + else + inform (DECL_SOURCE_LOCATION (ob), + "%q#D declared here", ob); + } + } *non_constant_p = true; return t; } @@ -7452,12 +7465,6 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, tree init = TREE_OPERAND (t, 1); - if (TREE_CLOBBER_P (init) - && CLOBBER_KIND (init) < CLOBBER_OBJECT_END) - /* Only handle clobbers ending the lifetime of objects. - ??? We should probably set CONSTRUCTOR_NO_CLEARING. */ - return void_node; - /* First we figure out where we're storing to. */ tree target = TREE_OPERAND (t, 0); @@ -7644,11 +7651,17 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, } /* Handle explicit end-of-lifetime. */ - if (TREE_CLOBBER_P (init)) + if (TREE_CLOBBER_P (init) + && CLOBBER_KIND (init) >= CLOBBER_OBJECT_END) { if (refs->is_empty ()) - ctx->global->destroy_value (object); - return void_node; + { + ctx->global->destroy_value (object); + return void_node; + } + + /* Ending the lifetime of a const object is OK. 
*/ + const_object_being_modified = NULL_TREE; } type = TREE_TYPE (object); @@ -7785,6 +7798,8 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, *non_constant_p = true; } else if (!is_access_expr + || (TREE_CLOBBER_P (init) + && CLOBBER_KIND (init) >= CLOBBER_OBJECT_END) || (TREE_CODE (t) == MODIFY_EXPR && CLASS_TYPE_P (inner) && !type_has_non_deleted_trivial_default_ctor (inner))) @@ -7848,11 +7863,17 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, type = reftype; } + /* Change an "as-base" clobber to the real type; + we don't need to worry about padding in constexpr. */ + tree itype = initialized_type (init); + if (IS_FAKE_BASE_TYPE (itype)) + itype = TYPE_CONTEXT (itype); + /* For initialization of an empty base, the original target will be *(base*)this, evaluation of which resolves to the object argument, which has the derived type rather than the base type. */ if (!empty_base && !(same_type_ignoring_top_level_qualifiers_p - (initialized_type (init), type))) + (itype, type))) { gcc_assert (is_empty_class (TREE_TYPE (target))); empty_base = true; @@ -7959,8 +7980,10 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, /* Don't share a CONSTRUCTOR that might be changed later. */ init = unshare_constructor (init); - gcc_checking_assert (!*valp || (same_type_ignoring_top_level_qualifiers_p - (TREE_TYPE (*valp), type))); + gcc_checking_assert (!*valp + || *valp == void_node + || (same_type_ignoring_top_level_qualifiers_p + (TREE_TYPE (*valp), type))); if (empty_base) { /* Just evaluate the initializer and return, since there's no actual data @@ -7973,6 +7996,22 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, CONSTRUCTOR_ZERO_PADDING_BITS (*valp) = zero_padding_bits; } } + else if (TREE_CLOBBER_P (init)) + { + if (AGGREGATE_TYPE_P (type)) + { + if (*valp) + CONSTRUCTOR_ELTS (*valp) = nullptr; + else + *valp = build_constructor (type, nullptr); + TREE_CONSTANT (*valp) = true; + TREE_SIDE_EFFECTS (*valp) = false; + CONSTRUCTOR_NO_CLEARING (*valp) = true; + CONSTRUCTOR_ZERO_PADDING_BITS (*valp) = zero_padding_bits; + } + else + *valp = void_node; + } else if (*valp && TREE_CODE (*valp) == CONSTRUCTOR && TREE_CODE (init) == CONSTRUCTOR) { @@ -7997,6 +8036,9 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, && TREE_CODE (*valp) == CONSTRUCTOR && TYPE_READONLY (type)) { + tree target_type = TREE_TYPE (target); + if (IS_FAKE_BASE_TYPE (target_type)) + target_type = TYPE_CONTEXT (target_type); if (INDIRECT_REF_P (target) && (is_this_parameter (tree_strip_nop_conversions (TREE_OPERAND (target, 0))))) @@ -8004,7 +8046,7 @@ cxx_eval_store_expression (const constexpr_ctx *ctx, tree t, constructor of a delegating constructor). Leave it up to the caller that set 'this' to set TREE_READONLY appropriately. */ gcc_checking_assert (same_type_ignoring_top_level_qualifiers_p - (TREE_TYPE (target), type) || empty_base); + (target_type, type) || empty_base); else TREE_READONLY (*valp) = true; } @@ -11308,6 +11350,13 @@ potential_constant_expression_1 (tree t, bool want_rval, bool strict, bool now, && !FUNC_OR_METHOD_TYPE_P (TREE_TYPE (t)) && !NULLPTR_TYPE_P (TREE_TYPE (t))) { + if (TREE_CLOBBER_P (t)) + { + /* We should have caught any clobbers in INIT/MODIFY_EXPR. 
*/ + gcc_checking_assert (false); + return true; + } + if (flags & tf_error) constexpr_error (loc, fundef_p, "lvalue-to-rvalue conversion of " "a volatile lvalue %qE with type %qT", t, @@ -11569,12 +11618,14 @@ potential_constant_expression_1 (tree t, bool want_rval, bool strict, bool now, } return false; } + tree ve = DECL_VALUE_EXPR (t); /* Treat __PRETTY_FUNCTION__ inside a template function as potentially-constant. */ - else if (DECL_PRETTY_FUNCTION_P (t) - && DECL_VALUE_EXPR (t) == error_mark_node) + if (DECL_PRETTY_FUNCTION_P (t) && ve == error_mark_node) return true; - return RECUR (DECL_VALUE_EXPR (t), rval); + if (DECL_DECOMPOSITION_P (t) && TREE_CODE (ve) == TREE_VEC) + return RECUR (TREE_VEC_ELT (ve, 0), rval); + return RECUR (ve, rval); } if (want_rval && (now || !var_in_maybe_constexpr_fn (t)) @@ -12131,6 +12182,8 @@ potential_constant_expression_1 (tree t, bool want_rval, bool strict, bool now, } /* FALLTHRU */ case INIT_EXPR: + if (TREE_CLOBBER_P (TREE_OPERAND (t, 1))) + return true; return RECUR (TREE_OPERAND (t, 1), rval); case CONSTRUCTOR: @@ -12425,6 +12478,7 @@ potential_constant_expression_1 (tree t, bool want_rval, bool strict, bool now, case CO_AWAIT_EXPR: case CO_YIELD_EXPR: case CO_RETURN_EXPR: + case TEMPLATE_FOR_STMT: if (flags & tf_error) constexpr_error (cp_expr_loc_or_loc (t, input_location), fundef_p, "%qE is not a constant expression", t); diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc index cbdfafc90c08..4b20b791e5d1 100644 --- a/gcc/cp/constraint.cc +++ b/gcc/cp/constraint.cc @@ -3304,6 +3304,9 @@ diagnose_trait_expr (location_t loc, tree expr, tree args) case CPTK_TYPE_ORDER: inform (loc, "%qT and %qT cannot be ordered", t1, t2); break; + case CPTK_STRUCTURED_BINDING_SIZE: + inform (loc, "%qT is not destructurable", t1); + break; case CPTK_REF_CONSTRUCTS_FROM_TEMPORARY: inform (loc, "%qT is not a reference that binds to a temporary " "object of type %qT (direct-initialization)", t1, t2); diff --git a/gcc/cp/cp-objcp-common.cc b/gcc/cp/cp-objcp-common.cc index 7665b9454dc5..ee1c0ba3de3e 100644 --- a/gcc/cp/cp-objcp-common.cc +++ b/gcc/cp/cp-objcp-common.cc @@ -659,6 +659,7 @@ cp_common_init_ts (void) MARK_TS_EXP (IF_STMT); MARK_TS_EXP (OMP_DEPOBJ); MARK_TS_EXP (RANGE_FOR_STMT); + MARK_TS_EXP (TEMPLATE_FOR_STMT); MARK_TS_EXP (TRY_BLOCK); MARK_TS_EXP (USING_STMT); diff --git a/gcc/cp/cp-trait.def b/gcc/cp/cp-trait.def index 9fedfd71a38f..5e4493a84a0f 100644 --- a/gcc/cp/cp-trait.def +++ b/gcc/cp/cp-trait.def @@ -117,6 +117,7 @@ DEFTRAIT_TYPE (REMOVE_CVREF, "__remove_cvref", 1) DEFTRAIT_TYPE (REMOVE_EXTENT, "__remove_extent", 1) DEFTRAIT_TYPE (REMOVE_POINTER, "__remove_pointer", 1) DEFTRAIT_TYPE (REMOVE_REFERENCE, "__remove_reference", 1) +DEFTRAIT_EXPR (STRUCTURED_BINDING_SIZE, "__builtin_structured_binding_size", 1) DEFTRAIT_EXPR (TYPE_ORDER, "__builtin_type_order", 2) DEFTRAIT_TYPE (TYPE_PACK_ELEMENT, "__type_pack_element", -1) DEFTRAIT_TYPE (UNDERLYING_TYPE, "__underlying_type", 1) diff --git a/gcc/cp/cp-tree.def b/gcc/cp/cp-tree.def index bb5aaf983fee..b1e369738fa8 100644 --- a/gcc/cp/cp-tree.def +++ b/gcc/cp/cp-tree.def @@ -299,6 +299,11 @@ DEFTREECODE (IF_STMT, "if_stmt", tcc_statement, 4) templates. */ DEFTREECODE (RANGE_FOR_STMT, "range_for_stmt", tcc_statement, 6) +/* Used to represent an expansion-statement. The operands are + TEMPLATE_FOR_DECL, TEMPLATE_FOR_EXPR, TEMPLATE_FOR_BODY, + TEMPLATE_FOR_SCOPE, and TEMPLATE_FOR_INIT_STMT, respectively. 
*/ +DEFTREECODE (TEMPLATE_FOR_STMT, "template_for_stmt", tcc_statement, 5) + /* Used to represent an expression statement. Use `EXPR_STMT_EXPR' to obtain the expression. */ DEFTREECODE (EXPR_STMT, "expr_stmt", tcc_expression, 1) diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h index fb8e0d8d98e3..55e8e0736272 100644 --- a/gcc/cp/cp-tree.h +++ b/gcc/cp/cp-tree.h @@ -476,6 +476,7 @@ extern GTY(()) tree cp_global_trees[CPTI_MAX]; ATOMIC_CONSTR_EXPR_FROM_CONCEPT_P (in ATOMIC_CONSTR) STATIC_INIT_DECOMP_BASE_P (in the TREE_LIST for {static,tls}_aggregates) MUST_NOT_THROW_THROW_P (in MUST_NOT_THROW_EXPR) + LAMBDA_EXPR_CONST_QUAL_P (in LAMBDA_EXPR) 2: IDENTIFIER_KIND_BIT_2 (in IDENTIFIER_NODE) ICS_THIS_FLAG (in _CONV) DECL_INITIALIZED_BY_CONSTANT_EXPRESSION_P (in VAR_DECL) @@ -1557,6 +1558,13 @@ enum cp_lambda_default_capture_mode_type { #define LAMBDA_EXPR_CONSTEVAL_BLOCK_P(NODE) \ TREE_LANG_FLAG_0 (LAMBDA_EXPR_CHECK (NODE)) +/* True if we should add "const" when figuring out the type of an entity + in a lambda. This is false in the parameter-declaration-clause of + a lambda; after that, it will remain false if the mutable keyword is + present. */ +#define LAMBDA_EXPR_CONST_QUAL_P(NODE) \ + TREE_LANG_FLAG_1 (LAMBDA_EXPR_CHECK (NODE)) + /* True iff uses of a const variable capture were optimized away. */ #define LAMBDA_EXPR_CAPTURE_OPTIMIZED(NODE) \ TREE_LANG_FLAG_2 (LAMBDA_EXPR_CHECK (NODE)) @@ -1973,6 +1981,8 @@ struct GTY(()) saved_scope { of consteval if statement. Also set while processing an immediate invocation. */ BOOL_BITFIELD consteval_if_p : 1; + /* Nonzero if we are parsing the substatement of expansion-statement. */ + BOOL_BITFIELD expansion_stmt : 1; int unevaluated_operand; int inhibit_evaluation_warnings; @@ -2046,6 +2056,7 @@ extern GTY(()) struct saved_scope *scope_chain; #define in_discarded_stmt scope_chain->discarded_stmt #define in_consteval_if_p scope_chain->consteval_if_p +#define in_expansion_stmt scope_chain->expansion_stmt #define current_ref_temp_count scope_chain->ref_temp_count @@ -2330,7 +2341,8 @@ enum languages { lang_c, lang_cplusplus }; /* Nonzero if NODE, a TYPE, has no name for linkage purposes. */ #define TYPE_UNNAMED_P(NODE) \ (TYPE_ANON_P (NODE) \ - && !IDENTIFIER_LAMBDA_P (TYPE_LINKAGE_IDENTIFIER (NODE))) + && !IDENTIFIER_LAMBDA_P (TYPE_LINKAGE_IDENTIFIER (NODE)) \ + && !enum_with_enumerator_for_linkage_p (NODE)) /* The _DECL for this _TYPE. */ #define TYPE_MAIN_DECL(NODE) (TYPE_STUB_DECL (TYPE_MAIN_VARIANT (NODE))) @@ -5691,6 +5703,19 @@ decl_template_parm_check (const_tree t, const char *f, int l, const char *fn) #define RANGE_FOR_IVDEP(NODE) TREE_LANG_FLAG_6 (RANGE_FOR_STMT_CHECK (NODE)) #define RANGE_FOR_NOVECTOR(NODE) TREE_LANG_FLAG_5 (RANGE_FOR_STMT_CHECK (NODE)) +/* TEMPLATE_FOR_STMT accessors. These give access to the declarator, + expression, body, and scope of the statement, respectively. */ +#define TEMPLATE_FOR_DECL(NODE) \ + TREE_OPERAND (TEMPLATE_FOR_STMT_CHECK (NODE), 0) +#define TEMPLATE_FOR_EXPR(NODE) \ + TREE_OPERAND (TEMPLATE_FOR_STMT_CHECK (NODE), 1) +#define TEMPLATE_FOR_BODY(NODE) \ + TREE_OPERAND (TEMPLATE_FOR_STMT_CHECK (NODE), 2) +#define TEMPLATE_FOR_SCOPE(NODE) \ + TREE_OPERAND (TEMPLATE_FOR_STMT_CHECK (NODE), 3) +#define TEMPLATE_FOR_INIT_STMT(NODE) \ + TREE_OPERAND (TEMPLATE_FOR_STMT_CHECK (NODE), 4) + /* STMT_EXPR accessor. */ #define STMT_EXPR_STMT(NODE) TREE_OPERAND (STMT_EXPR_CHECK (NODE), 0) @@ -6802,9 +6827,11 @@ struct GTY((chain_next ("%h.next"))) tinst_level { /* The original node. 
TLDCL can be a DECL (for a function or static data member), a TYPE (for a class), depending on what we were - asked to instantiate, or a TREE_LIST with the template as PURPOSE - and the template args as VALUE, if we are substituting for - overload resolution. In all these cases, TARGS is NULL. + asked to instantiate, a TEMPLATE_FOR_STMT (for instantiation + of expansion stmt body outside of templates) or a TREE_LIST with + the template as PURPOSE and the template args as VALUE, if we are + substituting for overload resolution. In all these cases, TARGS + is NULL. However, to avoid creating TREE_LIST objects for substitutions if we can help, we store PURPOSE and VALUE in TLDCL and TARGS, respectively. So TLDCL stands for TREE_LIST or DECL (the @@ -7278,6 +7305,7 @@ extern void omp_declare_variant_finalize (tree, tree); struct cp_decomp { tree decl; unsigned int count; }; extern void cp_finish_decl (tree, tree, bool, tree, int, cp_decomp * = nullptr); extern tree lookup_decomp_type (tree); +HOST_WIDE_INT cp_decomp_size (location_t, tree, tsubst_flags_t); extern bool cp_finish_decomp (tree, cp_decomp *, bool = false); extern int cp_complete_array_type (tree *, tree, bool); extern int cp_complete_array_type_or_error (tree *, tree, bool, tsubst_flags_t); @@ -7298,6 +7326,7 @@ extern tree xref_tag (tag_types, tree, bool tpl_header_p = false); extern void xref_basetypes (tree, tree); extern tree start_enum (tree, tree, tree, tree, bool, bool *); +extern bool enum_with_enumerator_for_linkage_p (tree); extern void finish_enum_value_list (tree); extern void finish_enum (tree); extern tree build_enumerator (tree, tree, tree, tree, location_t); @@ -7760,8 +7789,12 @@ extern tree clone_attrs (tree); extern bool maybe_clone_body (tree); /* In parser.cc */ +extern tree cp_build_range_for_decls (location_t, tree, tree *, bool); extern tree cp_convert_range_for (tree, tree, tree, cp_decomp *, bool, tree, bool); +extern tree build_range_temp (tree); +extern tree cp_perform_range_for_lookup (tree, tree *, tree *, + tsubst_flags_t = tf_warning_or_error); extern void cp_convert_omp_range_for (tree &, tree &, tree &, tree &, tree &, tree &, tree &, tree &, bool); @@ -7774,6 +7807,8 @@ extern location_t defparse_location (tree); extern void maybe_show_extern_c_location (void); extern bool literal_integer_zerop (const_tree); extern tree attr_chainon (tree, tree); +extern tree maybe_add_dummy_lambda_op (tree); +extern void remove_dummy_lambda_op (tree, tree); /* in pt.cc */ extern tree canonical_type_parameter (tree); @@ -7978,6 +8013,7 @@ extern tree add_to_template_args (tree, tree); extern tree add_outermost_template_args (tree, tree); extern tree add_extra_args (tree, tree, tsubst_flags_t, tree); extern tree build_extra_args (tree, tree, tsubst_flags_t); +extern void finish_expansion_stmt (tree, tree, tsubst_flags_t, tree); /* in rtti.cc */ /* A vector of all tinfo decls that haven't been emitted yet. 
*/ @@ -8078,6 +8114,7 @@ class deferring_access_check_sentinel extern int stmts_are_full_exprs_p (void); extern void init_cp_semantics (void); extern tree do_poplevel (tree); +extern tree do_pushlevel (scope_kind); extern void break_maybe_infinite_loop (void); extern void add_decl_expr (tree); extern tree maybe_cleanup_point_expr_void (tree); @@ -8104,7 +8141,7 @@ extern void find_range_for_decls (tree[3]); extern void finish_for_stmt (tree); extern tree begin_range_for_stmt (tree, tree); extern void finish_range_for_decl (tree, tree, tree); -extern void finish_range_for_stmt (tree); +extern tree begin_template_for_scope (tree *); extern tree finish_break_stmt (void); extern tree finish_continue_stmt (void); extern tree begin_switch_stmt (void); @@ -8257,6 +8294,7 @@ extern void finish_static_assert (tree, tree, location_t, extern tree finish_decltype_type (tree, bool, tsubst_flags_t); extern tree fold_builtin_is_corresponding_member (location_t, int, tree *); extern tree fold_builtin_is_pointer_inverconvertible_with_class (location_t, int, tree *); +extern tree finish_structured_binding_size (location_t, tree, tsubst_flags_t); extern tree finish_trait_expr (location_t, enum cp_trait_kind, tree, tree); extern tree finish_trait_type (enum cp_trait_kind, tree, tree, tsubst_flags_t); extern tree build_lambda_expr (void); @@ -8301,6 +8339,7 @@ extern void record_lambda_scope (tree lambda); extern void record_lambda_scope_discriminator (tree lambda); extern void record_lambda_scope_sig_discriminator (tree lambda, tree fn); extern tree start_lambda_function (tree fn, tree lambda_expr); +extern void push_capture_proxies (tree, bool = false); extern void finish_lambda_function (tree body); extern bool regenerated_lambda_fn_p (tree); extern tree lambda_regenerating_args (tree); diff --git a/gcc/cp/cxx-pretty-print.cc b/gcc/cp/cxx-pretty-print.cc index 5f24015d7de3..4916bf66ee24 100644 --- a/gcc/cp/cxx-pretty-print.cc +++ b/gcc/cp/cxx-pretty-print.cc @@ -2137,6 +2137,29 @@ cxx_pretty_printer::statement (tree t) pp_needs_newline (this) = true; break; + case TEMPLATE_FOR_STMT: + pp_cxx_ws_string (this, "template for"); + pp_space (this); + pp_cxx_left_paren (this); + if (TEMPLATE_FOR_INIT_STMT (t)) + { + statement (TEMPLATE_FOR_INIT_STMT (t)); + pp_needs_newline (this) = false; + pp_cxx_whitespace (this); + } + statement (TEMPLATE_FOR_DECL (t)); + pp_space (this); + pp_needs_newline (this) = false; + pp_colon (this); + pp_space (this); + statement (TEMPLATE_FOR_EXPR (t)); + pp_cxx_right_paren (this); + pp_newline_and_indent (this, 3); + statement (TEMPLATE_FOR_BODY (t)); + pp_indentation (this) -= 3; + pp_needs_newline (this) = true; + break; + /* expression-statement: expression(opt) ; */ case EXPR_STMT: diff --git a/gcc/cp/decl.cc b/gcc/cp/decl.cc index cb3ebfff4298..4b1a335910b1 100644 --- a/gcc/cp/decl.cc +++ b/gcc/cp/decl.cc @@ -572,9 +572,9 @@ poplevel_named_label_1 (named_label_entry **slot, cp_binding_level *bl) ent->in_stmt_expr = true; break; case sk_block: - if (level_for_constexpr_if (bl->level_chain)) + if (level_for_constexpr_if (obl)) ent->in_constexpr_if = true; - else if (level_for_consteval_if (bl->level_chain)) + else if (level_for_consteval_if (obl)) ent->in_consteval_if = true; break; default: @@ -4336,7 +4336,19 @@ finish_case_label (location_t loc, tree low_value, tree high_value) tree label; /* For templates, just add the case label; we'll do semantic - analysis at instantiation-time. */ + analysis at instantiation-time. 
But diagnose case labels + that appear inside an expansion statement whose enclosing switch is + outside of it here. */ + if (in_expansion_stmt) + for (cp_binding_level *b = current_binding_level; + b != switch_stack->level; b = b->level_chain) + if (b->kind == sk_template_for && b->this_entity) + { + auto_diagnostic_group d; + error ("jump to case label"); + inform (EXPR_LOCATION (b->this_entity), " enters %