diff --git a/library/stdarch/.github/workflows/main.yml b/library/stdarch/.github/workflows/main.yml index 5c84e856d54bb..98f6b842d135f 100644 --- a/library/stdarch/.github/workflows/main.yml +++ b/library/stdarch/.github/workflows/main.yml @@ -281,10 +281,19 @@ jobs: - armv7-unknown-linux-gnueabihf - x86_64-unknown-linux-gnu profile: [dev, release] + cc: [clang, gcc] include: - target: aarch64_be-unknown-linux-gnu build_std: true - + - target: x86_64-unknown-linux-gnu + cc: icx + profile: dev + - target: x86_64-unknown-linux-gnu + cc: icx + profile: release + exclude: + - target: armv7-unknown-linux-gnueabihf + cc: gcc steps: - uses: actions/checkout@v6 - name: Install Rust @@ -297,10 +306,11 @@ jobs: rustup component add rust-src echo "CARGO_UNSTABLE_BUILD_STD=std" >> $GITHUB_ENV if: ${{ matrix.build_std }} + - run: rustup component add rustfmt # Configure some env vars based on matrix configuration - run: echo "PROFILE=${{ matrix.profile }}" >> $GITHUB_ENV - - run: ./ci/intrinsic-test-docker.sh ${{ matrix.target }} + - run: ./ci/intrinsic-test-docker.sh ${{ matrix.target }} ${{ matrix.cc }} if: ${{ !startsWith(matrix.target, 'thumb') }} env: TARGET: ${{ matrix.target }} @@ -308,7 +318,7 @@ jobs: # Check that the generated files agree with the checked-in versions. check-stdarch-gen: needs: [style] - name: Check stdarch-gen-{arm, loongarch, hexagon} output + name: Check stdarch-gen-{arm, loongarch, hexagon, hexagon-scalar} output runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 @@ -330,6 +340,10 @@ jobs: run: | cargo run -p stdarch-gen-hexagon --release git diff --exit-code + - name: Check hexagon scalar + run: | + cargo run -p stdarch-gen-hexagon-scalar --release + git diff --exit-code # Run some tests with Miri. Most stdarch functions use platform-specific intrinsics # that Miri does not support. Also Miri is reltively slow. diff --git a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile index e2b3d95585efe..1b61dd0c1b87a 100644 --- a/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/aarch64-unknown-linux-gnu/Dockerfile @@ -15,7 +15,8 @@ RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64 RUN mkdir llvm RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm -ENV PATH="/llvm/bin:$PATH" +ENV CLANG_PATH="/llvm/bin/clang" +ENV GCC_PATH=aarch64-linux-gnu-gcc ENV CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \ CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64 -cpu max -L /usr/aarch64-linux-gnu" \ diff --git a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile index d7c12493ad9cf..70acc2d22a418 100644 --- a/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/aarch64_be-unknown-linux-gnu/Dockerfile @@ -23,11 +23,12 @@ RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64 RUN mkdir llvm RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm -ENV PATH="/llvm/bin:$PATH" - ENV AARCH64_BE_TOOLCHAIN="/toolchains/${TOOLCHAIN}" ENV AARCH64_BE_LIBC="${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc" +ENV CLANG_PATH="/llvm/bin/clang" +ENV GCC_PATH="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc" + ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_LINKER="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-gcc" ENV CARGO_TARGET_AARCH64_BE_UNKNOWN_LINUX_GNU_RUNNER="qemu-aarch64_be -cpu max -L ${AARCH64_BE_LIBC}" ENV OBJDUMP="${AARCH64_BE_TOOLCHAIN}/bin/aarch64_be-none-linux-gnu-objdump" diff --git a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile index 02744917af6df..3c8a1b5add27f 100644 --- a/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile +++ b/library/stdarch/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile @@ -14,7 +14,8 @@ RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64 RUN mkdir llvm RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm -ENV PATH="/llvm/bin:$PATH" +ENV CLANG_PATH="/llvm/bin/clang" +ENV GCC_PATH=arm-linux-gnueabihf-gcc ENV CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_LINKER=arm-linux-gnueabihf-gcc \ CARGO_TARGET_ARMV7_UNKNOWN_LINUX_GNUEABIHF_RUNNER="qemu-arm -cpu max -L /usr/arm-linux-gnueabihf" \ diff --git a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile index 17d1ac67e714f..efbb2b0853371 100644 --- a/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile +++ b/library/stdarch/ci/docker/x86_64-unknown-linux-gnu/Dockerfile @@ -6,7 +6,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ make \ ca-certificates \ wget \ - xz-utils + xz-utils \ + gpg RUN wget http://ci-mirrors.rust-lang.org/sde-external-10.8.0-2026-03-15-lin.tar.xz -O sde.tar.xz RUN mkdir intel-sde @@ -16,7 +17,18 @@ RUN wget https://mirrors.edge.kernel.org/pub/tools/llvm/files/llvm-22.1.4-x86_64 RUN mkdir llvm RUN tar -xvf llvm.tar.xz --strip-components=1 -C llvm -ENV PATH="/llvm/bin:$PATH" +RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB |\ + gpg --dearmor |\ + tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null + +RUN echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" |\ + tee /etc/apt/sources.list.d/oneAPI.list + +RUN apt-get update && apt-get install -y --no-install-recommends intel-oneapi-compiler-dpcpp-cpp + +ENV CLANG_PATH="/llvm/bin/clang" +ENV GCC_PATH="gcc" +ENV ICX_PATH="/opt/intel/oneapi/compiler/2026.0/bin/icx" ENV CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="/intel-sde/sde64 \ -cpuid-in /checkout/ci/docker/x86_64-unknown-linux-gnu/cpuid.def \ diff --git a/library/stdarch/ci/intrinsic-test-docker.sh b/library/stdarch/ci/intrinsic-test-docker.sh index 948b53dc67bc9..c1d44dca91c5e 100755 --- a/library/stdarch/ci/intrinsic-test-docker.sh +++ b/library/stdarch/ci/intrinsic-test-docker.sh @@ -5,8 +5,8 @@ set -ex -if [ $# -lt 1 ]; then - >&2 echo "Usage: $0 " +if [ $# -lt 2 ]; then + >&2 echo "Usage: $0 " exit 1 fi @@ -29,7 +29,6 @@ run() { --user "$(id -u)":"$(id -g)" \ --env CARGO_HOME=/cargo \ --env CARGO_TARGET_DIR=/checkout/target \ - --env TARGET="${1}" \ --env PROFILE \ --env "${HOST_LINKER}"="cc" \ --env STDARCH_DISABLE_ASSERT_INSTR \ @@ -48,12 +47,12 @@ run() { --workdir /checkout \ --privileged \ stdarch \ - sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh" + sh -c "HOME=/tmp PATH=\$PATH:/rust/bin exec ci/intrinsic-test.sh ${1} ${2}" } if [ -z "$1" ]; then >&2 echo "No target specified!" exit 1 else - run "${1}" + run "${1}" "${2}" fi diff --git a/library/stdarch/ci/intrinsic-test.sh b/library/stdarch/ci/intrinsic-test.sh index 1f3a2caf50654..0441611f38fd2 100755 --- a/library/stdarch/ci/intrinsic-test.sh +++ b/library/stdarch/ci/intrinsic-test.sh @@ -2,7 +2,30 @@ set -ex -: "${TARGET?The TARGET environment variable must be set.}" +if [ $# -lt 2 ]; then + >&2 echo "Usage: $0 " + exit 1 +fi + +case ${2} in + clang) + export CC="${CLANG_PATH}" + CC_ARG_STYLE=clang + ;; + gcc) + export CC="${GCC_PATH}" + CC_ARG_STYLE=gcc + ;; + icx) + export CC="${ICX_PATH}" + # `icx` uses clang-style arguments + CC_ARG_STYLE=clang + ;; + *) + >&2 echo "Unknown compiler: ${2}" + exit 1 + ;; +esac export RUSTFLAGS="${RUSTFLAGS} -D warnings -Z merge-functions=disabled -Z verify-llvm-ir" export PROFILE="${PROFILE:="release"}" @@ -12,49 +35,49 @@ echo "PROFILE=${PROFILE}" INTRINSIC_TEST="--manifest-path=crates/intrinsic-test/Cargo.toml" -export CC="clang" - -case ${TARGET} in +case ${1} in aarch64_be*) export CFLAGS="-I${AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc/usr/include --sysroot={AARCH64_BE_TOOLCHAIN}/aarch64_be-none-linux-gnu/libc -Wno-nonportable-vector-initialization" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64_be.txt + ARCH=aarch64_be ;; aarch64*) export CFLAGS="-I/usr/aarch64-linux-gnu/include/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_aarch64.txt + ARCH=aarch64 ;; armv7*) export CFLAGS="-I/usr/arm-linux-gnueabihf/include/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_arm.txt + ARCH=arm ;; x86_64*) export CFLAGS="-I/usr/include/x86_64-linux-gnu/" - TEST_SKIP_INTRINSICS=crates/intrinsic-test/missing_x86.txt + ARCH=x86 ;; *) ;; esac -case "${TARGET}" in +case "${1}" in x86_64-unknown-linux-gnu*) env -u CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER \ cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/x86-intel.xml \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" - - echo "${CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER}" + --skip "crates/intrinsic-test/missing_${ARCH}_common.txt" \ + --skip "crates/intrinsic-test/missing_${ARCH}_${2}.txt" \ + --target "${1}" \ + --cc-arg-style "${CC_ARG_STYLE}" ;; *) cargo run "${INTRINSIC_TEST}" --release \ --bin intrinsic-test -- intrinsics_data/arm_intrinsics.json \ - --skip "${TEST_SKIP_INTRINSICS}" \ - --target "${TARGET}" + --skip "crates/intrinsic-test/missing_${ARCH}_common.txt" \ + --skip "crates/intrinsic-test/missing_${ARCH}_${2}.txt" \ + --target "${1}" \ + --cc-arg-style "${CC_ARG_STYLE}" ;; esac -cargo test --manifest-path=rust_programs/Cargo.toml --target "${TARGET}" --profile "${PROFILE}" +cargo test --manifest-path=rust_programs/Cargo.toml --target "${1}" --profile "${PROFILE}" --tests diff --git a/library/stdarch/crates/core_arch/src/hexagon/scalar.rs b/library/stdarch/crates/core_arch/src/hexagon/scalar.rs index c906ec5166a19..477414de74b85 100644 --- a/library/stdarch/crates/core_arch/src/hexagon/scalar.rs +++ b/library/stdarch/crates/core_arch/src/hexagon/scalar.rs @@ -11425,7 +11425,7 @@ pub unsafe fn Q6_l2fetch_AP(rs: i32, rtt: i64) { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(1)] #[cfg_attr(test, assert_instr(rol, IU6 = 0))] @@ -11440,7 +11440,7 @@ pub unsafe fn Q6_P_rol_PI(rss: i64) -> i64 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU6 = 0))] @@ -11455,7 +11455,7 @@ pub unsafe fn Q6_P_rolacc_PI(rxx: i64, rss: i64) -> i64 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU6 = 0))] @@ -11470,7 +11470,7 @@ pub unsafe fn Q6_P_roland_PI(rxx: i64, rss: i64) -> i64 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU6 = 0))] @@ -11485,7 +11485,7 @@ pub unsafe fn Q6_P_rolnac_PI(rxx: i64, rss: i64) -> i64 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU6 = 0))] @@ -11500,7 +11500,7 @@ pub unsafe fn Q6_P_rolor_PI(rxx: i64, rss: i64) -> i64 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU6 = 0))] @@ -11515,7 +11515,7 @@ pub unsafe fn Q6_P_rolxacc_PI(rxx: i64, rss: i64) -> i64 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(1)] #[cfg_attr(test, assert_instr(rol, IU5 = 0))] @@ -11530,7 +11530,7 @@ pub unsafe fn Q6_R_rol_RI(rs: i32) -> i32 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU5 = 0))] @@ -11545,7 +11545,7 @@ pub unsafe fn Q6_R_rolacc_RI(rx: i32, rs: i32) -> i32 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU5 = 0))] @@ -11560,7 +11560,7 @@ pub unsafe fn Q6_R_roland_RI(rx: i32, rs: i32) -> i32 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU5 = 0))] @@ -11575,7 +11575,7 @@ pub unsafe fn Q6_R_rolnac_RI(rx: i32, rs: i32) -> i32 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU5 = 0))] @@ -11590,7 +11590,7 @@ pub unsafe fn Q6_R_rolor_RI(rx: i32, rs: i32) -> i32 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V60 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v60"))] #[rustc_legacy_const_generics(2)] #[cfg_attr(test, assert_instr(rol, IU5 = 0))] @@ -11605,7 +11605,7 @@ pub unsafe fn Q6_R_rolxacc_RI(rx: i32, rs: i32) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V62 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))] #[cfg_attr(test, assert_instr(vabsdiffb))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11618,7 +11618,7 @@ pub unsafe fn Q6_P_vabsdiffb_PP(rtt: i64, rss: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V62 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))] #[cfg_attr(test, assert_instr(vabsdiffub))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11631,7 +11631,7 @@ pub unsafe fn Q6_P_vabsdiffub_PP(rtt: i64, rss: i64) -> i64 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V62 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))] #[cfg_attr(test, assert_instr(vsplatb))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11644,7 +11644,7 @@ pub unsafe fn Q6_P_vsplatb_R(rs: i32) -> i64 { /// Instruction Type: S_3op /// Execution Slots: SLOT23 /// Requires: V62 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))] #[cfg_attr(test, assert_instr(vtrunehb))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11657,7 +11657,7 @@ pub unsafe fn Q6_P_vtrunehb_PP(rss: i64, rtt: i64) -> i64 { /// Instruction Type: S_3op /// Execution Slots: SLOT23 /// Requires: V62 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v62"))] #[cfg_attr(test, assert_instr(vtrunohb))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11670,7 +11670,7 @@ pub unsafe fn Q6_P_vtrunohb_PP(rss: i64, rtt: i64) -> i64 { /// Instruction Type: ALU64 /// Execution Slots: SLOT23 /// Requires: V65 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v65"))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] pub unsafe fn Q6_p_not_any8_vcmpb_eq_PP(rss: i64, rtt: i64) -> i32 { @@ -11682,7 +11682,7 @@ pub unsafe fn Q6_p_not_any8_vcmpb_eq_PP(rss: i64, rtt: i64) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V66 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v66"))] #[cfg_attr(test, assert_instr(dfadd))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11695,7 +11695,7 @@ pub unsafe fn Q6_P_dfadd_PP(rss: f64, rtt: f64) -> f64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V66 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v66"))] #[cfg_attr(test, assert_instr(dfsub))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11708,7 +11708,7 @@ pub unsafe fn Q6_P_dfsub_PP(rss: f64, rtt: f64) -> f64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V66 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v66"))] #[cfg_attr(test, assert_instr(mpyi))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11721,7 +11721,7 @@ pub unsafe fn Q6_R_mpyinac_RR(rx: i32, rs: i32, rt: i32) -> i32 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V66 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v66"))] #[rustc_legacy_const_generics(0, 1)] #[cfg_attr(test, assert_instr(mask, IU5 = 0, IU5_2 = 0))] @@ -11737,7 +11737,7 @@ pub unsafe fn Q6_R_mask_II() -> i32 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[rustc_legacy_const_generics(1)] #[cfg_attr(test, assert_instr(clip, IU5 = 0))] @@ -11752,7 +11752,7 @@ pub unsafe fn Q6_R_clip_RI(rs: i32) -> i32 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[rustc_legacy_const_generics(1)] #[cfg_attr(test, assert_instr(cround, IU6 = 0))] @@ -11767,7 +11767,7 @@ pub unsafe fn Q6_P_cround_PI(rss: i64) -> i64 { /// Instruction Type: S_3op /// Execution Slots: SLOT23 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cround))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11780,7 +11780,7 @@ pub unsafe fn Q6_P_cround_PR(rss: i64, rt: i32) -> i64 { /// Instruction Type: S_2op /// Execution Slots: SLOT23 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[rustc_legacy_const_generics(1)] #[cfg_attr(test, assert_instr(vclip, IU5 = 0))] @@ -11795,7 +11795,7 @@ pub unsafe fn Q6_P_vclip_PI(rss: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V67 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))] #[cfg_attr(test, assert_instr(dfmax))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11808,7 +11808,7 @@ pub unsafe fn Q6_P_dfmax_PP(rss: f64, rtt: f64) -> f64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V67 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))] #[cfg_attr(test, assert_instr(dfmin))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11821,7 +11821,7 @@ pub unsafe fn Q6_P_dfmin_PP(rss: f64, rtt: f64) -> f64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V67 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))] #[cfg_attr(test, assert_instr(dfmpyfix))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11834,7 +11834,7 @@ pub unsafe fn Q6_P_dfmpyfix_PP(rss: f64, rtt: f64) -> f64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V67 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))] #[cfg_attr(test, assert_instr(dfmpyhh))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11847,7 +11847,7 @@ pub unsafe fn Q6_P_dfmpyhhacc_PP(rxx: f64, rss: f64, rtt: f64) -> f64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V67 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))] #[cfg_attr(test, assert_instr(dfmpylh))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11860,7 +11860,7 @@ pub unsafe fn Q6_P_dfmpylhacc_PP(rxx: f64, rss: f64, rtt: f64) -> f64 { /// Instruction Type: M /// Execution Slots: SLOT23 /// Requires: V67 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67"))] #[cfg_attr(test, assert_instr(dfmpyll))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11873,7 +11873,7 @@ pub unsafe fn Q6_P_dfmpyll_PP(rss: f64, rtt: f64) -> f64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyiw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11886,7 +11886,7 @@ pub unsafe fn Q6_P_cmpyiw_PP(rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyiw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11899,7 +11899,7 @@ pub unsafe fn Q6_P_cmpyiwacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyiw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11912,7 +11912,7 @@ pub unsafe fn Q6_P_cmpyiw_PP_conj(rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyiw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11925,7 +11925,7 @@ pub unsafe fn Q6_P_cmpyiwacc_PP_conj(rxx: i64, rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyrw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11938,7 +11938,7 @@ pub unsafe fn Q6_P_cmpyrw_PP(rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyrw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11951,7 +11951,7 @@ pub unsafe fn Q6_P_cmpyrwacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyrw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11964,7 +11964,7 @@ pub unsafe fn Q6_P_cmpyrw_PP_conj(rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyrw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11977,7 +11977,7 @@ pub unsafe fn Q6_P_cmpyrwacc_PP_conj(rxx: i64, rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(vdmpyw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -11990,7 +11990,7 @@ pub unsafe fn Q6_P_vdmpyw_PP(rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(vdmpyw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12003,7 +12003,7 @@ pub unsafe fn Q6_P_vdmpywacc_PP(rxx: i64, rss: i64, rtt: i64) -> i64 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyiw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12016,7 +12016,7 @@ pub unsafe fn Q6_R_cmpyiw_PP_s1_sat(rss: i64, rtt: i64) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyiw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12029,7 +12029,7 @@ pub unsafe fn Q6_R_cmpyiw_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyiw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12042,7 +12042,7 @@ pub unsafe fn Q6_R_cmpyiw_PP_conj_s1_sat(rss: i64, rtt: i64) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyiw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12055,7 +12055,7 @@ pub unsafe fn Q6_R_cmpyiw_PP_conj_s1_rnd_sat(rss: i64, rtt: i64) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyrw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12068,7 +12068,7 @@ pub unsafe fn Q6_R_cmpyrw_PP_s1_sat(rss: i64, rtt: i64) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyrw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12081,7 +12081,7 @@ pub unsafe fn Q6_R_cmpyrw_PP_s1_rnd_sat(rss: i64, rtt: i64) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyrw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12094,7 +12094,7 @@ pub unsafe fn Q6_R_cmpyrw_PP_conj_s1_sat(rss: i64, rtt: i64) -> i32 { /// Instruction Type: M /// Execution Slots: SLOT3 /// Requires: V67, Audio -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v67,audio"))] #[cfg_attr(test, assert_instr(cmpyrw))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12107,7 +12107,7 @@ pub unsafe fn Q6_R_cmpyrw_PP_conj_s1_rnd_sat(rss: i64, rtt: i64) -> i32 { /// Instruction Type: ST /// Execution Slots: SLOT0 /// Requires: V68 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))] #[cfg_attr(test, assert_instr(dmlink))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12120,7 +12120,7 @@ pub unsafe fn Q6_dmlink_AA(rs: i32, rt: i32) { /// Instruction Type: ST /// Execution Slots: SLOT0 /// Requires: V68 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))] #[cfg_attr(test, assert_instr(dmpause))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12133,7 +12133,7 @@ pub unsafe fn Q6_R_dmpause() -> i32 { /// Instruction Type: ST /// Execution Slots: SLOT0 /// Requires: V68 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))] #[cfg_attr(test, assert_instr(dmpoll))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12146,7 +12146,7 @@ pub unsafe fn Q6_R_dmpoll() -> i32 { /// Instruction Type: ST /// Execution Slots: SLOT0 /// Requires: V68 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))] #[cfg_attr(test, assert_instr(dmresume))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12159,7 +12159,7 @@ pub unsafe fn Q6_dmresume_A(rs: i32) { /// Instruction Type: ST /// Execution Slots: SLOT0 /// Requires: V68 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))] #[cfg_attr(test, assert_instr(dmstart))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] @@ -12172,7 +12172,7 @@ pub unsafe fn Q6_dmstart_A(rs: i32) { /// Instruction Type: ST /// Execution Slots: SLOT0 /// Requires: V68 -#[inline(always)] +#[inline] #[cfg_attr(target_arch = "hexagon", target_feature(enable = "v68"))] #[cfg_attr(test, assert_instr(dmwait))] #[unstable(feature = "stdarch_hexagon", issue = "151523")] diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs index e05e19457319d..1fa156c85a422 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/generated.rs @@ -155,22 +155,6 @@ unsafe extern "unadjusted" { fn __lasx_xvrepl128vei_w(a: __v8i32, b: u32) -> __v8i32; #[link_name = "llvm.loongarch.lasx.xvrepl128vei.d"] fn __lasx_xvrepl128vei_d(a: __v4i64, b: u32) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvilvh.b"] - fn __lasx_xvilvh_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvilvh.h"] - fn __lasx_xvilvh_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvilvh.w"] - fn __lasx_xvilvh_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvilvh.d"] - fn __lasx_xvilvh_d(a: __v4i64, b: __v4i64) -> __v4i64; - #[link_name = "llvm.loongarch.lasx.xvilvl.b"] - fn __lasx_xvilvl_b(a: __v32i8, b: __v32i8) -> __v32i8; - #[link_name = "llvm.loongarch.lasx.xvilvl.h"] - fn __lasx_xvilvl_h(a: __v16i16, b: __v16i16) -> __v16i16; - #[link_name = "llvm.loongarch.lasx.xvilvl.w"] - fn __lasx_xvilvl_w(a: __v8i32, b: __v8i32) -> __v8i32; - #[link_name = "llvm.loongarch.lasx.xvilvl.d"] - fn __lasx_xvilvl_d(a: __v4i64, b: __v4i64) -> __v4i64; #[link_name = "llvm.loongarch.lasx.xvpackev.b"] fn __lasx_xvpackev_b(a: __v32i8, b: __v32i8) -> __v32i8; #[link_name = "llvm.loongarch.lasx.xvpackev.h"] @@ -1637,62 +1621,6 @@ pub fn lasx_xvrepl128vei_d(a: m256i) -> m256i { unsafe { transmute(__lasx_xvrepl128vei_d(transmute(a), IMM1)) } } -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvilvh_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvilvh_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvilvh_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvilvh_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvilvh_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvilvh_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvilvh_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvilvh_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvilvl_b(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvilvl_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvilvl_h(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvilvl_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvilvl_w(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvilvl_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lasx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lasx_xvilvl_d(a: m256i, b: m256i) -> m256i { - unsafe { transmute(__lasx_xvilvl_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lasx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs index 1d44f418bfbcd..0c583795af9b7 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/lasx/portable.rs @@ -67,6 +67,68 @@ const unsafe fn simd_pickod_h(a: T, b: T) -> T { simd_shuffle!(b, a, [1, 3, 5, 7, 17, 19, 21, 23, 9, 11, 13, 15, 25, 27, 29, 31]) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvh_b(a: T, b: T) -> T { + simd_shuffle!( + b, + a, + [ + 8, 40, 9, 41, 10, 42, 11, 43, 12, 44, 13, 45, 14, 46, 15, 47, + 24, 56, 25, 57, 26, 58, 27, 59, 28, 60, 29, 61, 30, 62, 31, 63 + ] + ) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvh_h(a: T, b: T) -> T { + simd_shuffle!(b, a, [4, 20, 5, 21, 6, 22, 7, 23, 12, 28, 13, 29, 14, 30, 15, 31]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvh_w(a: T, b: T) -> T { + simd_shuffle!(b, a, [2, 10, 3, 11, 6, 14, 7, 15]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvh_d(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 5, 3, 7]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvl_b(a: T, b: T) -> T { + simd_shuffle!( + b, + a, + [ + 0, 32, 1, 33, 2, 34, 3, 35, 4, 36, 5, 37, 6, 38, 7, 39, + 16, 48, 17, 49, 18, 50, 19, 51, 20, 52, 21, 53, 22, 54, 23, 55 + ] + ) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvl_h(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 16, 1, 17, 2, 18, 3, 19, 8, 24, 9, 25, 10, 26, 11, 27]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvl_w(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 8, 1, 9, 4, 12, 5, 13]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvl_d(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 4, 2, 6]) +} + impl_vv!("lasx", lasx_xvpcnt_b, is::simd_ctpop, m256i, i8x32); impl_vv!("lasx", lasx_xvpcnt_h, is::simd_ctpop, m256i, i16x16); impl_vv!("lasx", lasx_xvpcnt_w, is::simd_ctpop, m256i, i32x8); @@ -230,6 +292,14 @@ impl_vvv!("lasx", lasx_xvpickod_b, simd_pickod_b, m256i, i8x32); impl_vvv!("lasx", lasx_xvpickod_h, simd_pickod_h, m256i, i16x16); impl_vvv!("lasx", lasx_xvpickod_w, simd_pickod_w, m256i, i32x8); impl_vvv!("lasx", lasx_xvpickod_d, simd_pickod_d, m256i, i64x4); +impl_vvv!("lasx", lasx_xvilvh_b, simd_ilvh_b, m256i, i8x32); +impl_vvv!("lasx", lasx_xvilvh_h, simd_ilvh_h, m256i, i16x16); +impl_vvv!("lasx", lasx_xvilvh_w, simd_ilvh_w, m256i, i32x8); +impl_vvv!("lasx", lasx_xvilvh_d, simd_ilvh_d, m256i, i64x4); +impl_vvv!("lasx", lasx_xvilvl_b, simd_ilvl_b, m256i, i8x32); +impl_vvv!("lasx", lasx_xvilvl_h, simd_ilvl_h, m256i, i16x16); +impl_vvv!("lasx", lasx_xvilvl_w, simd_ilvl_w, m256i, i32x8); +impl_vvv!("lasx", lasx_xvilvl_d, simd_ilvl_d, m256i, i64x4); impl_vuv!("lasx", lasx_xvslli_b, is::simd_shl, m256i, i8x32); impl_vuv!("lasx", lasx_xvslli_h, is::simd_shl, m256i, i16x16); diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs index 767be195292f2..bc08e0cea726e 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/generated.rs @@ -163,22 +163,6 @@ unsafe extern "unadjusted" { fn __lsx_vreplvei_w(a: __v4i32, b: u32) -> __v4i32; #[link_name = "llvm.loongarch.lsx.vreplvei.d"] fn __lsx_vreplvei_d(a: __v2i64, b: u32) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vilvh.b"] - fn __lsx_vilvh_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vilvh.h"] - fn __lsx_vilvh_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vilvh.w"] - fn __lsx_vilvh_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vilvh.d"] - fn __lsx_vilvh_d(a: __v2i64, b: __v2i64) -> __v2i64; - #[link_name = "llvm.loongarch.lsx.vilvl.b"] - fn __lsx_vilvl_b(a: __v16i8, b: __v16i8) -> __v16i8; - #[link_name = "llvm.loongarch.lsx.vilvl.h"] - fn __lsx_vilvl_h(a: __v8i16, b: __v8i16) -> __v8i16; - #[link_name = "llvm.loongarch.lsx.vilvl.w"] - fn __lsx_vilvl_w(a: __v4i32, b: __v4i32) -> __v4i32; - #[link_name = "llvm.loongarch.lsx.vilvl.d"] - fn __lsx_vilvl_d(a: __v2i64, b: __v2i64) -> __v2i64; #[link_name = "llvm.loongarch.lsx.vpackev.b"] fn __lsx_vpackev_b(a: __v16i8, b: __v16i8) -> __v16i8; #[link_name = "llvm.loongarch.lsx.vpackev.h"] @@ -1577,62 +1561,6 @@ pub fn lsx_vreplvei_d(a: m128i) -> m128i { unsafe { transmute(__lsx_vreplvei_d(transmute(a), IMM1)) } } -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vilvh_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vilvh_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vilvh_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vilvh_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vilvh_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vilvh_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vilvh_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vilvh_d(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vilvl_b(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vilvl_b(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vilvl_h(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vilvl_h(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vilvl_w(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vilvl_w(transmute(a), transmute(b))) } -} - -#[inline] -#[target_feature(enable = "lsx")] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn lsx_vilvl_d(a: m128i, b: m128i) -> m128i { - unsafe { transmute(__lsx_vilvl_d(transmute(a), transmute(b))) } -} - #[inline] #[target_feature(enable = "lsx")] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs index 24f9af851d8c1..99366dedb24c3 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/lsx/portable.rs @@ -53,6 +53,54 @@ const unsafe fn simd_pickod_d(a: T, b: T) -> T { simd_shuffle!(b, a, [1, 3]) } +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvh_b(a: T, b: T) -> T { + simd_shuffle!(b, a, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvh_h(a: T, b: T) -> T { + simd_shuffle!(b, a, [4, 12, 5, 13, 6, 14, 7, 15]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvh_w(a: T, b: T) -> T { + simd_shuffle!(b, a, [2, 6, 3, 7]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvh_d(a: T, b: T) -> T { + simd_shuffle!(b, a, [1, 3]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvl_b(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvl_h(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 8, 1, 9, 2, 10, 3, 11]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvl_w(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 4, 1, 5]) +} + +#[inline(always)] +#[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +pub(crate) const unsafe fn simd_ilvl_d(a: T, b: T) -> T { + simd_shuffle!(b, a, [0, 2]) +} + impl_vv!("lsx", lsx_vpcnt_b, is::simd_ctpop, m128i, i8x16); impl_vv!("lsx", lsx_vpcnt_h, is::simd_ctpop, m128i, i16x8); impl_vv!("lsx", lsx_vpcnt_w, is::simd_ctpop, m128i, i32x4); @@ -216,6 +264,14 @@ impl_vvv!("lsx", lsx_vpickod_b, simd_pickod_b, m128i, i8x16); impl_vvv!("lsx", lsx_vpickod_h, simd_pickod_h, m128i, i16x8); impl_vvv!("lsx", lsx_vpickod_w, simd_pickod_w, m128i, i32x4); impl_vvv!("lsx", lsx_vpickod_d, simd_pickod_d, m128i, i64x2); +impl_vvv!("lsx", lsx_vilvh_b, simd_ilvh_b, m128i, i8x16); +impl_vvv!("lsx", lsx_vilvh_h, simd_ilvh_h, m128i, i16x8); +impl_vvv!("lsx", lsx_vilvh_w, simd_ilvh_w, m128i, i32x4); +impl_vvv!("lsx", lsx_vilvh_d, simd_ilvh_d, m128i, i64x2); +impl_vvv!("lsx", lsx_vilvl_b, simd_ilvl_b, m128i, i8x16); +impl_vvv!("lsx", lsx_vilvl_h, simd_ilvl_h, m128i, i16x8); +impl_vvv!("lsx", lsx_vilvl_w, simd_ilvl_w, m128i, i32x4); +impl_vvv!("lsx", lsx_vilvl_d, simd_ilvl_d, m128i, i64x2); impl_vuv!("lsx", lsx_vslli_b, is::simd_shl, m128i, i8x16); impl_vuv!("lsx", lsx_vslli_h, is::simd_shl, m128i, i16x8); diff --git a/library/stdarch/crates/core_arch/src/loongarch64/mod.rs b/library/stdarch/crates/core_arch/src/loongarch64/mod.rs index f464dbd356b7f..e8bf098a33327 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/mod.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/mod.rs @@ -22,8 +22,20 @@ pub fn rdtime_d() -> (i64, isize) { #[allow(improper_ctypes)] unsafe extern "unadjusted" { + #[link_name = "llvm.loongarch.crc.w.b.w"] + fn __crc_w_b_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crc.w.h.w"] + fn __crc_w_h_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crc.w.w.w"] + fn __crc_w_w_w(a: i32, b: i32) -> i32; #[link_name = "llvm.loongarch.crc.w.d.w"] fn __crc_w_d_w(a: i64, b: i32) -> i32; + #[link_name = "llvm.loongarch.crcc.w.b.w"] + fn __crcc_w_b_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crcc.w.h.w"] + fn __crcc_w_h_w(a: i32, b: i32) -> i32; + #[link_name = "llvm.loongarch.crcc.w.w.w"] + fn __crcc_w_w_w(a: i32, b: i32) -> i32; #[link_name = "llvm.loongarch.crcc.w.d.w"] fn __crcc_w_d_w(a: i64, b: i32) -> i32; #[link_name = "llvm.loongarch.cacop.d"] @@ -48,6 +60,27 @@ unsafe extern "unadjusted" { fn __ldpte(a: i64, b: i64); } +/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) +#[inline(always)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn crc_w_b_w(a: i8, b: i32) -> i32 { + unsafe { __crc_w_b_w(a as i32, b) } +} + +/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) +#[inline(always)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn crc_w_h_w(a: i16, b: i32) -> i32 { + unsafe { __crc_w_h_w(a as i32, b) } +} + +/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) +#[inline(always)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn crc_w_w_w(a: i32, b: i32) -> i32 { + unsafe { __crc_w_w_w(a, b) } +} + /// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) #[inline(always)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] @@ -55,6 +88,27 @@ pub fn crc_w_d_w(a: i64, b: i32) -> i32 { unsafe { __crc_w_d_w(a, b) } } +/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) +#[inline(always)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn crcc_w_b_w(a: i8, b: i32) -> i32 { + unsafe { __crcc_w_b_w(a as i32, b) } +} + +/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) +#[inline(always)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn crcc_w_h_w(a: i16, b: i32) -> i32 { + unsafe { __crcc_w_h_w(a as i32, b) } +} + +/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) +#[inline(always)] +#[unstable(feature = "stdarch_loongarch", issue = "117427")] +pub fn crcc_w_w_w(a: i32, b: i32) -> i32 { + unsafe { __crcc_w_w_w(a, b) } +} + /// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) #[inline(always)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs index b4ec6881c36ab..b102e137f1133 100644 --- a/library/stdarch/crates/core_arch/src/loongarch64/simd.rs +++ b/library/stdarch/crates/core_arch/src/loongarch64/simd.rs @@ -14,6 +14,7 @@ pub(super) const trait SimdExt: Sized { unsafe fn splat(v: i64) -> Self; } +#[rustfmt::skip] // FIXME: https://github.com/rust-lang/stdarch/pull/2133#issuecomment-4524350350 macro_rules! impl_simd_ext { ($v:ident, $e:ty) => { #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] diff --git a/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs b/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs index 948c98df61971..4f24ff2932210 100644 --- a/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs +++ b/library/stdarch/crates/core_arch/src/loongarch_shared/mod.rs @@ -22,18 +22,6 @@ pub fn rdtimeh_w() -> (i32, isize) { #[allow(improper_ctypes)] unsafe extern "unadjusted" { - #[link_name = "llvm.loongarch.crc.w.b.w"] - fn __crc_w_b_w(a: i32, b: i32) -> i32; - #[link_name = "llvm.loongarch.crc.w.h.w"] - fn __crc_w_h_w(a: i32, b: i32) -> i32; - #[link_name = "llvm.loongarch.crc.w.w.w"] - fn __crc_w_w_w(a: i32, b: i32) -> i32; - #[link_name = "llvm.loongarch.crcc.w.b.w"] - fn __crcc_w_b_w(a: i32, b: i32) -> i32; - #[link_name = "llvm.loongarch.crcc.w.h.w"] - fn __crcc_w_h_w(a: i32, b: i32) -> i32; - #[link_name = "llvm.loongarch.crcc.w.w.w"] - fn __crcc_w_w_w(a: i32, b: i32) -> i32; #[link_name = "llvm.loongarch.dbar"] fn __dbar(a: i32); #[link_name = "llvm.loongarch.ibar"] @@ -70,48 +58,6 @@ unsafe extern "unadjusted" { fn __frsqrte_d(a: f64) -> f64; } -/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) -#[inline(always)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn crc_w_b_w(a: i32, b: i32) -> i32 { - unsafe { __crc_w_b_w(a, b) } -} - -/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) -#[inline(always)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn crc_w_h_w(a: i32, b: i32) -> i32 { - unsafe { __crc_w_h_w(a, b) } -} - -/// Calculate the CRC value using the IEEE 802.3 polynomial (0xEDB88320) -#[inline(always)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn crc_w_w_w(a: i32, b: i32) -> i32 { - unsafe { __crc_w_w_w(a, b) } -} - -/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) -#[inline(always)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn crcc_w_b_w(a: i32, b: i32) -> i32 { - unsafe { __crcc_w_b_w(a, b) } -} - -/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) -#[inline(always)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn crcc_w_h_w(a: i32, b: i32) -> i32 { - unsafe { __crcc_w_h_w(a, b) } -} - -/// Calculate the CRC value using the Castagnoli polynomial (0x82F63B78) -#[inline(always)] -#[unstable(feature = "stdarch_loongarch", issue = "117427")] -pub fn crcc_w_w_w(a: i32, b: i32) -> i32 { - unsafe { __crcc_w_w_w(a, b) } -} - /// Generates the memory barrier instruction #[inline(always)] #[unstable(feature = "stdarch_loongarch", issue = "117427")] diff --git a/library/stdarch/crates/core_arch/src/simd.rs b/library/stdarch/crates/core_arch/src/simd.rs index 2c6829b465c42..9a756eee446d2 100644 --- a/library/stdarch/crates/core_arch/src/simd.rs +++ b/library/stdarch/crates/core_arch/src/simd.rs @@ -87,6 +87,7 @@ impl Clone for Simd { } #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +#[rustfmt::skip] // FIXME: https://github.com/rust-lang/stdarch/pull/2133#issuecomment-4524350350 impl const crate::cmp::PartialEq for Simd { #[inline] fn eq(&self, other: &Self) -> bool { @@ -299,6 +300,7 @@ impl Clone for SimdM { } #[rustc_const_unstable(feature = "stdarch_const_helpers", issue = "none")] +#[rustfmt::skip] // FIXME: https://github.com/rust-lang/stdarch/pull/2133#issuecomment-4524350350 impl const crate::cmp::PartialEq for SimdM { #[inline] fn eq(&self, other: &Self) -> bool { diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64_be_clang.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64_be_clang.txt new file mode 100644 index 0000000000000..001538b3ea504 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/missing_aarch64_be_clang.txt @@ -0,0 +1,31 @@ +# Bad LLVM codegen for BE in O2 in clang, and release in rust (https://github.com/llvm/llvm-project/issues/166190) +vcmla_laneq_f16 +vcmla_rot180_laneq_f16 +vcmla_rot270_laneq_f16 +vcmla_rot90_laneq_f16 +vcmlaq_lane_f16 +vcmlaq_laneq_f16 +vcmlaq_rot180_lane_f16 +vcmlaq_rot180_laneq_f16 +vcmlaq_rot270_lane_f16 +vcmlaq_rot270_laneq_f16 +vcmlaq_rot90_lane_f16 +vcmlaq_rot90_laneq_f16 + +# Bad codegen for BE in O2 in clang, correct in rust. Same cause as above issue. +vdot_lane_s32 +vdot_lane_u32 +vdot_laneq_s32 +vdot_laneq_u32 +vdotq_lane_s32 +vdotq_lane_u32 +vdotq_laneq_s32 +vdotq_laneq_u32 +vsudot_lane_s32 +vsudot_laneq_s32 +vsudotq_lane_s32 +vsudotq_laneq_s32 +vusdot_lane_s32 +vusdot_laneq_s32 +vusdotq_lane_s32 +vusdotq_laneq_s32 diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64_be_common.txt similarity index 93% rename from library/stdarch/crates/intrinsic-test/missing_aarch64.txt rename to library/stdarch/crates/intrinsic-test/missing_aarch64_be_common.txt index f0c9eeb6ce2c9..327c8207a0ff5 100644 --- a/library/stdarch/crates/intrinsic-test/missing_aarch64.txt +++ b/library/stdarch/crates/intrinsic-test/missing_aarch64_be_common.txt @@ -79,6 +79,3 @@ vcvtns_s64_f32 vcvtns_u64_f32 vcvtps_s64_f32 vcvtps_u64_f32 - -# Broken in Clang (fixed in https://github.com/llvm/llvm-project/pull/156029) -vcvth_s16_f16 diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64_be_gcc.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64_be_gcc.txt new file mode 100644 index 0000000000000..eccd8a8a14dff --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/missing_aarch64_be_gcc.txt @@ -0,0 +1,20 @@ +# Broken in LLVM llvm/llvm-project#196999 +vmull_p64 +vmull_high_p64 + +# Broken in LLVM llvm/llvm-project#197083 +vcvth_n_s32_f16 +vcvth_n_u32_f16 +vcvth_n_s64_f16 +vcvth_n_u64_f16 +vcvth_n_f16_s32 +vcvth_n_f16_u32 +vcvth_n_f16_s64 +vcvth_n_f16_u64 + +# Broken in GCC https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125279 +vmaxh_f16 +vminh_f16 + +# Rounding errors +vfms_n_f64 diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64_clang.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64_clang.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64_be.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64_common.txt similarity index 57% rename from library/stdarch/crates/intrinsic-test/missing_aarch64_be.txt rename to library/stdarch/crates/intrinsic-test/missing_aarch64_common.txt index 9163aaa1c8db0..327c8207a0ff5 100644 --- a/library/stdarch/crates/intrinsic-test/missing_aarch64_be.txt +++ b/library/stdarch/crates/intrinsic-test/missing_aarch64_common.txt @@ -1,43 +1,3 @@ -# Bad LLVM codegen for BE in O2 in clang, and release in rust (https://github.com/llvm/llvm-project/issues/166190) -vcmla_lane_f16 -vcmla_laneq_f16 -vcmla_rot180_lane_f16 -vcmla_rot180_laneq_f16 -vcmla_rot270_lane_f16 -vcmla_rot270_laneq_f16 -vcmla_rot90_lane_f16 -vcmla_rot90_laneq_f16 -vcmlaq_lane_f16 -vcmlaq_laneq_f16 -vcmlaq_laneq_f32 -vcmlaq_rot180_lane_f16 -vcmlaq_rot180_laneq_f16 -vcmlaq_rot180_laneq_f32 -vcmlaq_rot270_lane_f16 -vcmlaq_rot270_laneq_f16 -vcmlaq_rot270_laneq_f32 -vcmlaq_rot90_lane_f16 -vcmlaq_rot90_laneq_f16 -vcmlaq_rot90_laneq_f32 -# Bad codegen for BE in O2 in clang, correct in rust. Same cause as above issue. -vdot_lane_s32 -vdot_lane_u32 -vdot_laneq_s32 -vdot_laneq_u32 -vdotq_lane_s32 -vdotq_lane_u32 -vdotq_laneq_s32 -vdotq_laneq_u32 -vsudot_lane_s32 -vsudot_laneq_s32 -vsudotq_lane_s32 -vsudotq_laneq_s32 -vusdot_lane_s32 -vusdot_laneq_s32 -vusdotq_lane_s32 -vusdotq_laneq_s32 - -# Below are in common to missing_aarch64.txt # Not supported by qemu (will throw illegal instruction) vamin_f16 vaminq_f16 @@ -119,6 +79,3 @@ vcvtns_s64_f32 vcvtns_u64_f32 vcvtps_s64_f32 vcvtps_u64_f32 - -# Broken in Clang -vcvth_s16_f16 diff --git a/library/stdarch/crates/intrinsic-test/missing_aarch64_gcc.txt b/library/stdarch/crates/intrinsic-test/missing_aarch64_gcc.txt new file mode 100644 index 0000000000000..a7e90142e4bcd --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/missing_aarch64_gcc.txt @@ -0,0 +1,19 @@ +# Broken in GCC https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123584, fixed in GCC 16 +vxarq_u64 + +# Broken in LLVM llvm/llvm-project#197083 +vcvth_n_s32_f16 +vcvth_n_u32_f16 +vcvth_n_s64_f16 +vcvth_n_u64_f16 +vcvth_n_f16_s32 +vcvth_n_f16_u32 +vcvth_n_f16_s64 +vcvth_n_f16_u64 + +# Broken in GCC https://gcc.gnu.org/bugzilla/show_bug.cgi?id=125279 +vmaxh_f16 +vminh_f16 + +# Rounding errors +vfms_n_f64 diff --git a/library/stdarch/crates/intrinsic-test/missing_arm_clang.txt b/library/stdarch/crates/intrinsic-test/missing_arm_clang.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/library/stdarch/crates/intrinsic-test/missing_arm.txt b/library/stdarch/crates/intrinsic-test/missing_arm_common.txt similarity index 100% rename from library/stdarch/crates/intrinsic-test/missing_arm.txt rename to library/stdarch/crates/intrinsic-test/missing_arm_common.txt diff --git a/library/stdarch/crates/intrinsic-test/missing_x86_clang.txt b/library/stdarch/crates/intrinsic-test/missing_x86_clang.txt new file mode 100644 index 0000000000000..b1531830c1a19 --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/missing_x86_clang.txt @@ -0,0 +1,24 @@ +# not present in Clang +_bswap +_bswap64 +_mm_cvtsd_si64x +_mm_cvtsi128_si64x +_mm_cvtsi64x_sd +_mm_cvtsi64x_si128 +_mm_cvttsd_si64x +_popcnt32 +_popcnt64 + +# Clang bug +_mm512_mask_reduce_max_pd +_mm512_mask_reduce_max_ps +_mm512_mask_reduce_min_pd +_mm512_mask_reduce_min_ps + +# Rounding errors in release mode +_mm_maskz_fmadd_sd +_mm_maskz_fmadd_ss +_mm_maskz_fmsub_sd +_mm_maskz_fmsub_ss +_mm_maskz_fnmadd_sd +_mm_maskz_fnmadd_ss diff --git a/library/stdarch/crates/intrinsic-test/missing_x86.txt b/library/stdarch/crates/intrinsic-test/missing_x86_common.txt similarity index 59% rename from library/stdarch/crates/intrinsic-test/missing_x86.txt rename to library/stdarch/crates/intrinsic-test/missing_x86_common.txt index c7aabb95a84b0..f9b71bbe8c2ca 100644 --- a/library/stdarch/crates/intrinsic-test/missing_x86.txt +++ b/library/stdarch/crates/intrinsic-test/missing_x86_common.txt @@ -1,19 +1,22 @@ -# Are defined under a similar name - -#__bswap_64 -_bswap64 - -# not present in Clang and Rust +# not present in Rust _bit_scan_forward _bit_scan_reverse _castf32_u32 _castf64_u64 _castu32_f32 _castu64_f64 +_cvtsh_ss +_cvtss_sh _lrotl _lrotr _may_i_use_cpu_feature _may_i_use_cpu_feature_ext +_mm256_set1_pch +_mm512_set1_pch +_mm_malloc +_mm_popcnt_u32 +_mm_popcnt_u64 +_mm_set1_pch _rdpmc _rotl _rotl64 @@ -21,29 +24,9 @@ _rotr _rotr64 _rotwl _rotwr -_urdmsr - -# not present in Clang -_bswap -_mm_cvtsd_si64x -_mm_cvtsi128_si64x -_mm_cvtsi64x_sd -_mm_cvtsi64x_si128 -_mm_cvttsd_si64x -_popcnt32 -_popcnt64 - -# not present in Rust -_cvtsh_ss -_cvtss_sh -_mm256_set1_pch -_mm512_set1_pch -_mm_malloc -_mm_popcnt_u32 -_mm_popcnt_u64 -_mm_set1_pch _tpause _umwait +_urdmsr # SDE ERROR: Cannot execute XGETBV with ECX != 0 _xgetbv @@ -63,17 +46,3 @@ _mm512_castph256_ph512 _mm512_castps256_ps512 _mm512_castpd256_pd512 _mm512_castsi256_si512 - -# Clang bug -_mm512_mask_reduce_max_pd -_mm512_mask_reduce_max_ps -_mm512_mask_reduce_min_pd -_mm512_mask_reduce_min_ps - -# Rounding errors in release mode -_mm_maskz_fmadd_sd -_mm_maskz_fmadd_ss -_mm_maskz_fmsub_sd -_mm_maskz_fmsub_ss -_mm_maskz_fnmadd_sd -_mm_maskz_fnmadd_ss diff --git a/library/stdarch/crates/intrinsic-test/missing_x86_gcc.txt b/library/stdarch/crates/intrinsic-test/missing_x86_gcc.txt new file mode 100644 index 0000000000000..5b71b0698eafc --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/missing_x86_gcc.txt @@ -0,0 +1,33 @@ +# not present in GCC +_bextr2_u32 +_bextr2_u64 +_mm512_cvtepi32lo_pd +_mm512_mask_cvtepi32lo_pd +_mm512_cvtepu32lo_pd +_mm512_mask_cvtepu32lo_pd +_mm512_cvtpd_pslo +_mm512_mask_cvtpd_pslo +_mm512_cvtpslo_pd +_mm512_mask_cvtpslo_pd +_mm512_permutevar_epi32 +_mm512_mask_permutevar_epi32 +_mm_tzcnt_32 +_mm_tzcnt_64 + +# GCC bug +_mm512_reduce_max_pd +_mm512_reduce_max_ps +_mm512_reduce_min_pd +_mm512_reduce_min_ps +_mm512_mask_reduce_max_pd +_mm512_mask_reduce_max_ps +_mm512_mask_reduce_min_pd +_mm512_mask_reduce_min_ps + +# Rounding errors in release mode +_mm_maskz_fmadd_sd +_mm_maskz_fmadd_ss +_mm_maskz_fmsub_sd +_mm_maskz_fmsub_ss +_mm_maskz_fnmadd_sd +_mm_maskz_fnmadd_ss diff --git a/library/stdarch/crates/intrinsic-test/missing_x86_icx.txt b/library/stdarch/crates/intrinsic-test/missing_x86_icx.txt new file mode 100644 index 0000000000000..6d3133c85db9c --- /dev/null +++ b/library/stdarch/crates/intrinsic-test/missing_x86_icx.txt @@ -0,0 +1,20 @@ +# not present in ICX +_mm_cvtsd_si64x +_mm_cvtsi128_si64x +_mm_cvtsi64x_sd +_mm_cvtsi64x_si128 +_mm_cvttsd_si64x + +# ICX bug +_mm512_mask_reduce_max_pd +_mm512_mask_reduce_max_ps +_mm512_mask_reduce_min_pd +_mm512_mask_reduce_min_ps + +# Rounding errors in release mode +_mm_maskz_fmadd_sd +_mm_maskz_fmadd_ss +_mm_maskz_fmsub_sd +_mm_maskz_fmsub_ss +_mm_maskz_fnmadd_sd +_mm_maskz_fnmadd_ss diff --git a/library/stdarch/crates/intrinsic-test/src/arm/argument.rs b/library/stdarch/crates/intrinsic-test/src/arm/argument.rs deleted file mode 100644 index c43609bb2db0d..0000000000000 --- a/library/stdarch/crates/intrinsic-test/src/arm/argument.rs +++ /dev/null @@ -1,15 +0,0 @@ -use crate::arm::intrinsic::ArmIntrinsicType; -use crate::common::argument::Argument; - -// This functionality is present due to the nature -// of how intrinsics are defined in the JSON source -// of ARM intrinsics. -impl Argument { - pub fn type_and_name_from_c(arg: &str) -> (&str, &str) { - let split_index = arg - .rfind([' ', '*']) - .expect("Couldn't split type and argname"); - - (arg[..split_index + 1].trim_end(), &arg[split_index + 1..]) - } -} diff --git a/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs index 29343bee4c300..a54e5857192e0 100644 --- a/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs +++ b/library/stdarch/crates/intrinsic-test/src/arm/intrinsic.rs @@ -2,21 +2,18 @@ use crate::common::intrinsic_helpers::IntrinsicType; use std::ops::{Deref, DerefMut}; #[derive(Debug, Clone, PartialEq)] -pub struct ArmIntrinsicType { - pub data: IntrinsicType, - pub target: String, -} +pub struct ArmIntrinsicType(pub IntrinsicType); impl Deref for ArmIntrinsicType { type Target = IntrinsicType; fn deref(&self) -> &Self::Target { - &self.data + &self.0 } } impl DerefMut for ArmIntrinsicType { fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.data + &mut self.0 } } diff --git a/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs b/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs index c1563a7364ce7..06cf78a422285 100644 --- a/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs +++ b/library/stdarch/crates/intrinsic-test/src/arm/json_parser.rs @@ -1,8 +1,9 @@ use super::intrinsic::ArmIntrinsicType; +use crate::arm::types::parse_intrinsic_type; use crate::common::argument::{Argument, ArgumentList}; use crate::common::constraint::Constraint; use crate::common::intrinsic::Intrinsic; -use crate::common::intrinsic_helpers::IntrinsicType; +use crate::common::intrinsic_helpers::{IntrinsicType, TypeKind}; use serde::Deserialize; use serde_json::Value; use std::collections::HashMap; @@ -58,7 +59,6 @@ struct JsonIntrinsic { pub fn get_neon_intrinsics( filename: &Path, - target: &str, ) -> Result>, Box> { let file = std::fs::File::open(filename)?; let reader = std::io::BufReader::new(file); @@ -68,7 +68,7 @@ pub fn get_neon_intrinsics( .into_iter() .filter_map(|intr| { if intr.simd_isa == "Neon" { - Some(json_to_intrinsic(intr, target).expect("Couldn't parse JSON")) + Some(json_to_intrinsic(intr).expect("Couldn't parse JSON")) } else { None } @@ -79,32 +79,58 @@ pub fn get_neon_intrinsics( fn json_to_intrinsic( mut intr: JsonIntrinsic, - target: &str, ) -> Result, Box> { let name = intr.name.replace(['[', ']'], ""); - let results = ArmIntrinsicType::from_c(&intr.return_type.value, target)?; + let result_ty = ArmIntrinsicType(parse_intrinsic_type(&intr.return_type.value)?); let args = intr .arguments .into_iter() .enumerate() .map(|(i, arg)| { - let (type_name, arg_name) = Argument::::type_and_name_from_c(&arg); + let (type_name, arg_name) = { + let split_index = arg + .rfind([' ', '*']) + .expect("Couldn't split type and argname"); + + (arg[..split_index + 1].trim_end(), &arg[split_index + 1..]) + }; + + let arg_ty = parse_intrinsic_type(type_name) + .unwrap_or_else(|_| panic!("Failed to parse argument '{arg}'")); + let metadata = intr.args_prep.as_mut(); let metadata = metadata.and_then(|a| a.remove(arg_name)); let arg_prep: Option = metadata.and_then(|a| a.try_into().ok()); - let constraint: Option = arg_prep.and_then(|a| a.try_into().ok()); - let ty = ArmIntrinsicType::from_c(type_name, target) - .unwrap_or_else(|_| panic!("Failed to parse argument '{arg}'")); - - let mut arg = - Argument::::new(i, String::from(arg_name), ty, constraint); + let constraint: Option = + arg_prep.and_then(|a| a.try_into().ok()).or_else(|| { + if arg_ty.kind() == TypeKind::SvPattern { + Some(Constraint::SvPattern) + } else if arg_ty.kind() == TypeKind::SvPrefetchOp { + Some(Constraint::SvPrefetchOp) + } else if arg_name == "imm_rotation" { + if name.starts_with("svcadd_") || name.starts_with("svqcadd_") { + Some(Constraint::SvImmRotationAdd) + } else { + Some(Constraint::SvImmRotation) + } + } else { + None + } + }); + + let mut arg = Argument::::new( + i, + String::from(arg_name), + ArmIntrinsicType(arg_ty), + constraint, + ); // The JSON doesn't list immediates as const let IntrinsicType { ref mut constant, .. - } = arg.ty.data; + } = *arg.ty; if arg.name.starts_with("imm") { *constant = true } @@ -117,7 +143,7 @@ fn json_to_intrinsic( Ok(Intrinsic { name, arguments, - results, + results: result_ty, arch_tags: intr.architectures, }) } diff --git a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs index 9bf6c95ffdcb3..378f23ba7c361 100644 --- a/library/stdarch/crates/intrinsic-test/src/arm/mod.rs +++ b/library/stdarch/crates/intrinsic-test/src/arm/mod.rs @@ -1,11 +1,10 @@ -mod argument; mod config; mod intrinsic; mod json_parser; mod types; use crate::common::SupportedArchitectureTest; -use crate::common::cli::ProcessedCli; +use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; use intrinsic::ArmIntrinsicType; @@ -29,14 +28,18 @@ impl SupportedArchitectureTest for ArmArchitectureTest { const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - fn arch_flags(&self) -> Vec<&str> { - vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"] + fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str> { + // GCC uses an extra `-` in the arch name + match cli_options.cc_arg_style { + CcArgStyle::Clang => vec!["-march=armv8.6a+crypto+crc+dotprod+fp16"], + CcArgStyle::Gcc => vec!["-march=armv8.6-a+crypto+crc+dotprod+fp16+sha3+sm4"], + } } - fn create(cli_options: ProcessedCli) -> Self { + fn create(cli_options: &ProcessedCli) -> Self { let a32 = cli_options.target.starts_with("armv7"); - let mut intrinsics = get_neon_intrinsics(&cli_options.filename, &cli_options.target) - .expect("Error parsing input file"); + let mut intrinsics = + get_neon_intrinsics(&cli_options.filename).expect("Error parsing input file"); intrinsics.sort_by(|a, b| a.name.cmp(&b.name)); intrinsics.dedup(); @@ -46,15 +49,19 @@ impl SupportedArchitectureTest for ArmArchitectureTest { let intrinsics = intrinsics .into_iter() - // Not sure how we would compare intrinsic that returns void. + // Skip intrinsics that don't return a value. .filter(|i| i.results.kind() != TypeKind::Void) + // Skip bfloat intrinsics - not currently supported .filter(|i| i.results.kind() != TypeKind::BFloat) .filter(|i| !i.arguments.iter().any(|a| a.ty.kind() == TypeKind::BFloat)) // Skip pointers for now, we would probably need to look at the return // type to work out how many elements we need to point to. .filter(|i| !i.arguments.iter().any(|a| a.is_ptr())) + // Skip intrinsics with 128-bit elements (e.g. `p128`) .filter(|i| !i.arguments.iter().any(|a| a.ty.inner_size() == 128)) + // Skip intrinsics from `--skip` .filter(|i| !cli_options.skip.contains(&i.name)) + // Skip A64-specific intrinsics on A32 .filter(|i| !(a32 && i.arch_tags == vec!["A64".to_string()])) .take(sample_size) .collect::>(); diff --git a/library/stdarch/crates/intrinsic-test/src/arm/types.rs b/library/stdarch/crates/intrinsic-test/src/arm/types.rs index e9614eba218cb..cd420f10678fc 100644 --- a/library/stdarch/crates/intrinsic-test/src/arm/types.rs +++ b/library/stdarch/crates/intrinsic-test/src/arm/types.rs @@ -1,5 +1,7 @@ use super::intrinsic::ArmIntrinsicType; -use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; +use crate::common::intrinsic_helpers::{ + IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind, +}; impl IntrinsicTypeDefinition for ArmIntrinsicType { /// Gets a string containing the typename for this type in C format. @@ -9,8 +11,14 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { if let Some(bit_len) = self.bit_len { match (self.simd_len, self.vec_len) { (None, None) => format!("{prefix}{bit_len}_t"), - (Some(simd), None) => format!("{prefix}{bit_len}x{simd}_t"), - (Some(simd), Some(vec)) => format!("{prefix}{bit_len}x{simd}x{vec}_t"), + (Some(SimdLen::Fixed(simd)), None) => format!("{prefix}{bit_len}x{simd}_t"), + (Some(SimdLen::Fixed(simd)), Some(vec)) => { + format!("{prefix}{bit_len}x{simd}x{vec}_t") + } + (Some(SimdLen::Scalable), None) => format!("sv{prefix}{bit_len}_t"), + (Some(SimdLen::Scalable), Some(vec)) => { + format!("sv{prefix}{bit_len}x{vec}_t") + } (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case } } else { @@ -25,8 +33,14 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { if let Some(bit_len) = self.bit_len { match (self.simd_len, self.vec_len) { (None, None) => format!("{rust_prefix}{bit_len}"), - (Some(simd), None) => format!("{c_prefix}{bit_len}x{simd}_t"), - (Some(simd), Some(vec)) => format!("{c_prefix}{bit_len}x{simd}x{vec}_t"), + (Some(SimdLen::Fixed(simd)), None) => format!("{c_prefix}{bit_len}x{simd}_t"), + (Some(SimdLen::Fixed(simd)), Some(vec)) => { + format!("{c_prefix}{bit_len}x{simd}x{vec}_t") + } + (Some(SimdLen::Scalable), None) => format!("sv{c_prefix}{bit_len}_t"), + (Some(SimdLen::Scalable), Some(vec)) => { + format!("sv{c_prefix}{bit_len}x{vec}_t") + } (None, Some(_)) => todo!("{self:#?}"), // Likely an invalid case } } else { @@ -39,16 +53,11 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { if let IntrinsicType { kind: k, bit_len: Some(bl), - simd_len, vec_len, .. - } = &self.data + } = **self { - let quad = if simd_len.unwrap_or(1) * bl > 64 { - "q" - } else { - "" - }; + let quad = if self.num_lanes() * bl > 64 { "q" } else { "" }; format!( "vld{len}{quad}_{type}{size}", @@ -69,79 +78,99 @@ impl IntrinsicTypeDefinition for ArmIntrinsicType { } } -impl ArmIntrinsicType { - pub fn from_c(s: &str, target: &str) -> Result { - const CONST_STR: &str = "const"; - if let Some(s) = s.strip_suffix('*') { - let (s, constant) = match s.trim().strip_suffix(CONST_STR) { - Some(stripped) => (stripped, true), - None => (s, false), - }; - let s = s.trim_end(); - let temp_return = ArmIntrinsicType::from_c(s, target); - temp_return.map(|mut op| { - op.ptr = true; - op.ptr_constant = constant; - op - }) +pub fn parse_intrinsic_type(s: &str) -> Result { + const CONST_STR: &str = "const"; + const ENUM_STR: &str = "enum "; + + // Recurse to handle pointers.. + if let Some(s) = s.strip_suffix('*') { + let s = s.trim(); + let (s, constant) = if s.ends_with(CONST_STR) || s.starts_with(CONST_STR) { + ( + s.trim_start_matches(CONST_STR).trim_end_matches(CONST_STR), + true, + ) } else { - // [const ]TYPE[{bitlen}[x{simdlen}[x{vec_len}]]][_t] - let (mut s, constant) = match s.strip_prefix(CONST_STR) { - Some(stripped) => (stripped.trim(), true), - None => (s, false), - }; - s = s.strip_suffix("_t").unwrap_or(s); - let mut parts = s.split('x'); // [[{bitlen}], [{simdlen}], [{vec_len}] ] - let start = parts.next().ok_or("Impossible to parse type")?; - if let Some(digit_start) = start.find(|c: char| c.is_ascii_digit()) { - let (arg_kind, bit_len) = start.split_at(digit_start); - let arg_kind = arg_kind.parse::()?; - let bit_len = bit_len.parse::().map_err(|err| err.to_string())?; - let simd_len = match parts.next() { - Some(part) => Some( - part.parse::() - .map_err(|_| "Couldn't parse simd_len: {part}")?, - ), - None => None, - }; - let vec_len = match parts.next() { - Some(part) => Some( - part.parse::() - .map_err(|_| "Couldn't parse vec_len: {part}")?, - ), - None => None, - }; - Ok(ArmIntrinsicType { - data: IntrinsicType { - ptr: false, - ptr_constant: false, - constant, - kind: arg_kind, - bit_len: Some(bit_len), - simd_len, - vec_len, - }, - target: target.to_string(), - }) - } else { - let kind = start.parse::()?; - let bit_len = match kind { - TypeKind::Int(_) => Some(32), - _ => None, - }; - Ok(ArmIntrinsicType { - data: IntrinsicType { - ptr: false, - ptr_constant: false, - constant, - kind: start.parse::()?, - bit_len, - simd_len: None, - vec_len: None, - }, - target: target.to_string(), - }) - } - } + (s, false) + }; + + let mut ty = parse_intrinsic_type(s.trim())?; + ty.ptr = true; + ty.ptr_constant = constant; + return Ok(ty); } + + // [const ][sv]TYPE[{element_bits}[x{num_lanes}[x{num_vecs}]]][_t] + // | [enum ]TYPE + let (mut s, constant) = match (s.strip_prefix(CONST_STR), s.strip_prefix(ENUM_STR)) { + (Some(const_strip), _) => (const_strip, true), + (_, Some(enum_strip)) => (enum_strip, true), + (None, None) => (s, false), + }; + s = s.trim(); + s = s.strip_suffix("_t").unwrap_or(s); + + // Consider the following types as examples: + // A) `svuint32x3_t` + // B) `float16x4x2_t` + // C) `svbool_t` + + let sve = s.starts_with("sv"); + + let mut parts = s.split('x'); + let start = parts.next().ok_or("failed to parse type")?; + + // Continuing the previous examples.. + // A) kind=TypeKind::Int(Sign::Unsigned), bit_len=Some(32) + // B) kind=TypeKind::Float, bit_len=Some(16) + // C) kind=TypeKind::Bool, bit_len=None + let (kind, bit_len) = if let Some(digit_start) = start.find(|c: char| c.is_ascii_digit()) { + let (element_kind, element_bits) = start.split_at(digit_start); + let element_kind = element_kind.parse::()?; + let element_bits = element_bits.parse::().map_err(|err| err.to_string())?; + (element_kind, Some(element_bits)) + } else { + let element_kind = start.parse::()?; + (element_kind, None) + }; + + let bit_len = match (bit_len, kind) { + (None, TypeKind::SvPattern | TypeKind::SvPrefetchOp | TypeKind::Int(_)) => Some(32), + (None, TypeKind::Bool) => Some(8), + _ => bit_len, + }; + + // Continuing the previous examples.. + // A) second_len=Some(3) + // B) second_len=Some(4) + // C) second_len=None + let second_len = parts.next().map(|part| { + part.parse::() + .expect("failed to parse second part of type") + }); + + // Continuing the previous examples.. + // A) third_len=None + // B) third_len=Some(2) + // C) third_len=None + let third_len = parts.next().map(|part| { + part.parse::() + .expect("failed to parse third part of type") + }); + + let (simd_len, vec_len) = if sve { + (Some(SimdLen::Scalable), second_len) + } else { + (second_len.map(SimdLen::Fixed), third_len) + }; + + Ok(IntrinsicType { + ptr: false, + ptr_constant: false, + constant, + kind, + bit_len, + simd_len, + vec_len, + }) } diff --git a/library/stdarch/crates/intrinsic-test/src/common/argument.rs b/library/stdarch/crates/intrinsic-test/src/common/argument.rs index 885d5e998ef54..25207a8c458fd 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/argument.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/argument.rs @@ -4,7 +4,6 @@ use crate::common::intrinsic_helpers::TypeKind; use super::constraint::Constraint; use super::gen_rust::PASSES; -use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; /// An argument for the intrinsic. @@ -53,7 +52,8 @@ where self.constraint.is_some() } - /// The name (e.g. "A_VALS" or "a_vals") for the array of possible test inputs. + /// Returns a string with the name of the static variable containing test values for intrinsic + /// arguments of this type. pub(crate) fn rust_vals_array_name(&self) -> impl std::fmt::Display { let loads = crate::common::gen_rust::PASSES; format!( @@ -63,12 +63,15 @@ where ) } + /// Should this argument be passed by reference in C wrapper function declarations? + /// + /// SIMD types and `f16` are currently passed by reference. pub(crate) fn pass_by_ref(&self) -> bool { - // pass SIMD types and `f16` by reference self.is_simd() || (self.ty.kind() == TypeKind::Float && self.ty.inner_size() == 16) } } +/// Arguments of an intrinsic - including parameters that end up being const generics. #[derive(Debug, PartialEq, Clone)] pub struct ArgumentList { pub args: Vec>, @@ -78,6 +81,11 @@ impl ArgumentList where T: IntrinsicTypeDefinition, { + /// Returns a string with the arguments in `self` as a parameter list for a wrapper fn + /// definition in C (e.g. `$ty1 $arg1, $ty2 $arg2`). + /// + /// Skips arguments with constraints - which correspond to arguments that must take immediates - + /// as a different C definition will be generated for each value of these being tested. pub fn as_non_imm_arglist_c(&self) -> String { self.iter() .filter(|arg| !arg.has_constraint()) @@ -91,6 +99,11 @@ where .to_string() } + /// Returns a string with the arguments in `self` as a parameter list for a Rust declaration of + /// a C wrapper fn (e.g. `$arg1: $ty1, $arg2: $ty2`). + /// + /// Skips arguments with constraints - which correspond to arguments that must take immediates - + /// as a different C definition will be generated for each value of these being tested. pub fn as_non_imm_arglist_rust(&self) -> String { self.iter() .filter(|arg| !arg.has_constraint()) @@ -108,6 +121,8 @@ where .to_string() } + /// Returns a string with the arguments in `self` being passed to an intrinsic call in C + /// (e.g. `$arg1, 2 /* imm_args[0] */, $arg3` where `$arg2` has a constraint). pub fn as_call_params_c(&self, imm_args: &[i64]) -> String { let mut imm_args = imm_args.iter(); self.iter() @@ -124,8 +139,9 @@ where .to_string() } - /// Converts the argument list into the call parameters for a Rust function. - /// e.g. this would generate something like `a, b, c` + /// Returns a string with the arguments in `self` being passed to an intrinsic call in Rust. + /// (e.g. `$arg1, $arg3` where `$arg2` has a constraint and so corresponds to a const generic + /// parameter). pub fn as_call_param_rust(&self) -> String { self.iter() .filter(|a| !a.has_constraint()) @@ -133,6 +149,9 @@ where .join(", ") } + /// Returns a string with the arguments in `self` being passed to the declaration of a C wrapper + /// fn from Rust (e.g. `$arg1, $arg3` (where `$arg2` has a constraint and so corresponds to a + /// const generic parameter). pub fn as_c_call_param_rust(&self) -> String { self.iter() .filter(|a| !a.has_constraint()) @@ -146,40 +165,73 @@ where .join("") } + /// Returns a string defining a static variable with test values used for all intrinsics with + /// arguments of `arg`'s type. + /// + /// e.g. + /// ```rust,ignore + /// static U8_20: [u8; 20] = [ + /// 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xf0, + /// 0x80, 0x3b, 0xff, + /// ]; + /// ``` + /// + /// `num_lanes * num_vectors + loads - 1` elements are present in the array, which is sufficient + /// for a `loads` number of `num_lanes * num_vectors` windows into the array to be loaded: + /// + /// ```text + /// [0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xf0, 0x80, 0x3b, 0xff] + /// ^^^^^^^^^^^^^^^^^^^ first window of `num_lanes * num_vectors` elements (e.g. four elements) + /// ^^^^^^^^^^^^^^^^^^ second window + /// `loads`th window ^^^^^^^^^^^^^^^^^^^^^^ + /// ``` pub fn gen_arg_rust( arg: &Argument, w: &mut impl std::io::Write, - indentation: Indentation, loads: u32, ) -> std::io::Result<()> { writeln!( w, - "{indentation}static {name}: [{ty}; {load_size}] = {values};\n", + "static {name}: [{ty}; {load_size}] = {values};\n", name = arg.rust_vals_array_name(), ty = arg.ty.rust_scalar_type(), load_size = arg.ty.num_lanes() * arg.ty.num_vectors() + loads - 1, - values = arg.ty.populate_random(indentation, loads) + values = arg.ty.populate_random(loads) ) } - /// Creates a line for each argument that initializes the argument from array `[ARG]_VALS` at - /// an offset `i` using a load intrinsic, in Rust. - /// e.g `let a = vld1_u8(A_VALS.as_ptr().offset(i));` - pub fn load_values_rust(&self, indentation: Indentation) -> String { + /// Returns a string defining a local variable for each argument and loading a value into each + /// using a load intrinsic. + /// + /// e.g. + /// ```rust,ignore + /// let a = vld1_u8(I16_23.as_ptr().offset((i + 0 /* idx */) % 20 /* PASSES */)); + /// ```` + /// + /// The generator will have already generated arrays of appropriate length with values that can + /// be used for testing (see the `gen_args_rust` function). + /// + /// Each load is assumed to have a variable `i` in scope which comes from a loop which repeats + /// the testing of the intrinsic for different values - each subsequent `i` shifts the window + /// of values being loaded along the pre-prepared array. + /// + /// Each subsequent argument's first window is started one element further into the array + /// then the previous. + pub fn load_values_rust(&self) -> String { self.iter() .filter(|&arg| !arg.has_constraint()) .enumerate() .map(|(idx, arg)| { if arg.is_simd() { format!( - "{indentation}let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", + "let {name} = {load}({vals_name}.as_ptr().add((i+{idx}) % {PASSES}) as _);\n", name = arg.generate_name(), vals_name = arg.rust_vals_array_name(), load = arg.ty.get_load_function(), ) } else { format!( - "{indentation}let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n", + "let {name} = {vals_name}[(i+{idx}) % {PASSES}];\n", name = arg.generate_name(), vals_name = arg.rust_vals_array_name(), ) @@ -188,6 +240,7 @@ where .collect() } + /// Returns an iterator over the contained arguments pub fn iter(&self) -> std::slice::Iter<'_, Argument> { self.args.iter() } diff --git a/library/stdarch/crates/intrinsic-test/src/common/cli.rs b/library/stdarch/crates/intrinsic-test/src/common/cli.rs index f407b5ceb7d48..3ea87df51fda2 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/cli.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/cli.rs @@ -13,14 +13,25 @@ pub struct Cli { /// Filename for a list of intrinsics to skip (one per line) #[arg(long)] - pub skip: Option, + pub skip: Vec, /// Pass a target the test suite #[arg(long)] pub target: String, + /// Percentage of intrinsics to test (used to limit testing to keep CI times manageable) #[arg(long, default_value_t = 100u8)] pub sample_percentage: u8, + + /// Argument style of the C compiler + #[arg(long)] + pub cc_arg_style: CcArgStyle, +} + +#[derive(Copy, Clone, clap::ValueEnum)] +pub enum CcArgStyle { + Gcc, + Clang, } pub struct ProcessedCli { @@ -28,6 +39,7 @@ pub struct ProcessedCli { pub target: String, pub skip: Vec, pub sample_percentage: u8, + pub cc_arg_style: CcArgStyle, } impl ProcessedCli { @@ -36,22 +48,25 @@ impl ProcessedCli { let target = cli_options.target; let sample_percentage = cli_options.sample_percentage; - let skip = if let Some(filename) = cli_options.skip { - let data = std::fs::read_to_string(&filename).expect("Failed to open file"); - data.lines() - .map(str::trim) - .filter(|s| !s.contains('#')) - .map(String::from) - .collect_vec() - } else { - Default::default() - }; + let skip = cli_options + .skip + .iter() + .flat_map(|filename| { + std::fs::read_to_string(&filename) + .expect("Failed to open file") + .lines() + .map(|line| line.trim().to_owned()) + .filter(|line| !line.contains('#')) + .collect_vec() + }) + .collect_vec(); Self { target, skip, filename, sample_percentage, + cc_arg_style: cli_options.cc_arg_style, } } } diff --git a/library/stdarch/crates/intrinsic-test/src/common/constraint.rs b/library/stdarch/crates/intrinsic-test/src/common/constraint.rs index 5984e0fcc22f9..ab52d866ab20a 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/constraint.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/constraint.rs @@ -1,24 +1,43 @@ use serde::Deserialize; use std::ops::Range; -/// Describes the values to test for a const generic parameter. +/// Describes the values to test for a const generic parameter #[derive(Debug, PartialEq, Clone, Deserialize)] pub enum Constraint { - /// Test a single value. + /// Test a single value Equal(i64), /// Test a range of values, e.g. `0..16`. Range(Range), /// Test discrete values, e.g. `vec![1, 2, 4, 8]`. Set(Vec), + /// Values of `core::arch::aarch64::svpattern` + SvPattern, + /// Values of `core::arch::aarch64::svprfop` + SvPrefetchOp, + // Values of the `imm_rotation` argument in SVE intrinsics where arguments contain complex + // pairs and `imm_rotation` corresponds to the rotation. + SvImmRotation, + // Values of the `imm_rotation` argument in SVE intrinsics where arguments contain complex + // pairs and `imm_rotation` corresponds to the rotation (this variant is specifically for + // `svcadd` and `svqcadd` where only 90 and 270 are valid arguments). + SvImmRotationAdd, } impl Constraint { - /// Iterate over the values of this constraint. - pub fn iter<'a>(&'a self) -> impl Iterator + 'a { + /// Returns an iterator over the values of this constraint + pub fn iter(&self) -> Box + '_> { match self { - Constraint::Equal(i) => std::slice::Iter::default().copied().chain(*i..*i + 1), - Constraint::Range(range) => std::slice::Iter::default().copied().chain(range.clone()), - Constraint::Set(items) => items.iter().copied().chain(std::ops::Range::default()), + Constraint::Equal(i) => Box::new(std::iter::once(*i)), + Constraint::Range(range) => Box::new(range.clone()), + Constraint::Set(items) => Box::new(items.iter().copied().chain(Range::default())), + // These values are discriminants of the `svpattern` enum + Constraint::SvPattern => Box::new((0..=13).chain(29..=31)), + // These values are discriminants of the `svprfop` enum + Constraint::SvPrefetchOp => Box::new((0..=5).chain(8..=14)), + // Valid rotations for intrinsics operating on complex pairs: 0, 90, 180, 270 + Constraint::SvImmRotation => Box::new((0..=270).step_by(90)), + // Valid rotations for `svcadd` and `svqcadd`: 0, 270 + Constraint::SvImmRotationAdd => Box::new((90..=270).step_by(180)), } } } diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs index bdf6f68d58cc2..24756324c48e4 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/gen_c.rs @@ -4,6 +4,16 @@ use crate::common::intrinsic::Intrinsic; use super::intrinsic_helpers::IntrinsicTypeDefinition; +/// Generates a C source file containing wrapper functions around each specialisation of each +/// intrinsic (that is, intrinsics with specific values for the the immediate arguments). Each +/// wrapper function is invoked via FFI from the Rust binary doing the testing. +/// +/// e.g. +/// ```c +/// void __crc32cd_wrapper(uint32_t* __dst, uint32_t a, uint64_t b) { +/// *__dst = __crc32cd(a, b); +/// } +/// ``` pub fn write_wrapper_c( w: &mut impl std::io::Write, notice: &str, diff --git a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs index d23710451d478..cb07fa600474c 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/gen_rust.rs @@ -1,15 +1,20 @@ +use std::process::Command; + use itertools::Itertools; -use super::indentation::Indentation; use super::intrinsic_helpers::IntrinsicTypeDefinition; use crate::common::argument::ArgumentList; +use crate::common::cli::{CcArgStyle, ProcessedCli}; use crate::common::intrinsic::Intrinsic; use crate::common::intrinsic_helpers::TypeKind; -// The number of times each intrinsic will be called. +// The number of times each intrinsic will be called - influences the generation of the +// test arrays to minimise repeated testing of the same test values. pub(crate) const PASSES: u32 = 20; -// we need a reflexive equality relation, so treat NaNs as equal +/// Rust definitions that are included verbatim in the generated source. In particular, defines +/// a wrapper around float types that defines `NaN`s to be equal reflexively to enable +/// comparison of results that use floats types. const COMMON_RUST_DEFINITIONS: &str = r#" macro_rules! wrap_partialeq { ($($wrapper:ident ($inner:ty)),*) => {$( @@ -36,6 +41,29 @@ macro_rules! concatln { }; } +/// Run rustfmt on the generated source code +pub fn run_rustfmt(source_path: &str) { + let output = Command::new("rustfmt") + .args([source_path]) + .output() + .expect("failed to run rustfmt on generated sources"); + + if !output.status.success() { + panic!( + "failed to run rustfmt on generated sources:\nstdout:{stdout}\nstderr:{stderr}", + stdout = String::from_utf8_lossy(&output.stdout), + stderr = String::from_utf8_lossy(&output.stderr) + ); + } +} + +/// Writes a `Cargo.toml` containing a workspace with `module_count` members to `w`. +/// +/// e.g. +/// ```toml +/// [workspace] +/// members = [ "mod_0", "mod_1" ] +/// ``` pub fn write_bin_cargo_toml( w: &mut impl std::io::Write, module_count: usize, @@ -47,6 +75,8 @@ pub fn write_bin_cargo_toml( writeln!(w, "]") } +/// Writes a `Cargo.toml` for a crate with name `name` to `w` that will contain a single Rust source +/// file with a subset of the testing being generated. pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io::Result<()> { write!( w, @@ -73,6 +103,8 @@ pub fn write_lib_cargo_toml(w: &mut impl std::io::Write, name: &str) -> std::io: ) } +/// Writes a Rust source file into `w` with common definitions, static arrays with test values, +/// declarations of C wrapper functions for FFI and Rust test functions. pub fn write_lib_rs( w: &mut impl std::io::Write, notice: &str, @@ -106,7 +138,7 @@ pub fn write_lib_rs( let name = arg.rust_vals_array_name().to_string(); if seen.insert(name) { - ArgumentList::gen_arg_rust(arg, w, Indentation::default(), PASSES)?; + ArgumentList::gen_arg_rust(arg, w, PASSES)?; } } } @@ -121,6 +153,13 @@ pub fn write_lib_rs( Ok(()) } +/// Writes the body of an intrinsic test to `w` for `intrinsic`. +/// +/// Each specialisation of the intrinsic (i.e. specific instantiations of the immediate arguments +/// of the intrinsic) is added to an array of specialisations. Each specialisation is tested +/// (first loop) `PASSES` number of times (second loop). For a given iteration of a given +/// specialisation, test values are loaded for each argument and passed to the Rust intrinsic +/// and the C wrapper function, and the results are compared. fn generate_rust_test_loop( w: &mut impl std::io::Write, intrinsic: &Intrinsic, @@ -205,9 +244,7 @@ fn generate_rust_test_loop( " }}", " }}", ), - loaded_args = intrinsic - .arguments - .load_values_rust(Indentation::default().nest_by(4)), + loaded_args = intrinsic.arguments.load_values_rust(), rust_args = intrinsic.arguments.as_call_param_rust(), c_args = intrinsic.arguments.as_c_call_param_rust(), passes = passes, @@ -216,6 +253,8 @@ fn generate_rust_test_loop( ) } +/// Writes a test function for an given intrinsic to `w`, with a body generated by +/// `generate_rust_test_loop`. fn create_rust_test( w: &mut impl std::io::Write, intrinsic: &Intrinsic, @@ -235,6 +274,8 @@ fn create_rust_test( Ok(()) } +/// Writes an `extern "C"` block with function declarations for each of the C wrapper functions into +/// `w`. pub fn write_bindings_rust( w: &mut impl std::io::Write, i: usize, @@ -268,12 +309,24 @@ pub fn write_bindings_rust( writeln!(w, "}}") } +/// Writes a `build.rs` into `w` for each test crate that compiles the corresponding C source code +/// with wrapper functions. pub fn write_build_rs( w: &mut impl std::io::Write, i: usize, arch_flags: &[&str], + cli_options: &ProcessedCli, ) -> std::io::Result<()> { - const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-ffp-model=strict", "-Wno-narrowing"]; + const COMMON_FLAGS: &[&str] = &["-ffp-contract=off", "-Wno-narrowing"]; + const CLANG_FLAGS: &[&str] = &["-ffp-model=strict"]; + const GCC_FLAGS: &[&str] = &[ + "-flax-vector-conversions", + "-fno-fast-math", + "-frounding-math", + "-fexcess-precision=standard", + "-ftrapping-math", + "-fsignaling-nans", + ]; write!( w, @@ -287,9 +340,17 @@ pub fn write_build_rs( i = i )?; - let indentation = Indentation::default().nest_by(2); - for flag in COMMON_FLAGS.iter().chain(arch_flags) { - writeln!(w, "{indentation}\"{flag}\",")?; + let compiler_specific_flags = match cli_options.cc_arg_style { + CcArgStyle::Gcc => GCC_FLAGS, + CcArgStyle::Clang => CLANG_FLAGS, + }; + + for flag in COMMON_FLAGS + .iter() + .chain(compiler_specific_flags) + .chain(arch_flags) + { + writeln!(w, "\"{flag}\",")?; } write!( diff --git a/library/stdarch/crates/intrinsic-test/src/common/indentation.rs b/library/stdarch/crates/intrinsic-test/src/common/indentation.rs deleted file mode 100644 index 9c2cc886e6544..0000000000000 --- a/library/stdarch/crates/intrinsic-test/src/common/indentation.rs +++ /dev/null @@ -1,26 +0,0 @@ -//! Basic code formatting tools. -//! -//! We don't need perfect formatting for the generated tests, but simple indentation can make -//! debugging a lot easier. - -#[derive(Copy, Clone, Debug, Default)] -pub struct Indentation(u32); - -impl Indentation { - pub fn nested(self) -> Self { - Self(self.0 + 1) - } - - pub fn nest_by(&self, additional_levels: u32) -> Self { - Self(self.0 + additional_levels) - } -} - -impl std::fmt::Display for Indentation { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - for _ in 0..self.0 { - write!(f, " ")?; - } - Ok(()) - } -} diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs index 76e5959153d07..d69644388a830 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic.rs @@ -19,6 +19,10 @@ pub struct Intrinsic { pub arch_tags: Vec, } +/// Invokes `f` for each combination of the values in the constraint ranges. +/// +/// For example, given `constraints=[Equal(0), Range(1..2), Set([3, 4])]` and `imm_values=[]`, this +/// produces the four calls to `f`: `f([0, 1, 3])`, `f([0, 1, 4])`, `f([0, 2, 3])`, `f([0, 2, 4])`. fn recurse_specializations<'a, E>( constraints: &mut (impl Iterator + Clone), imm_values: &mut Vec, @@ -37,6 +41,13 @@ fn recurse_specializations<'a, E>( } impl Intrinsic { + /// Invokes `f` for "specialisation" of the intrinsic - a specific instantiation of the + /// constant generics of the intrinsic. `f` takes a slice where the `i`th element corresponds + /// to the value of the `i`th const generic argument of the intrinsic. + /// + /// For an intrinsic with three arguments with constraints `Equal(0)`, `Range(1..2)`, + /// `Set([3, 4])` respectively, this would produce four calls to `f`: `f(0, 1, 3)`, + /// `f(0, 1, 4)`, `f(0, 2, 3)`, `f(0, 2, 4)`. pub fn iter_specializations( &self, mut f: impl FnMut(&[i64]) -> Result<(), E>, diff --git a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs index ab4a565200bc8..a894d5c0164e1 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/intrinsic_helpers.rs @@ -5,7 +5,6 @@ use std::str::FromStr; use itertools::Itertools as _; -use super::indentation::Indentation; use super::values::value_for_array; #[derive(Debug, PartialEq, Copy, Clone)] @@ -16,6 +15,7 @@ pub enum Sign { #[derive(Debug, PartialEq, Copy, Clone)] pub enum TypeKind { + Bool, BFloat, Float, Int(Sign), @@ -24,6 +24,8 @@ pub enum TypeKind { Void, Mask, Vector, + SvPattern, + SvPrefetchOp, } impl FromStr for TypeKind { @@ -31,17 +33,22 @@ impl FromStr for TypeKind { fn from_str(s: &str) -> Result { match s { - "bfloat" | "BF16" => Ok(Self::BFloat), - "float" | "double" | "FP16" | "FP32" | "FP64" => Ok(Self::Float), - "int" | "long" | "short" | "SI8" | "SI16" | "SI32" | "SI64" => { + "svbool" | "bool" => Ok(Self::Bool), + "svbfloat" | "bfloat" | "BF16" => Ok(Self::BFloat), + "svfloat" | "float" | "double" | "FP16" | "FP32" | "FP64" => Ok(Self::Float), + "svint" | "int" | "long" | "short" | "SI8" | "SI16" | "SI32" | "SI64" => { Ok(Self::Int(Sign::Signed)) } "poly" => Ok(Self::Poly), "char" => Ok(Self::Char(Sign::Signed)), - "uint" | "unsigned" | "UI8" | "UI16" | "UI32" | "UI64" => Ok(Self::Int(Sign::Unsigned)), + "svuint" | "uint" | "unsigned" | "UI8" | "UI16" | "UI32" | "UI64" => { + Ok(Self::Int(Sign::Unsigned)) + } "void" => Ok(Self::Void), "MASK" => Ok(Self::Mask), "M128" | "M256" | "M512" => Ok(Self::Vector), + "svpattern" => Ok(Self::SvPattern), + "svprfop" => Ok(Self::SvPrefetchOp), _ => Err(format!("Impossible to parse argument kind {s}")), } } @@ -53,6 +60,7 @@ impl fmt::Display for TypeKind { f, "{}", match self { + Self::Bool => "bool", Self::BFloat => "bfloat", Self::Float => "float", Self::Int(Sign::Signed) => "int", @@ -63,15 +71,18 @@ impl fmt::Display for TypeKind { Self::Char(Sign::Unsigned) => "unsigned char", Self::Mask => "mask", Self::Vector => "vector", + Self::SvPattern => "svpattern", + Self::SvPrefetchOp => "svprfop", } ) } } impl TypeKind { - /// Gets the type part of a c typedef for a type that's in the form of {type}{size}_t. + /// Returns the type component of a C typedef for a type of the form of `{type}{size}_t` pub fn c_prefix(&self) -> &str { match self { + Self::Bool => "bool", Self::Float => "float", Self::Int(Sign::Signed) => "int", Self::Int(Sign::Unsigned) => "uint", @@ -83,7 +94,7 @@ impl TypeKind { } } - /// Gets the rust prefix for the type kind i.e. i, u, f. + /// Returns the Rust prefix for this type kind i.e. `i`, `u`, or `f`. pub fn rust_prefix(&self) -> &str { match self { Self::BFloat => "bf", @@ -99,37 +110,60 @@ impl TypeKind { } } +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum SimdLen { + Scalable, + Fixed(u32), +} + +impl std::fmt::Display for SimdLen { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Scalable => unimplemented!(), + Self::Fixed(len) => ::fmt(len, f), + } + } +} + #[derive(Debug, PartialEq, Clone)] pub struct IntrinsicType { + /// Is this an immediate? pub constant: bool, - /// whether this object is a const pointer + /// Is this is a const pointer to the type? pub ptr_constant: bool, + /// Is this is a pointer to the type? pub ptr: bool, + /// Element type (e.g. `TypeKind::Int(Sign::Unsigned)` for `uint64x2_t`). pub kind: TypeKind, - /// The bit length of this type (e.g. 32 for u32). + + /// Number of bits of this type (e.g. 32 for `u32`). pub bit_len: Option, - /// Length of the SIMD vector (i.e. 4 for uint32x4_t), A value of `None` - /// means this is not a simd type. A `None` can be assumed to be 1, - /// although in some places a distinction is needed between `u64` and - /// `uint64x1_t` this signals that. - pub simd_len: Option, + /// Length of a SIMD vector (i.e. `Fixed(4)` for `uint32x4_t`). + /// + /// A value of `None` means this is not a SIMD type. The number of lanes of a type with + /// `simd_len=None` can be assumed to be one, though it is important to maintain a distinction + /// between `simd_len=None` and `simd_len=Some(Fixed(1))` so as to differentiate between `u64` + /// and `uint64x1_t`. A value of `Some(Scalable)` indicates that this is a scalable vector. + pub simd_len: Option, - /// The number of rows for SIMD matrices (i.e. 2 for uint8x8x2_t). - /// A value of `None` represents a type that does not contain any - /// rows encoded in the type (e.g. uint8x8_t). - /// A value of `None` can be assumed to be 1 though. + /// Number of rows of a SIMD matrix (i.e. 2 for `uint8x8x2_t`). + /// + /// A value of `None` means this is not a SIMD matrix (e.g. `uint8x8_t`). The number of rows of + /// a type with `vec_len=None` can be assumed to be one. pub vec_len: Option, } impl IntrinsicType { + /// Returns the element type pub fn kind(&self) -> TypeKind { self.kind } + /// Returns the number of bits of the type (with a minimum of `8`) pub fn inner_size(&self) -> u32 { if let Some(bl) = self.bit_len { cmp::max(bl, 8) @@ -138,37 +172,54 @@ impl IntrinsicType { } } + /// Returns the number of lanes of the type pub fn num_lanes(&self) -> u32 { - self.simd_len.unwrap_or(1) + self.simd_len + .as_ref() + .map(|len| match len { + SimdLen::Scalable => unimplemented!(), + SimdLen::Fixed(len) => *len, + }) + .unwrap_or(1) } + /// Returns the number of vectors of the type pub fn num_vectors(&self) -> u32 { self.vec_len.unwrap_or(1) } + /// Returns `true` if this represents a SIMD vector pub fn is_simd(&self) -> bool { self.simd_len.is_some() || self.vec_len.is_some() } + /// Returns `true` if this is a pointer pub fn is_ptr(&self) -> bool { self.ptr } - pub fn populate_random(&self, indentation: Indentation, loads: u32) -> String { + /// Returns the elements used in the test value arrays in `gen_arg_rust`. Uses the same + /// `num_lanes * num_vectors + loads - 1` arithmetic to produce the number of values that + /// `ArgumentList::gen_arg_rust` expects and `ArgumentList::load_values_rust` needs. + /// + /// Each value in the array starts as a bit pattern from `common::values::value_from_array` + /// which is then printed as a hex value in the generated code (and if identified as a negative + /// value, with the appropriate minus and corrected hex pattern). Calls to `fN::from_bits` are + /// generated for floats. + pub fn populate_random(&self, loads: u32) -> String { match self { IntrinsicType { bit_len: Some(bit_len @ (1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 16 | 32 | 64)), kind: kind @ (TypeKind::Int(_) | TypeKind::Poly | TypeKind::Char(_) | TypeKind::Mask), - simd_len, vec_len, .. } => { - let body_indentation = indentation.nested(); format!( - "[\n{body}\n{indentation}]", - body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) - .format_with(",\n", |i, fmt| { + "[\n{body}\n]", + body = (0..(self.num_lanes() * vec_len.unwrap_or(1) + loads - 1)).format_with( + ",\n", + |i, fmt| { let src = value_for_array(*bit_len, i); assert!(src == 0 || src.ilog2() < *bit_len); if *kind == TypeKind::Int(Sign::Signed) && (src >> (*bit_len - 1)) != 0 @@ -177,43 +228,43 @@ impl IntrinsicType { let mask = !0u64 >> (64 - *bit_len); let ones_compl = src ^ mask; let twos_compl = ones_compl + 1; - fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) + fmt(&format_args!("-{twos_compl:#x}")) } else { - fmt(&format_args!("{body_indentation}{src:#x}")) + fmt(&format_args!("{src:#x}")) } - }) + } + ) ) } IntrinsicType { kind: TypeKind::Float, bit_len: Some(bit_len @ (16 | 32 | 64)), - simd_len, vec_len, .. } => { format!( - "[\n{body}\n{indentation}]", - body = (0..(simd_len.unwrap_or(1) * vec_len.unwrap_or(1) + loads - 1)) - .format_with(",\n", |i, fmt| fmt(&format_args!( - "{indentation}f{bit_len}::from_bits({src:#x})", - indentation = indentation.nested(), + "[\n{body}\n]", + body = (0..(self.num_lanes() * vec_len.unwrap_or(1) + loads - 1)).format_with( + ",\n", + |i, fmt| fmt(&format_args!( + "f{bit_len}::from_bits({src:#x})", src = value_for_array(*bit_len, i) - ))) + )) + ) ) } IntrinsicType { kind: TypeKind::Vector, bit_len: Some(128 | 256 | 512), - simd_len, vec_len, .. } => { - let body_indentation = indentation.nested(); let effective_bit_len = 32; format!( - "[\n{body}\n{indentation}]", - body = (0..(vec_len.unwrap_or(1) * simd_len.unwrap_or(1) + loads - 1)) - .format_with(",\n", |i, fmt| { + "[\n{body}\n]", + body = (0..(vec_len.unwrap_or(1) * self.num_lanes() + loads - 1)).format_with( + ",\n", + |i, fmt| { let src = value_for_array(effective_bit_len, i); assert!(src == 0 || src.ilog2() < effective_bit_len); if (src >> (effective_bit_len - 1)) != 0 { @@ -221,11 +272,12 @@ impl IntrinsicType { let mask = !0u64 >> (64 - effective_bit_len); let ones_compl = src ^ mask; let twos_compl = ones_compl + 1; - fmt(&format_args!("{body_indentation}-{twos_compl:#x}")) + fmt(&format_args!("-{twos_compl:#x}")) } else { - fmt(&format_args!("{body_indentation}{src:#x}")) + fmt(&format_args!("{src:#x}")) } - }) + } + ) ) } _ => unimplemented!("populate random: {self:#?}"), @@ -235,18 +287,16 @@ impl IntrinsicType { pub trait IntrinsicTypeDefinition: Deref { /// Determines the load function for this type. - /// can be implemented in an `impl` block fn get_load_function(&self) -> String; - /// Gets a string containing the typename for this type in C format. - /// can be directly defined in `impl` blocks + /// Gets a string containing the typename for this type in C. fn c_type(&self) -> String; - /// Gets a string containing the typename for this type in Rust format. - /// can be directly defined in `impl` blocks + /// Gets a string containing the typename for this type in Rust. fn rust_type(&self) -> String; - /// To enable architecture-specific logic + /// Gets a string containing the name of the scalar type corresponding to this type if it is a + /// vector. fn rust_scalar_type(&self) -> String { if self.is_simd() { format!( diff --git a/library/stdarch/crates/intrinsic-test/src/common/mod.rs b/library/stdarch/crates/intrinsic-test/src/common/mod.rs index 86849f7db34e0..3775e453c24b0 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/mod.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/mod.rs @@ -6,7 +6,9 @@ use cli::ProcessedCli; use crate::common::{ gen_c::write_wrapper_c, - gen_rust::{write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs}, + gen_rust::{ + run_rustfmt, write_bin_cargo_toml, write_build_rs, write_lib_cargo_toml, write_lib_rs, + }, intrinsic::Intrinsic, intrinsic_helpers::IntrinsicTypeDefinition, }; @@ -19,7 +21,6 @@ pub mod intrinsic_helpers; mod gen_c; mod gen_rust; -mod indentation; mod values; /// Architectures must support this trait @@ -29,7 +30,7 @@ pub trait SupportedArchitectureTest { fn intrinsics(&self) -> &[Intrinsic]; - fn create(cli_options: ProcessedCli) -> Self; + fn create(cli_options: &ProcessedCli) -> Self; const NOTICE: &str; @@ -38,14 +39,14 @@ pub trait SupportedArchitectureTest { const PLATFORM_RUST_CFGS: &str; const PLATFORM_RUST_DEFINITIONS: &str; - fn arch_flags(&self) -> Vec<&str>; + fn arch_flags(&self, cli_options: &ProcessedCli) -> Vec<&str>; fn generate_c_file(&self) { - let (chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); + let (max_chunk_size, _chunk_count) = manual_chunk(self.intrinsics().len()); std::fs::create_dir_all("c_programs").unwrap(); self.intrinsics() - .par_chunks(chunk_size) + .par_chunks(max_chunk_size) .enumerate() .map(|(i, chunk)| { let c_filename = format!("c_programs/wrapper_{i}.c"); @@ -56,25 +57,25 @@ pub trait SupportedArchitectureTest { .unwrap(); } - fn generate_rust_file(&self) { - let arch_flags = self.arch_flags(); + fn generate_rust_file(&self, cli_options: &ProcessedCli) { + let arch_flags = self.arch_flags(cli_options); std::fs::create_dir_all("rust_programs").unwrap(); - let (chunk_size, chunk_count) = manual_chunk(self.intrinsics().len()); + let (max_chunk_size, chunk_count) = manual_chunk(self.intrinsics().len()); let mut cargo = File::create("rust_programs/Cargo.toml").unwrap(); write_bin_cargo_toml(&mut cargo, chunk_count).unwrap(); self.intrinsics() - .chunks(chunk_size) + .chunks(max_chunk_size) .enumerate() .map(|(i, chunk)| { std::fs::create_dir_all(format!("rust_programs/mod_{i}/src"))?; let rust_filename = format!("rust_programs/mod_{i}/src/lib.rs"); trace!("generating `{rust_filename}`"); - let mut file = File::create(rust_filename)?; + let mut file = File::create(&rust_filename)?; write_lib_rs( &mut file, @@ -84,6 +85,7 @@ pub trait SupportedArchitectureTest { i, chunk, )?; + run_rustfmt(&rust_filename); let toml_filename = format!("rust_programs/mod_{i}/Cargo.toml"); trace!("generating `{toml_filename}`"); @@ -93,9 +95,10 @@ pub trait SupportedArchitectureTest { let build_rs_filename = format!("rust_programs/mod_{i}/build.rs"); trace!("generating `{build_rs_filename}`"); - let mut file = File::create(build_rs_filename).unwrap(); + let mut file = File::create(&build_rs_filename).unwrap(); - write_build_rs(&mut file, i, &arch_flags).unwrap(); + write_build_rs(&mut file, i, &arch_flags, &cli_options).unwrap(); + run_rustfmt(&build_rs_filename); Ok(()) }) @@ -106,5 +109,7 @@ pub trait SupportedArchitectureTest { pub fn manual_chunk(intrinsic_count: usize) -> (usize, usize) { let ncores = std::thread::available_parallelism().unwrap().into(); - (intrinsic_count.div_ceil(ncores), ncores) + let max_intrinsics_per_chunk = intrinsic_count.div_ceil(ncores); + let number_of_chunks = intrinsic_count.div_ceil(max_intrinsics_per_chunk); + (max_intrinsics_per_chunk, number_of_chunks) } diff --git a/library/stdarch/crates/intrinsic-test/src/common/values.rs b/library/stdarch/crates/intrinsic-test/src/common/values.rs index 6c94ef2c22e1d..01dc0713f0f00 100644 --- a/library/stdarch/crates/intrinsic-test/src/common/values.rs +++ b/library/stdarch/crates/intrinsic-test/src/common/values.rs @@ -1,6 +1,12 @@ -/// Get a single value for an argument values array in a determistic way. -/// * `bits`: The number of bits for the type, only 8, 16, 32, 64 are valid values -/// * `index`: The position in the array we are generating for +/// Returns a bit pattern for a value being output into a array of test values. Bit patterns come +/// from one of many constant arrays of test values. The specific constant array used depends on +/// the number of bits - `bits` - of the type having test values generated for it. This function +/// is called repeatedly with incrementing values of `index` to produce an entire array of test +/// values. +/// +/// Each constant array of bit patterns should ideally be at least the length of the largest array +/// of test values that will be requested (e.g. 51 for a `poly8x8x4` when `PASSES=20`: +/// `(8 * 4) + 20 - 1`), otherwise values will be repeated. pub fn value_for_array(bits: u32, index: u32) -> u64 { let index = index as usize; match bits { diff --git a/library/stdarch/crates/intrinsic-test/src/main.rs b/library/stdarch/crates/intrinsic-test/src/main.rs index 9f57c99f12cf5..4c0136041fc35 100644 --- a/library/stdarch/crates/intrinsic-test/src/main.rs +++ b/library/stdarch/crates/intrinsic-test/src/main.rs @@ -18,18 +18,24 @@ fn main() { if processed_cli_options.target.starts_with("arm") | processed_cli_options.target.starts_with("aarch64") { - run(ArmArchitectureTest::create(processed_cli_options)) + run( + ArmArchitectureTest::create(&processed_cli_options), + processed_cli_options, + ) } else if processed_cli_options.target.starts_with("x86") { - run(X86ArchitectureTest::create(processed_cli_options)) + run( + X86ArchitectureTest::create(&processed_cli_options), + processed_cli_options, + ) } else { unimplemented!("Unsupported target {}", processed_cli_options.target) } } -fn run(test_environment: impl SupportedArchitectureTest) { +fn run(test_environment: impl SupportedArchitectureTest, processed_cli_options: ProcessedCli) { info!("building C binaries"); test_environment.generate_c_file(); info!("building Rust binaries"); - test_environment.generate_rust_file(); + test_environment.generate_rust_file(&processed_cli_options); } diff --git a/library/stdarch/crates/intrinsic-test/src/x86/mod.rs b/library/stdarch/crates/intrinsic-test/src/x86/mod.rs index 5d4798482a1d3..288bd8bdf8961 100644 --- a/library/stdarch/crates/intrinsic-test/src/x86/mod.rs +++ b/library/stdarch/crates/intrinsic-test/src/x86/mod.rs @@ -29,8 +29,11 @@ impl SupportedArchitectureTest for X86ArchitectureTest { const PLATFORM_RUST_DEFINITIONS: &str = config::PLATFORM_RUST_DEFINITIONS; const PLATFORM_RUST_CFGS: &str = config::PLATFORM_RUST_CFGS; - fn arch_flags(&self) -> Vec<&str> { + fn arch_flags(&self, _cli_options: &ProcessedCli) -> Vec<&str> { vec![ + "-maes", + "-mf16c", + "-mfma", "-mavx", "-mavx2", "-mavx512f", @@ -66,7 +69,7 @@ impl SupportedArchitectureTest for X86ArchitectureTest { ] } - fn create(cli_options: ProcessedCli) -> Self { + fn create(cli_options: &ProcessedCli) -> Self { let mut intrinsics = get_xml_intrinsics(&cli_options.filename).expect("Error parsing input file"); diff --git a/library/stdarch/crates/intrinsic-test/src/x86/types.rs b/library/stdarch/crates/intrinsic-test/src/x86/types.rs index c6ea15e150752..a0e14c77d6b5e 100644 --- a/library/stdarch/crates/intrinsic-test/src/x86/types.rs +++ b/library/stdarch/crates/intrinsic-test/src/x86/types.rs @@ -3,7 +3,9 @@ use std::str::FromStr; use itertools::Itertools; use super::intrinsic::X86IntrinsicType; -use crate::common::intrinsic_helpers::{IntrinsicType, IntrinsicTypeDefinition, Sign, TypeKind}; +use crate::common::intrinsic_helpers::{ + IntrinsicType, IntrinsicTypeDefinition, Sign, SimdLen, TypeKind, +}; use crate::x86::xml_parser::Parameter; impl IntrinsicTypeDefinition for X86IntrinsicType { @@ -187,7 +189,7 @@ impl X86IntrinsicType { Ok(num_bits) => self .data .bit_len - .and_then(|bit_len| Some(num_bits / bit_len)), + .and_then(|bit_len| Some(SimdLen::Fixed(num_bits / bit_len))), Err(_) => None, }; } @@ -297,7 +299,7 @@ impl X86IntrinsicType { // - _mm512_conj_pch if param.type_data == "__m512h" && param.etype == "FP32" { data.bit_len = Some(16); - data.simd_len = Some(32); + data.simd_len = Some(SimdLen::Fixed(32)); } let mut result = X86IntrinsicType { diff --git a/library/stdarch/crates/stdarch-gen-hexagon-scalar/src/main.rs b/library/stdarch/crates/stdarch-gen-hexagon-scalar/src/main.rs index bbe28174ffa05..3e3f00bb7fb02 100644 --- a/library/stdarch/crates/stdarch-gen-hexagon-scalar/src/main.rs +++ b/library/stdarch/crates/stdarch-gen-hexagon-scalar/src/main.rs @@ -513,9 +513,11 @@ fn generate_functions(intrinsics: &[ScalarIntrinsic]) -> String { } // Attributes - output.push_str("#[inline(always)]\n"); if let Some(tf_attr) = info.arch_guard.target_feature_attr() { + output.push_str("#[inline]\n"); // https://github.com/rust-lang/rust/issues/145574 output.push_str(&format!("{}\n", tf_attr)); + } else { + output.push_str("#[inline(always)]\n"); } // Immediate parameters become const generics but are passed as positional diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec index 867e071b62edc..5f85b1909db1f 100644 --- a/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec +++ b/library/stdarch/crates/stdarch-gen-loongarch/lasx.spec @@ -1468,41 +1468,49 @@ asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI /// lasx_xvilvh_b +impl = portable name = lasx_xvilvh_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvilvh_h +impl = portable name = lasx_xvilvh_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvilvh_w +impl = portable name = lasx_xvilvh_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvilvh_d +impl = portable name = lasx_xvilvh_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI /// lasx_xvilvl_b +impl = portable name = lasx_xvilvl_b asm-fmts = xd, xj, xk data-types = V32QI, V32QI, V32QI /// lasx_xvilvl_h +impl = portable name = lasx_xvilvl_h asm-fmts = xd, xj, xk data-types = V16HI, V16HI, V16HI /// lasx_xvilvl_w +impl = portable name = lasx_xvilvl_w asm-fmts = xd, xj, xk data-types = V8SI, V8SI, V8SI /// lasx_xvilvl_d +impl = portable name = lasx_xvilvl_d asm-fmts = xd, xj, xk data-types = V4DI, V4DI, V4DI diff --git a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec index b9df7bd96b9cb..6c554ac28d3a1 100644 --- a/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec +++ b/library/stdarch/crates/stdarch-gen-loongarch/lsx.spec @@ -1488,41 +1488,49 @@ asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI /// lsx_vilvh_b +impl = portable name = lsx_vilvh_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vilvh_h +impl = portable name = lsx_vilvh_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vilvh_w +impl = portable name = lsx_vilvh_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vilvh_d +impl = portable name = lsx_vilvh_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI /// lsx_vilvl_b +impl = portable name = lsx_vilvl_b asm-fmts = vd, vj, vk data-types = V16QI, V16QI, V16QI /// lsx_vilvl_h +impl = portable name = lsx_vilvl_h asm-fmts = vd, vj, vk data-types = V8HI, V8HI, V8HI /// lsx_vilvl_w +impl = portable name = lsx_vilvl_w asm-fmts = vd, vj, vk data-types = V4SI, V4SI, V4SI /// lsx_vilvl_d +impl = portable name = lsx_vilvl_d asm-fmts = vd, vj, vk data-types = V2DI, V2DI, V2DI diff --git a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt index e07ac41f9c44e..d17d0833df77b 100644 --- a/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt +++ b/library/stdarch/crates/stdarch-gen-loongarch/src/portable-intrinsics.txt @@ -239,6 +239,14 @@ lsx_vpickod_b lsx_vpickod_h lsx_vpickod_w lsx_vpickod_d +lsx_vilvh_b +lsx_vilvh_h +lsx_vilvh_w +lsx_vilvh_d +lsx_vilvl_b +lsx_vilvl_h +lsx_vilvl_w +lsx_vilvl_d # LASX intrinsics lasx_xvsll_b @@ -475,3 +483,11 @@ lasx_xvpickod_b lasx_xvpickod_h lasx_xvpickod_w lasx_xvpickod_d +lasx_xvilvh_b +lasx_xvilvh_h +lasx_xvilvh_w +lasx_xvilvh_d +lasx_xvilvl_b +lasx_xvilvl_h +lasx_xvilvl_w +lasx_xvilvl_d diff --git a/src/doc/rustc-dev-guide/src/tests/compiletest.md b/src/doc/rustc-dev-guide/src/tests/compiletest.md index 7ffc9e3018471..36dc71a3ac0cc 100644 --- a/src/doc/rustc-dev-guide/src/tests/compiletest.md +++ b/src/doc/rustc-dev-guide/src/tests/compiletest.md @@ -346,17 +346,6 @@ See also the [codegen tests](#codegen-tests) for a similar set of tests. If you need to work with `#![no_std]` cross-compiling tests, consult the [`minicore` test auxiliary](./minicore.md) chapter. -#### Conditional assembly tests based on instruction support - -Tests that depend on specific assembly instructions being available can use the -`//@ needs-asm-mnemonic: ` directive. -This will skip the test if the target backend does not support the specified instruction mnemonic. - -For example, a test that requires the `RET` instruction: -```rust,ignore -//@ needs-asm-mnemonic: RET -``` - [`tests/assembly-llvm`]: https://github.com/rust-lang/rust/tree/HEAD/tests/assembly-llvm diff --git a/src/doc/rustc-dev-guide/src/tests/directives.md b/src/doc/rustc-dev-guide/src/tests/directives.md index d1b923edd3957..5dce2ad13a5fc 100644 --- a/src/doc/rustc-dev-guide/src/tests/directives.md +++ b/src/doc/rustc-dev-guide/src/tests/directives.md @@ -164,9 +164,10 @@ The following directives will check rustc build settings and target settings: For tests that cross-compile to explicit targets via `--target`, use `needs-llvm-components` instead to ensure the appropriate backend is available. -- `needs-asm-mnemonic: ` — ignores if the target backend does not - support the specified assembly mnemonic (e.g., `RET`, `NOP`). - Only supported with the LLVM backend. +- `needs-asm-ret` - ignores if the target does not have a `ret` instruction + in its assembly syntax. Most target architectures have this instruction, + making it handy for portable inline-assembly tests, but some architectures + (e.g. 32-bit ARM) do not have it. - `needs-profiler-runtime` — ignores the test if the profiler runtime was not enabled for the target (`build.profiler = true` in `bootstrap.toml`) - `needs-sanitizer-support` — ignores if the sanitizer support was not enabled diff --git a/src/tools/compiletest/src/directives/directive_names.rs b/src/tools/compiletest/src/directives/directive_names.rs index fb795c6b447c9..41ea492d6f38f 100644 --- a/src/tools/compiletest/src/directives/directive_names.rs +++ b/src/tools/compiletest/src/directives/directive_names.rs @@ -159,7 +159,7 @@ pub(crate) const KNOWN_DIRECTIVE_NAMES: &[&str] = &[ "min-llvm-version", "min-system-llvm-version", "minicore-compile-flags", - "needs-asm-mnemonic", + "needs-asm-ret", "needs-asm-support", "needs-backends", "needs-crate-type", diff --git a/src/tools/compiletest/src/directives/needs.rs b/src/tools/compiletest/src/directives/needs.rs index 64c488aaa7935..ce67b11fba211 100644 --- a/src/tools/compiletest/src/directives/needs.rs +++ b/src/tools/compiletest/src/directives/needs.rs @@ -101,37 +101,6 @@ pub(super) fn handle_needs( } } - if name == "needs-asm-mnemonic" { - let Some(rest) = ln.value_after_colon() else { - return IgnoreDecision::Error { - message: "expected `needs-asm-mnemonic` to have a mnemonic name after colon" - .to_string(), - }; - }; - - if !config.default_codegen_backend.is_llvm() { - return IgnoreDecision::Ignore { - reason: "skipping test as non-LLVM backend does not support mnemonic queries" - .to_string(), - }; - } - - let mnemonic = rest.trim(); - let has_mnemonic = match mnemonic { - "ret" => conditions.has_ret_mnemonic, - "nop" => conditions.has_nop_mnemonic, - _ => has_mnemonic(config, mnemonic), - }; - - if has_mnemonic { - return IgnoreDecision::Continue; - } else { - return IgnoreDecision::Ignore { - reason: format!("skipping test as target does not have `{mnemonic}` mnemonic"), - }; - } - } - // Handled elsewhere. if name == "needs-llvm-components" || name == "needs-backends" { return IgnoreDecision::Continue; @@ -163,11 +132,6 @@ struct Need { pub(crate) struct PreparedNeedsConditions { /// The `//@ needs-*` conditions that can be treated as a simple name->boolean mapping. simple_needs: HashMap<&'static str, Need>, - - /// Might add particular other mnemonics heavily needed by tests here. - /// Otherwise call into llvm for every check - has_ret_mnemonic: bool, - has_nop_mnemonic: bool, } pub(crate) fn prepare_needs_conditions(config: &Config) -> PreparedNeedsConditions { @@ -177,6 +141,14 @@ pub(crate) fn prepare_needs_conditions(config: &Config) -> PreparedNeedsConditio // Note that we intentionally still put the needs- prefix here to make the file show up when // grepping for a directive name, even though we could technically strip that. let simple_needs = vec![ + // This used to be a more general `//@ needs-asm-mnemonic: ret` directive, + // but was simplified to just `//@ needs-asm-ret` because there are very + // few other mnemonics (`nop`?) that it could ever be useful with. + Need { + name: "needs-asm-ret", + condition: has_mnemonic(config, "ret"), + ignore_reason: "ignored on targets without a `ret` assembly instruction", + }, Need { name: "needs-asm-support", condition: config.has_asm_support(), @@ -398,11 +370,7 @@ pub(crate) fn prepare_needs_conditions(config: &Config) -> PreparedNeedsConditio }) .collect::>(); - PreparedNeedsConditions { - simple_needs, - has_ret_mnemonic: has_mnemonic(config, "ret"), - has_nop_mnemonic: has_mnemonic(config, "nop"), - } + PreparedNeedsConditions { simple_needs } } fn find_dlltool(config: &Config) -> bool { diff --git a/src/tools/compiletest/src/directives/tests.rs b/src/tools/compiletest/src/directives/tests.rs index bc016cfb1cb43..65d7b5360ae0d 100644 --- a/src/tools/compiletest/src/directives/tests.rs +++ b/src/tools/compiletest/src/directives/tests.rs @@ -1270,23 +1270,15 @@ fn test_edition_range_edition_to_test() { } #[test] -fn needs_asm_mnemonic() { +fn needs_asm_ret() { let config_x86_64 = cfg().target("x86_64-unknown-linux-gnu").build(); let config_aarch64 = cfg().target("aarch64-unknown-linux-gnu").build(); - - // invalid mnemonic - assert!(check_ignore(&config_x86_64, "//@ needs-asm-mnemonic:GRUGGY")); - assert!(check_ignore(&config_aarch64, "//@ needs-asm-mnemonic:gruggy")); - - // valid x86 and aarch64 - assert!(!check_ignore(&config_x86_64, "//@ needs-asm-mnemonic:RET")); - assert!(!check_ignore(&config_aarch64, "//@ needs-asm-mnemonic:ret")); - - // this is aarch64 specific - assert!(check_ignore(&config_x86_64, "//@ needs-asm-mnemonic:ldrsbwui")); - assert!(!check_ignore(&config_aarch64, "//@ needs-asm-mnemonic:LDRSBWui")); - - // this is x86 specific - assert!(check_ignore(&config_aarch64, "//@ needs-asm-mnemonic:CMPxCHG16B")); - assert!(!check_ignore(&config_x86_64, "//@ needs-asm-mnemonic:CMPXchg16B")); + // 32-bit ARM does not have a "ret" mnemonic. + let config_arm32 = cfg().target("armv7a-none-eabi").build(); + let config_wasm = cfg().target("wasm32v1-none").build(); + + assert!(!check_ignore(&config_x86_64, "//@ needs-asm-ret")); + assert!(!check_ignore(&config_aarch64, "//@ needs-asm-ret")); + assert!(check_ignore(&config_arm32, "//@ needs-asm-ret")); + assert!(check_ignore(&config_wasm, "//@ needs-asm-ret")); } diff --git a/tests/codegen-llvm/cffi/c-variadic-naked.rs b/tests/codegen-llvm/cffi/c-variadic-naked.rs index caca6d327dd63..a04d3efca9cd9 100644 --- a/tests/codegen-llvm/cffi/c-variadic-naked.rs +++ b/tests/codegen-llvm/cffi/c-variadic-naked.rs @@ -1,5 +1,5 @@ //@ needs-asm-support -//@ needs-asm-mnemonic: ret +//@ needs-asm-ret // tests that `va_start` is not injected into naked functions diff --git a/tests/codegen-llvm/naked-fn/aligned.rs b/tests/codegen-llvm/naked-fn/aligned.rs index 8c4ac57a7bf90..77e637701fc95 100644 --- a/tests/codegen-llvm/naked-fn/aligned.rs +++ b/tests/codegen-llvm/naked-fn/aligned.rs @@ -1,6 +1,6 @@ //@ compile-flags: -C no-prepopulate-passes -Copt-level=0 //@ needs-asm-support -//@ needs-asm-mnemonic: ret +//@ needs-asm-ret //@ ignore-wasm32 aligning functions is not currently supported on wasm (#143368) #![crate_type = "lib"] diff --git a/tests/codegen-llvm/naked-fn/min-function-alignment.rs b/tests/codegen-llvm/naked-fn/min-function-alignment.rs index 2619f4ef476a7..059eed06b66bb 100644 --- a/tests/codegen-llvm/naked-fn/min-function-alignment.rs +++ b/tests/codegen-llvm/naked-fn/min-function-alignment.rs @@ -1,6 +1,6 @@ //@ compile-flags: -C no-prepopulate-passes -Copt-level=0 -Zmin-function-alignment=16 //@ needs-asm-support -//@ needs-asm-mnemonic: ret +//@ needs-asm-ret //@ ignore-wasm32 aligning functions is not currently supported on wasm (#143368) // FIXME(#82232, #143834): temporarily renamed to mitigate `#[align]` nameres ambiguity diff --git a/tests/run-make/naked-dead-code-elimination/rmake.rs b/tests/run-make/naked-dead-code-elimination/rmake.rs index 8e4c26fc34508..c726f7ce04da7 100644 --- a/tests/run-make/naked-dead-code-elimination/rmake.rs +++ b/tests/run-make/naked-dead-code-elimination/rmake.rs @@ -1,6 +1,6 @@ //@ ignore-cross-compile //@ needs-asm-support -//@ needs-asm-mnemonic: RET +//@ needs-asm-ret use run_make_support::symbols::object_contains_any_symbol; use run_make_support::{bin_name, rustc};