diff --git a/library/compiler-builtins/.editorconfig b/library/compiler-builtins/.editorconfig
index f0735cedfbd6b..23a4d23f28200 100644
--- a/library/compiler-builtins/.editorconfig
+++ b/library/compiler-builtins/.editorconfig
@@ -1,5 +1,3 @@
-# EditorConfig helps developers define and maintain consistent
-# coding styles between different editors and IDEs
 # editorconfig.org
 
 root = true
@@ -12,5 +10,5 @@ insert_final_newline = true
 indent_style = space
 indent_size = 4
 
-[*.yml]
+[*.{yaml,yml}]
 indent_size = 2
diff --git a/library/compiler-builtins/.github/renovate.json5 b/library/compiler-builtins/.github/renovate.json5
new file mode 100644
index 0000000000000..7d18fd573d6d8
--- /dev/null
+++ b/library/compiler-builtins/.github/renovate.json5
@@ -0,0 +1,37 @@
+{
+  $schema: "https://docs.renovatebot.com/renovate-schema.json",
+  extends: [
+    "config:recommended",
+    ":maintainLockFilesMonthly",
+    "helpers:pinGitHubActionDigestsToSemver"
+  ],
+  packageRules: [
+    {
+      matchCategories: [
+        "rust"
+      ],
+      matchJsonata: [
+        "isBreaking != true"
+      ],
+      // Disable non-breaking change updates because they
+      // are updated periodically with lockfile maintainance.
+      enabled: false,
+    },
+    {
+      matchManagers: [
+        "github-actions"
+      ],
+      // Every month
+      schedule: "* 0 1 * *",
+      groupName: "Github Actions",
+    }
+  ],
+  // Receive any update that fixes security vulnerabilities.
+  // We need this because we disabled "patch" updates for Rust.
+  // Note: You need to enable "Dependabot alerts" in "Code security" GitHub
+  // Settings to receive security updates.
+  // See https://docs.renovatebot.com/configuration-options/#vulnerabilityalerts
+  vulnerabilityAlerts: {
+    enabled: true,
+  },
+}
diff --git a/library/compiler-builtins/.github/workflows/main.yaml b/library/compiler-builtins/.github/workflows/main.yaml
index 261b1619f1c5f..aba86eaa93edd 100644
--- a/library/compiler-builtins/.github/workflows/main.yaml
+++ b/library/compiler-builtins/.github/workflows/main.yaml
@@ -1,4 +1,5 @@
 name: CI
+permissions: {}
 on:
   push: { branches: [main] }
   pull_request:
@@ -10,6 +11,7 @@ concurrency:
 
 env:
   CARGO_TERM_COLOR: always
+  LIBM_BUILD_VERBOSE: true
   RUSTDOCFLAGS: -Dwarnings
   RUSTFLAGS: -Dwarnings
   RUST_BACKTRACE: full
@@ -28,8 +30,9 @@ jobs:
       extensive_matrix: ${{ steps.script.outputs.extensive_matrix }}
       may_skip_libm_ci: ${{ steps.script.outputs.may_skip_libm_ci }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
         with:
+          persist-credentials: false
           fetch-depth: 500
       - name: Fetch pull request ref
         run: git fetch origin "$GITHUB_REF:$GITHUB_REF"
@@ -42,6 +45,9 @@ jobs:
   test:
     name: Build and test
     timeout-minutes: 60
+    # NOTE: self-hosted riscv64 runners are experimental and may be flaky.
+    # Do not block CI on failures from this platform for now.
+    continue-on-error: ${{ contains(matrix.os, 'self-hosted') }}
     strategy:
       fail-fast: false
       matrix:
@@ -72,6 +78,9 @@ jobs:
           os: ubuntu-24.04
         - target: powerpc64le-unknown-linux-gnu
           os: ubuntu-24.04-ppc64le
+        # FIXME(ci): re-enable these once more capacity is avialable
+        # - target: riscv64gc-unknown-linux-gnu
+        #   os: ["self-hosted", "linux", "riscv64"]
         - target: riscv64gc-unknown-linux-gnu
           os: ubuntu-24.04
         - target: s390x-unknown-linux-gnu
@@ -104,53 +113,58 @@ jobs:
     needs: [calculate_vars]
     env:
       BUILD_ONLY: ${{ matrix.build_only }}
+      JOB_TARGET: ${{ matrix.target }}
+      JOB_CHANNEL: ${{ matrix.channel }}
       MAY_SKIP_LIBM_CI: ${{ needs.calculate_vars.outputs.may_skip_libm_ci }}
+      RUN_IN_DOCKER: ${{ matrix.os == 'ubuntu-24.04' }}
     steps:
-    - name: Print $HOME
+    - name: Print runner information
       shell: bash
       run: |
         set -x
-        echo "${HOME:-not found}"
+        uname -a
+        lscpu || (sysctl -a | grep cpu) || true
+        echo "home: ${HOME:-not found}"
         pwd
-        printenv
-    - name: Print runner information
-      run: uname -a
 
     # Native ppc and s390x runners don't have rustup by default
     - name: Install rustup
       if: matrix.os == 'ubuntu-24.04-ppc64le' || matrix.os == 'ubuntu-24.04-s390x'
       run: sudo apt-get update && sudo apt-get install -y rustup
 
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      with: { persist-credentials: false }
     - name: Install Rust (rustup)
       shell: bash
       run: |
         channel="nightly"
         # Account for channels that have required components (MinGW)
-        [ -n "${{ matrix.channel }}" ] && channel="${{ matrix.channel }}"
+        [ -n "$JOB_CHANNEL" ] && channel="$JOB_CHANNEL"
         rustup update "$channel" --no-self-update
         rustup default "$channel"
-        rustup target add "${{ matrix.target }}"
+        rustup target add "$JOB_TARGET"
 
-    - uses: taiki-e/install-action@nextest
+    - uses: taiki-e/install-action@bfadeaba214680fb4ab63e710bcb2a6a17019fdc # v2.70.4
+      with:
+        tool: nextest@0.9.131
 
-    - uses: Swatinem/rust-cache@v2
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
       with:
         key: ${{ matrix.target }}
     - name: Cache Docker layers
-      uses: actions/cache@v4
-      if: matrix.os == 'ubuntu-24.04'
+      uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
+      if: ${{ env.RUN_IN_DOCKER == 'true' }}
       with:
         path: /tmp/.buildx-cache
         key: ${{ matrix.target }}-buildx-${{ github.sha }}
         restore-keys: ${{ matrix.target }}-buildx-
     # Configure buildx to use Docker layer caching
-    - uses: docker/setup-buildx-action@v3
-      if: matrix.os == 'ubuntu-24.04'
+    - uses: docker/setup-buildx-action@4d04d5d9486b7bd6fa91e7baf45bbb4f8b9deedd # v4
+      if: ${{ env.RUN_IN_DOCKER == 'true' }}
 
     - name: Cache compiler-rt
       id: cache-compiler-rt
-      uses: actions/cache@v4
+      uses: actions/cache@668228422ae6a00e4ad889ee87cd7109ec5666a7 # v5.0.4
       with:
         path: compiler-rt
         key: ${{ runner.os }}-compiler-rt-${{ hashFiles('ci/download-compiler-rt.sh') }}
@@ -166,19 +180,22 @@ jobs:
       shell: bash
 
     - name: Verify API list
-      if: matrix.os == 'ubuntu-24.04'
-      run: python3 etc/update-api-list.py --check
+      if: matrix.os == 'ubuntu-24.04' || contains(matrix.os, 'self-hosted')
+      run: |
+        # Must be run on the host (not in Docker) because git and fs access is required.
+        python3 etc/update-api-list.py --check
+        cargo test -p update-api-list
 
     # Non-linux tests just use our raw script
     - name: Run locally
-      if: matrix.os != 'ubuntu-24.04'
+      if: ${{ env.RUN_IN_DOCKER != 'true' }}
       shell: bash
-      run: ./ci/run.sh ${{ matrix.target }}
+      run: ./ci/run.sh "$JOB_TARGET"
 
     # Otherwise we use our docker containers to run builds
     - name: Run in Docker
-      if: matrix.os == 'ubuntu-24.04'
-      run: ./ci/run-docker.sh ${{ matrix.target }}
+      if: ${{ env.RUN_IN_DOCKER == 'true' }}
+      run: ./ci/run-docker.sh "$JOB_TARGET"
 
     - name: Print test logs if available
       if: always()
@@ -189,7 +206,7 @@ jobs:
     # https://github.com/docker/build-push-action/issues/252
     # https://github.com/moby/buildkit/issues/1896
     - name: Move Docker cache
-      if: matrix.os == 'ubuntu-24.04'
+      if: ${{ env.RUN_IN_DOCKER == 'true' }}
       run: |
         rm -rf /tmp/.buildx-cache
         mv /tmp/.buildx-cache-new /tmp/.buildx-cache
@@ -199,30 +216,43 @@ jobs:
     runs-on: ubuntu-24.04
     timeout-minutes: 10
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      with: { persist-credentials: false }
     # Unlike rustfmt, stable clippy does not work on code with nightly features.
     - name: Install nightly `clippy`
       run: |
         rustup update nightly --no-self-update
         rustup default nightly
         rustup component add clippy
-    - uses: Swatinem/rust-cache@v2
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
     - name: Download musl source
       run: ./ci/update-musl.sh
     - run: cargo clippy --workspace --all-targets
 
+  zizmor:
+    name: Zizmor (Static analysis for GitHub Actions)
+    runs-on: ubuntu-24.04
+    permissions:
+      security-events: write
+    timeout-minutes: 10
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with: { persist-credentials: false }
+      - uses: zizmorcore/zizmor-action@71321a20a9ded102f6e9ce5718a2fcec2c4f70d8 # v0.5.2
+
   build-custom:
     name: Build custom target
     runs-on: ubuntu-24.04
     timeout-minutes: 10
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      with: { persist-credentials: false }
     - name: Install Rust
       run: |
         rustup update nightly --no-self-update
         rustup default nightly
         rustup component add rust-src
-    - uses: Swatinem/rust-cache@v2
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
     - run: |
         # Ensure we can build with custom target.json files (these can interact
         # poorly with build scripts)
@@ -237,13 +267,14 @@ jobs:
     runs-on: ubuntu-24.04
     timeout-minutes: 10
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      with: { persist-credentials: false }
     - name: Install Rust
       run: |
         rustup update nightly --no-self-update
         rustup default nightly
         rustup component add rust-src
-    - uses: Swatinem/rust-cache@v2
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
     - run: |
         cargo build -p compiler_builtins -p libm \
           --target etc/thumbv6-none-eabi.json \
@@ -252,23 +283,30 @@ jobs:
 
   benchmarks:
     name: Benchmarks
-    timeout-minutes: 20
+    timeout-minutes: 30
     strategy:
       fail-fast: false
       matrix:
         include:
-        - target: x86_64-unknown-linux-gnu
-          os: ubuntu-24.04
         - target: aarch64-unknown-linux-gnu
           os: ubuntu-24.04-arm
+        - target: i686-unknown-linux-gnu
+          os: ubuntu-24.04
+        - target: x86_64-unknown-linux-gnu
+          os: ubuntu-24.04
     runs-on: ${{ matrix.os }}
+    env:
+      JOB_TARGET: ${{ matrix.target }}
     steps:
-    - uses: actions/checkout@master
-    - uses: taiki-e/install-action@cargo-binstall
+    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      with: { persist-credentials: false }
+    - uses: taiki-e/install-action@bfadeaba214680fb4ab63e710bcb2a6a17019fdc # v2.70.4
+      with:
+        tool: cargo-binstall@1.17.7
 
     - name: Set up dependencies
-      run: ./ci/install-bench-deps.sh
-    - uses: Swatinem/rust-cache@v2
+      run: ./ci/install-bench-deps.sh "$JOB_TARGET"
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
       with:
         key: ${{ matrix.target }}
     - name: Download musl source
@@ -278,17 +316,14 @@ jobs:
       env:
         GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
         PR_NUMBER: ${{ github.event.pull_request.number }}
-      run: ./ci/bench-icount.sh ${{ matrix.target }}
+      run: ./ci/bench-icount.sh "$JOB_TARGET"
 
     - name: Upload the benchmark baseline
-      uses: actions/upload-artifact@v4
+      uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
       with:
         name: ${{ env.BASELINE_NAME }}
         path: ${{ env.BASELINE_NAME }}.tar.xz
 
-    - name: Run wall time benchmarks
-      run: ./ci/bench-walltime.sh
-
     - name: Print test logs if available
       if: always()
       run: if [ -f "target/test-log.txt" ]; then cat target/test-log.txt; fi
@@ -299,15 +334,14 @@ jobs:
     runs-on: ubuntu-24.04
     timeout-minutes: 10
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      with: { persist-credentials: false }
     - name: Install Rust (rustup)
-      # FIXME(ci): not working in the 2026-02-11 nightly
-      # https://rust-lang.zulipchat.com/#narrow/channel/269128-miri/topic/build-script-build.20contains.20outdated.20or.20invalid.20JSON/with/573426109
-      run: rustup update nightly-2026-02-10 --no-self-update && rustup default nightly-2026-02-10
+      run: rustup update nightly --no-self-update && rustup default nightly
       shell: bash
     - run: rustup component add miri
     - run: cargo miri setup
-    - uses: Swatinem/rust-cache@v2
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
     - run: ./ci/miri.sh
 
   msrv:
@@ -317,13 +351,14 @@ jobs:
     env:
       RUSTFLAGS: # No need to check warnings on old MSRV, unset `-Dwarnings`
     steps:
-    - uses: actions/checkout@master
+    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      with: { persist-credentials: false }
     - name: Install Rust
       run: |
         msrv="$(perl -ne 'print if s/rust-version\s*=\s*"(.*)"/\1/g' libm/Cargo.toml)"
         echo "MSRV: $msrv"
         rustup update "$msrv" --no-self-update && rustup default "$msrv"
-    - uses: Swatinem/rust-cache@v2
+    - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
     - run: |
         # FIXME(msrv): Remove the workspace Cargo.toml so 1.63 cargo doesn't see
         # `edition = "2024"` and get spooked.
@@ -335,7 +370,8 @@ jobs:
     runs-on: ubuntu-24.04
     timeout-minutes: 10
     steps:
-    - uses: actions/checkout@v4
+    - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      with: { persist-credentials: false }
     - name: Install nightly `rustfmt`
       run: rustup set profile minimal && rustup default nightly && rustup component add rustfmt
     - run: cargo fmt -- --check
@@ -358,12 +394,13 @@ jobs:
     env:
       TO_TEST: ${{ matrix.to_test }}
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with: { persist-credentials: false }
       - name: Install Rust
         run: |
           rustup update nightly --no-self-update
           rustup default nightly
-      - uses: Swatinem/rust-cache@v2
+      - uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
       - name: download musl source
         run: ./ci/update-musl.sh
       - name: Run extensive tests
@@ -383,13 +420,16 @@ jobs:
       - msrv
       - rustfmt
       - test
+      - zizmor
     runs-on: ubuntu-24.04
     timeout-minutes: 10
     # GitHub branch protection is exceedingly silly and treats "jobs skipped because a dependency
     # failed" as success. So we have to do some contortions to ensure the job fails if any of its
     # dependencies fails.
     if: always() # make sure this is never "skipped"
+    env:
+      NEEDS: ${{ toJson(needs) }}
     steps:
       # Manually check the status of all dependencies. `if: failure()` does not work.
       - name: check if any dependency failed
-        run: jq --exit-status 'all(.result == "success")' <<< '${{ toJson(needs) }}'
+        run: jq --exit-status 'all(.result == "success")' <<< "$NEEDS"
diff --git a/library/compiler-builtins/.github/workflows/publish.yaml b/library/compiler-builtins/.github/workflows/publish.yaml
index d6f1dc398e8ec..aa742914a5344 100644
--- a/library/compiler-builtins/.github/workflows/publish.yaml
+++ b/library/compiler-builtins/.github/workflows/publish.yaml
@@ -11,15 +11,15 @@ jobs:
   release-plz:
     name: Release-plz
     runs-on: ubuntu-24.04
+    environment: publish
     steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
         with:
+          persist-credentials: false
           fetch-depth: 0
       - name: Install Rust (rustup)
         run: rustup update nightly --no-self-update && rustup default nightly
       - name: Run release-plz
-        uses: MarcoIeni/release-plz-action@v0.5
+        uses: release-plz/action@1528104d2ca23787631a1c1f022abb64b34c1e11 # v0.5
         env:
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-          CARGO_REGISTRY_TOKEN: ${{ secrets.CARGO_REGISTRY_TOKEN }}
diff --git a/library/compiler-builtins/.github/workflows/rustc-pull.yml b/library/compiler-builtins/.github/workflows/rustc-pull.yml
index 8e88213332de4..ff8305d3729fd 100644
--- a/library/compiler-builtins/.github/workflows/rustc-pull.yml
+++ b/library/compiler-builtins/.github/workflows/rustc-pull.yml
@@ -5,7 +5,7 @@ on:
   workflow_dispatch:
   schedule:
     # Run at 04:00 UTC every Monday and Thursday
-    - cron: '0 4 * * 1,4'
+    - cron: "0 4 * * 1,4"
 
 env:
   JOSH_SYNC_VERBOSE: true
@@ -13,15 +13,19 @@ env:
 jobs:
   pull:
     if: github.repository == 'rust-lang/compiler-builtins'
-    uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@main
+    uses: rust-lang/josh-sync/.github/workflows/rustc-pull.yml@8970a6eb3a6095db68e4d765b3b5fba5e9c42cf6 # main
     with:
       github-app-id: ${{ vars.APP_CLIENT_ID }}
+      pr-author: "workflows-compiler-builtins[bot]"
       # https://rust-lang.zulipchat.com/#narrow/channel/219381-t-libs/topic/compiler-builtins.20subtree.20sync.20automation/with/528482375
       zulip-stream-id: 219381
-      zulip-topic: 'compiler-builtins subtree sync automation'
+      zulip-topic: "compiler-builtins subtree sync automation"
       zulip-bot-email: "compiler-builtins-ci-bot@rust-lang.zulipchat.com"
       pr-base-branch: main
       branch-name: rustc-pull
     secrets:
       zulip-api-token: ${{ secrets.ZULIP_API_TOKEN }}
       github-app-secret: ${{ secrets.APP_PRIVATE_KEY }}
+    permissions:
+      contents: write
+      pull-requests: write
diff --git a/library/compiler-builtins/Cargo.lock b/library/compiler-builtins/Cargo.lock
index 7a3e9a38430b6..45197592cde56 100644
--- a/library/compiler-builtins/Cargo.lock
+++ b/library/compiler-builtins/Cargo.lock
@@ -34,9 +34,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
 
 [[package]]
 name = "anstream"
-version = "0.6.21"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a"
+checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -49,15 +49,15 @@ dependencies = [
 
 [[package]]
 name = "anstyle"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78"
+checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
 
 [[package]]
 name = "anstyle-parse"
-version = "0.2.7"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
+checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
 dependencies = [
  "utf8parse",
 ]
@@ -68,7 +68,7 @@ version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys",
 ]
 
 [[package]]
@@ -79,20 +79,24 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
 dependencies = [
  "anstyle",
  "once_cell_polyfill",
- "windows-sys 0.61.2",
+ "windows-sys",
 ]
 
 [[package]]
 name = "anyhow"
-version = "1.0.101"
+version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+
+[[package]]
+name = "api-list-common"
+version = "0.1.0"
 
 [[package]]
 name = "assert_cmd"
-version = "2.1.2"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c5bcfa8749ac45dd12cb11055aeeb6b27a3895560d60d71e3c23bf979e60514"
+checksum = "9a686bbee5efb88a82df0621b236e74d925f470e5445d3220a5648b892ec99c9"
 dependencies = [
  "anstyle",
  "bstr",
@@ -151,16 +155,13 @@ dependencies = [
  "paste",
  "rand_xoshiro",
  "rustc_apfloat",
- "test",
- "utest-cortex-m-qemu",
- "utest-macros",
 ]
 
 [[package]]
 name = "bumpalo"
-version = "3.19.1"
+version = "3.20.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5dd9dc738b7a8311c7ade152424974d8115f2cdad61e8dab8dac9f2362298510"
+checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
 
 [[package]]
 name = "cast"
@@ -170,9 +171,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
 name = "cc"
-version = "1.2.56"
+version = "1.2.58"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aebf35691d1bfb0ac386a69bac2fde4dd276fb618cf8bf4f5318fe285e821bb2"
+checksum = "e1e928d4b69e3077709075a938a05ffbedfa53a84c8f766efbf8220bb1ff60e1"
 dependencies = [
  "find-msvc-tools",
  "shlex",
@@ -224,9 +225,9 @@ dependencies = [
 
 [[package]]
 name = "clap"
-version = "4.5.58"
+version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "63be97961acde393029492ce0be7a1af7e323e6bae9511ebfac33751be5e6806"
+checksum = "b193af5b67834b676abd72466a96c1024e6a6ad978a1f484bd90b85c94041351"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -234,9 +235,9 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.58"
+version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7f13174bda5dfd69d7e947827e5af4b0f2f94a4a3ee92912fba07a66150f21e2"
+checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
 dependencies = [
  "anstream",
  "anstyle",
@@ -246,9 +247,9 @@ dependencies = [
 
 [[package]]
 name = "clap_derive"
-version = "4.5.55"
+version = "4.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a92793da1a46a5f2a02a6f4c46c6496b28c43638adea8306fcb0caa1634f24e5"
+checksum = "1110bd8a634a1ab8cb04345d8d878267d57c3cf1b38d91b71af6686408bbca6a"
 dependencies = [
  "heck",
  "proc-macro2",
@@ -258,15 +259,15 @@ dependencies = [
 
 [[package]]
 name = "clap_lex"
-version = "1.0.0"
+version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a822ea5bc7590f9d40f1ba12c0dc3c2760f3482c6984db1573ad11031420831"
+checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
 
 [[package]]
 name = "colorchoice"
-version = "1.0.4"
+version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
 
 [[package]]
 name = "compiler_builtins"
@@ -277,14 +278,13 @@ dependencies = [
 
 [[package]]
 name = "console"
-version = "0.16.2"
+version = "0.16.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4"
+checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87"
 dependencies = [
  "encode_unicode",
  "libc",
- "once_cell",
- "windows-sys 0.61.2",
+ "windows-sys",
 ]
 
 [[package]]
@@ -391,6 +391,12 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "diff"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56254986775e3233ffa9c4d7d3faaf6d36a2c09d30b20687e9f88bc8bafc16c8"
+
 [[package]]
 name = "difflib"
 version = "0.4.0"
@@ -422,7 +428,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys 0.61.2",
+ "windows-sys",
 ]
 
 [[package]]
@@ -470,9 +476,9 @@ dependencies = [
 
 [[package]]
 name = "getrandom"
-version = "0.4.1"
+version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "139ef39800118c7683f2fd3c98c1b23c09ae076556b435f8e9064ae108aaeeec"
+checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
 dependencies = [
  "cfg-if",
  "js-sys",
@@ -484,21 +490,27 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "glob"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
+
 [[package]]
 name = "gmp-mpfr-sys"
-version = "1.6.8"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60f8970a75c006bb2f8ae79c6768a116dd215fa8346a87aed99bf9d82ca43394"
+checksum = "8cfc928d8ff4ab3767a3674cf55f81186436fb6070866bb1443ffe65a640d2d6"
 dependencies = [
  "libc",
- "windows-sys 0.60.2",
+ "windows-sys",
 ]
 
 [[package]]
 name = "gungraun"
-version = "0.17.2"
+version = "0.18.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "61c1bbe46f51c63bc08a1fac0ee0c530a77c961613a86ecf828ab1b0ffc6687a"
+checksum = "2e2e7d17b75a18300d495a5e79970067b92d74e4858c28326e125f2d55b1b566"
 dependencies = [
  "bincode",
  "derive_more",
@@ -508,9 +520,9 @@ dependencies = [
 
 [[package]]
 name = "gungraun-macros"
-version = "0.7.2"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cdccd089c36fb2ee66ef0eb7b1baa3ce7e7878a8eae682d9c8c368869ff6eca1"
+checksum = "e35c7fb6133421db1cf752b7a2838d9277a26810ccaeeca7aa449f96ad7c2b01"
 dependencies = [
  "derive_more",
  "proc-macro-error2",
@@ -524,9 +536,9 @@ dependencies = [
 
 [[package]]
 name = "gungraun-runner"
-version = "0.17.2"
+version = "0.18.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6da6487203fa53ae6b1c8fead642fe79a3199464b0dd1337635594d675a9ac05"
+checksum = "c19bb4c552085f983300b11694022d7584810dca3500c220962ab2353327fb45"
 dependencies = [
  "serde",
 ]
@@ -610,15 +622,15 @@ dependencies = [
 
 [[package]]
 name = "itoa"
-version = "1.0.17"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92ecc6618181def0457392ccd0ee51198e065e016d1d527a7ac1b6dc7c1f09d2"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
 
 [[package]]
 name = "js-sys"
-version = "0.3.85"
+version = "0.3.94"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c942ebf8e95485ca0d52d97da7c5a2c387d0e7f0ba4c35e93bfcaee045955b3"
+checksum = "2e04e2ef80ce82e13552136fabeef8a5ed1f985a96805761cbb9a2c34e7664d9"
 dependencies = [
  "once_cell",
  "wasm-bindgen",
@@ -632,9 +644,9 @@ checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
 
 [[package]]
 name = "libc"
-version = "0.2.182"
+version = "0.2.183"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6800badb6cb2082ffd7b6a67e6125bb39f18782f793520caee8cb8846be06112"
+checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d"
 
 [[package]]
 name = "libm"
@@ -653,6 +665,7 @@ checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981"
 name = "libm-macros"
 version = "0.1.0"
 dependencies = [
+ "api-list-common",
  "heck",
  "proc-macro2",
  "quote",
@@ -664,6 +677,8 @@ name = "libm-test"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "api-list-common",
+ "compiler_builtins",
  "criterion",
  "getrandom",
  "gmp-mpfr-sys",
@@ -682,9 +697,9 @@ dependencies = [
 
 [[package]]
 name = "libtest-mimic"
-version = "0.8.1"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5297962ef19edda4ce33aaa484386e0a5b3d7f2f4e037cbeee00503ef6b29d33"
+checksum = "14e6ba06f0ade6e504aff834d7c34298e5155c6baca353cc6a4aaff2f9fd7f33"
 dependencies = [
  "anstream",
  "anstyle",
@@ -694,9 +709,9 @@ dependencies = [
 
 [[package]]
 name = "linux-raw-sys"
-version = "0.11.0"
+version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
 
 [[package]]
 name = "log"
@@ -750,21 +765,21 @@ dependencies = [
 
 [[package]]
 name = "object"
-version = "0.38.1"
+version = "0.39.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "271638cd5fa9cca89c4c304675ca658efc4e64a66c716b7cfe1afb4b9611dbbc"
+checksum = "63944c133d03f44e75866bbd160b95af0ec3f6a13d936d69d31c81078cbc5baf"
 dependencies = [
  "flate2",
  "memchr",
  "ruzstd",
- "wasmparser 0.243.0",
+ "wasmparser 0.245.1",
 ]
 
 [[package]]
 name = "once_cell"
-version = "1.21.3"
+version = "1.21.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
 
 [[package]]
 name = "once_cell_polyfill"
@@ -868,6 +883,16 @@ dependencies = [
  "termtree",
 ]
 
+[[package]]
+name = "pretty_assertions"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d"
+dependencies = [
+ "diff",
+ "yansi",
+]
+
 [[package]]
 name = "prettyplease"
 version = "0.2.37"
@@ -911,24 +936,24 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.44"
+version = "1.0.45"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "21b2ebcf727b7760c461f091f9f0f539b77b8e87f2fd88131e7f1b433b3cece4"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
 dependencies = [
  "proc-macro2",
 ]
 
 [[package]]
 name = "r-efi"
-version = "5.3.0"
+version = "6.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
 
 [[package]]
 name = "rand"
-version = "0.10.0"
+version = "0.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc266eb313df6c5c09c1c7b1fbe2510961e5bcd3add930c1e31f7ed9da0feff8"
+checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207"
 dependencies = [
  "chacha20",
  "getrandom",
@@ -1005,15 +1030,15 @@ dependencies = [
 
 [[package]]
 name = "regex-syntax"
-version = "0.8.9"
+version = "0.8.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a96887878f22d7bad8a3b6dc5b7440e0ada9a245242924394987b21cf2210a4c"
+checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a"
 
 [[package]]
 name = "rug"
-version = "1.28.1"
+version = "1.29.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "de190ec858987c79cad4da30e19e546139b3339331282832af004d0ea7829639"
+checksum = "25f6c8f906c90b48e0c1745c9f814c3a31c5eba847043b05c3e9a934dec7c4b3"
 dependencies = [
  "az",
  "gmp-mpfr-sys",
@@ -1042,15 +1067,15 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "1.1.3"
+version = "1.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
 dependencies = [
  "bitflags",
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys 0.61.2",
+ "windows-sys",
 ]
 
 [[package]]
@@ -1077,12 +1102,6 @@ dependencies = [
  "winapi-util",
 ]
 
-[[package]]
-name = "sc"
-version = "0.2.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "010e18bd3bfd1d45a7e666b236c78720df0d9a7698ebaa9c1c559961eb60a38b"
-
 [[package]]
 name = "semver"
 version = "1.0.27"
@@ -1140,9 +1159,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
 [[package]]
 name = "simd-adler32"
-version = "0.3.8"
+version = "0.3.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
+checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
 
 [[package]]
 name = "smallvec"
@@ -1171,9 +1190,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.116"
+version = "2.0.117"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3df424c70518695237746f84cede799c9c58fcb37450d7b23716568cc8bc69cb"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1182,15 +1201,15 @@ dependencies = [
 
 [[package]]
 name = "tempfile"
-version = "3.25.0"
+version = "3.27.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0136791f7c95b1f6dd99f9cc786b91bb81c3800b639b3478e561ddb7be95e5f1"
+checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
 dependencies = [
  "fastrand",
  "getrandom",
  "once_cell",
  "rustix",
- "windows-sys 0.61.2",
+ "windows-sys",
 ]
 
 [[package]]
@@ -1199,11 +1218,6 @@ version = "0.5.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683"
 
-[[package]]
-name = "test"
-version = "0.1.0"
-source = "git+https://github.com/japaric/utest#e32073e2b078e3bee46001c13ae4c1acf368d762"
-
 [[package]]
 name = "tinytemplate"
 version = "1.2.1"
@@ -1245,18 +1259,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3"
 
 [[package]]
-name = "utest-cortex-m-qemu"
+name = "update-api-list"
 version = "0.1.0"
-source = "git+https://github.com/japaric/utest#e32073e2b078e3bee46001c13ae4c1acf368d762"
 dependencies = [
- "sc",
+ "api-list-common",
+ "getopts",
+ "glob",
+ "pretty_assertions",
+ "regex",
 ]
 
-[[package]]
-name = "utest-macros"
-version = "0.1.0"
-source = "git+https://github.com/japaric/utest#e32073e2b078e3bee46001c13ae4c1acf368d762"
-
 [[package]]
 name = "utf8parse"
 version = "0.2.2"
@@ -1314,9 +1326,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen"
-version = "0.2.108"
+version = "0.2.117"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64024a30ec1e37399cf85a7ffefebdb72205ca1c972291c51512360d90bd8566"
+checksum = "0551fc1bb415591e3372d0bc4780db7e587d84e2a7e79da121051c5c4b89d0b0"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -1327,9 +1339,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.108"
+version = "0.2.117"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "008b239d9c740232e71bd39e8ef6429d27097518b6b30bdf9086833bd5b6d608"
+checksum = "7fbdf9a35adf44786aecd5ff89b4563a90325f9da0923236f6104e603c7e86be"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -1337,9 +1349,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.108"
+version = "0.2.117"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5256bae2d58f54820e6490f9839c49780dff84c65aeab9e772f15d5f0e913a55"
+checksum = "dca9693ef2bab6d4e6707234500350d8dad079eb508dca05530c85dc3a529ff2"
 dependencies = [
  "bumpalo",
  "proc-macro2",
@@ -1350,9 +1362,9 @@ dependencies = [
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.108"
+version = "0.2.117"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f01b580c9ac74c8d8f0c0e4afb04eeef2acf145458e52c03845ee9cd23e3d12"
+checksum = "39129a682a6d2d841b6c429d0c51e5cb0ed1a03829d8b3d1e69a011e62cb3d3b"
 dependencies = [
  "unicode-ident",
 ]
@@ -1381,30 +1393,30 @@ dependencies = [
 
 [[package]]
 name = "wasmparser"
-version = "0.243.0"
+version = "0.244.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f6d8db401b0528ec316dfbe579e6ab4152d61739cfe076706d2009127970159d"
+checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
 dependencies = [
  "bitflags",
+ "hashbrown 0.15.5",
+ "indexmap",
+ "semver",
 ]
 
 [[package]]
 name = "wasmparser"
-version = "0.244.0"
+version = "0.245.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
+checksum = "4f08c9adee0428b7bddf3890fc27e015ac4b761cc608c822667102b8bfd6995e"
 dependencies = [
  "bitflags",
- "hashbrown 0.15.5",
- "indexmap",
- "semver",
 ]
 
 [[package]]
 name = "web-sys"
-version = "0.3.85"
+version = "0.3.94"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "312e32e551d92129218ea9a2452120f4aabc03529ef03e4d0d82fb2780608598"
+checksum = "cd70027e39b12f0849461e08ffc50b9cd7688d942c1c8e3c7b22273236b4dd0a"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
@@ -1442,7 +1454,7 @@ version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys 0.61.2",
+ "windows-sys",
 ]
 
 [[package]]
@@ -1457,15 +1469,6 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 
-[[package]]
-name = "windows-sys"
-version = "0.60.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
-dependencies = [
- "windows-targets",
-]
-
 [[package]]
 name = "windows-sys"
 version = "0.61.2"
@@ -1475,71 +1478,6 @@ dependencies = [
  "windows-link",
 ]
 
-[[package]]
-name = "windows-targets"
-version = "0.53.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
-dependencies = [
- "windows-link",
- "windows_aarch64_gnullvm",
- "windows_aarch64_msvc",
- "windows_i686_gnu",
- "windows_i686_gnullvm",
- "windows_i686_msvc",
- "windows_x86_64_gnu",
- "windows_x86_64_gnullvm",
- "windows_x86_64_msvc",
-]
-
-[[package]]
-name = "windows_aarch64_gnullvm"
-version = "0.53.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
-
-[[package]]
-name = "windows_aarch64_msvc"
-version = "0.53.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
-
-[[package]]
-name = "windows_i686_gnu"
-version = "0.53.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
-
-[[package]]
-name = "windows_i686_gnullvm"
-version = "0.53.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
-
-[[package]]
-name = "windows_i686_msvc"
-version = "0.53.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
-
-[[package]]
-name = "windows_x86_64_gnu"
-version = "0.53.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
-
-[[package]]
-name = "windows_x86_64_gnullvm"
-version = "0.53.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
-
-[[package]]
-name = "windows_x86_64_msvc"
-version = "0.53.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
-
 [[package]]
 name = "wit-bindgen"
 version = "0.51.0"
@@ -1628,20 +1566,26 @@ dependencies = [
  "wasmparser 0.244.0",
 ]
 
+[[package]]
+name = "yansi"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
+
 [[package]]
 name = "zerocopy"
-version = "0.8.39"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db6d35d663eadb6c932438e763b262fe1a70987f9ae936e60158176d710cae4a"
+checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
 dependencies = [
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.8.39"
+version = "0.8.48"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4122cd3169e94605190e77839c9a40d40ed048d305bfdc146e7df40ab0f3e517"
+checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/library/compiler-builtins/Cargo.toml b/library/compiler-builtins/Cargo.toml
index 26a056f434967..6c353aa79c896 100644
--- a/library/compiler-builtins/Cargo.toml
+++ b/library/compiler-builtins/Cargo.toml
@@ -3,10 +3,12 @@ resolver = "2"
 members = [
     "builtins-shim",
     "builtins-test",
+    "crates/api-list-common",
     "crates/libm-macros",
     "crates/musl-math-sys",
     "crates/panic-handler",
     "crates/symbol-check",
+    "crates/update-api-list",
     "crates/util",
     "libm",
     "libm-test",
@@ -15,14 +17,15 @@ members = [
 default-members = [
     "builtins-shim",
     "builtins-test",
+    "crates/api-list-common",
     "crates/libm-macros",
     "libm",
     "libm-test",
 ]
 
 exclude = [
-    # `builtins-test-intrinsics` needs the feature `compiler-builtins` enabled
-    # and `mangled-names` disabled, which is the opposite of what is needed for
+    # `builtins-test-intrinsics` needs the features `compiler-builtins` and
+    # `unmangled-names` enabled, which is the opposite of what is needed for
     # other tests, so it makes sense to keep it out of the workspace.
     "builtins-test-intrinsics",
     # We test via the `builtins-shim` crate, so exclude the `compiler-builtins`
@@ -33,6 +36,7 @@ exclude = [
 
 [workspace.dependencies]
 anyhow = "1.0.101"
+api-list-common = { path = "crates/api-list-common" }
 assert_cmd = "2.1.2"
 cc = "1.2.56"
 cfg-if = "1.0.4"
@@ -40,8 +44,9 @@ compiler_builtins = { path = "builtins-shim", default-features = false }
 criterion = { version = "0.8.2", default-features = false, features = ["cargo_bench_support"] }
 getopts = "0.2.24"
 getrandom = "0.4.1"
+glob = "0.3.3"
 gmp-mpfr-sys = { version = "1.6.8", default-features = false }
-gungraun = "0.17.2"
+gungraun = "0.18.0"
 heck = "0.5.0"
 indicatif = { version = "0.18.3", default-features = false }
 libm = { path = "libm", default-features = false }
@@ -50,9 +55,10 @@ libm-test = { path = "libm-test", default-features = false }
 libtest-mimic = "0.8.1"
 musl-math-sys = { path = "crates/musl-math-sys" }
 no-panic = "0.1.36"
-object = { version = "0.38.1", features = ["wasm"] }
+object = { version = "0.39.0", features = ["wasm"] }
 panic-handler = { path = "crates/panic-handler" }
 paste = "1.0.15"
+pretty_assertions = "1.4.1"
 proc-macro2 = "1.0.106"
 quote = "1.0.44"
 rand = "0.10.0"
diff --git a/library/compiler-builtins/builtins-shim/Cargo.toml b/library/compiler-builtins/builtins-shim/Cargo.toml
index 32b0308a7b3c9..c940723a1ba5f 100644
--- a/library/compiler-builtins/builtins-shim/Cargo.toml
+++ b/library/compiler-builtins/builtins-shim/Cargo.toml
@@ -39,27 +39,28 @@ test = false
 cc = { version = "1.2", optional = true }
 
 [features]
-default = []
+default = ["arch"]
+
+# Enable architecture-specific features such as SIMD or assembly routines. If
+# disabled, the generic version can be tested on any platform.
+arch = []
 
 # Enable compilation of C code in compiler-rt, filling in some more optimized
 # implementations and also filling in unimplemented intrinsics
 c = ["dep:cc"]
 
-# For implementations where there is both a generic version and a platform-
-# specific version, use the generic version. This is meant to enable testing
-# the generic versions on all platforms.
-no-asm = []
-
 # Flag this library as the unstable compiler-builtins lib. This must be enabled
 # when using as `std`'s dependency.'
-compiler-builtins = []
+compiler-builtins = ["unmangled-names"]
 
-# Generate memory-related intrinsics like memcpy
+# Enable `no_mangle` symbols for memory-related intrinsics like memcpy. The
+# mangled versions are always available.
 mem = []
 
-# Mangle all names so this can be linked in with other versions or other
-# compiler-rt implementations. Also used for testing
-mangled-names = []
+# Enable `no_mangle` symbols so this crate gets used as the runtime intrinsic
+# implementation. Leave this disabled for testing to avoid conflicting with
+# the system intrinsics.
+unmangled-names = []
 
 # This makes certain traits and function specializations public that
 # are not normally public but are required by the `builtins-test`
diff --git a/library/compiler-builtins/builtins-test-intrinsics/Cargo.toml b/library/compiler-builtins/builtins-test-intrinsics/Cargo.toml
index e73a1f7b17e5b..fed2ac39fb321 100644
--- a/library/compiler-builtins/builtins-test-intrinsics/Cargo.toml
+++ b/library/compiler-builtins/builtins-test-intrinsics/Cargo.toml
@@ -6,9 +6,15 @@ publish = false
 license = "MIT OR Apache-2.0"
 
 [dependencies]
-compiler_builtins = { path = "../builtins-shim", features = ["compiler-builtins"] }
+# FIXME: `aeabi_mem*` tests will require the "mem" feature to be enabled here.
+compiler_builtins = { path = "../builtins-shim", features = ["compiler-builtins", "unmangled-names"] }
 panic-handler = { path = "../crates/panic-handler" }
 
+[target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
+test = { git = "https://github.com/japaric/utest" }
+utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" }
+utest-macros = { git = "https://github.com/japaric/utest" }
+
 [features]
 c = ["compiler_builtins/c"]
 
diff --git a/library/compiler-builtins/builtins-test-intrinsics/build.rs b/library/compiler-builtins/builtins-test-intrinsics/build.rs
index b82581262f7b0..c1ab650a6e82a 100644
--- a/library/compiler-builtins/builtins-test-intrinsics/build.rs
+++ b/library/compiler-builtins/builtins-test-intrinsics/build.rs
@@ -1,10 +1,10 @@
-mod builtins_configure {
-    include!("../compiler-builtins/configure.rs");
-}
+#[path = "../libm/configure.rs"]
+mod configure;
 
-fn main() {
-    println!("cargo::rerun-if-changed=../configure.rs");
+use configure::{Config, Library};
 
-    let target = builtins_configure::Target::from_env();
-    builtins_configure::configure_aliases(&target);
+fn main() {
+    println!("cargo::rerun-if-changed=../libm/configure.rs");
+    let cfg = Config::from_env(Library::BuiltinsTestIntrinsics);
+    configure::emit(&cfg);
 }
diff --git a/library/compiler-builtins/builtins-test-intrinsics/src/main.rs b/library/compiler-builtins/builtins-test-intrinsics/src/main.rs
index a3df2c98376cb..e390590946f4c 100644
--- a/library/compiler-builtins/builtins-test-intrinsics/src/main.rs
+++ b/library/compiler-builtins/builtins-test-intrinsics/src/main.rs
@@ -1,7 +1,9 @@
-// By compiling this file we check that all the intrinsics we care about continue to be provided by
-// the `compiler_builtins` crate regardless of the changes we make to it. If we, by mistake, stop
-// compiling a C implementation and forget to implement that intrinsic in Rust, this file will fail
-// to link due to the missing intrinsic (symbol).
+//! Tests that require unmangled symbols from `compiler-builtins`.
+//!
+//! By compiling this file we check that all the intrinsics we care about continue to be provided by
+//! the `compiler_builtins` crate regardless of the changes we make to it. If we, by mistake, stop
+//! compiling a C implementation and forget to implement that intrinsic in Rust, this file will fail
+//! to link due to the missing intrinsic (symbol).
 
 #![allow(internal_features, unused_features)]
 #![deny(dead_code)]
diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs b/library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memclr.rs
similarity index 98%
rename from library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs
rename to library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memclr.rs
index 0761feaffd9e2..df0e6a6d52997 100644
--- a/library/compiler-builtins/builtins-test/tests/aeabi_memclr.rs
+++ b/library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memclr.rs
@@ -2,7 +2,6 @@
     target_arch = "arm",
     not(any(target_env = "gnu", target_env = "musl")),
     target_os = "linux",
-    feature = "mem"
 ))]
 #![feature(compiler_builtins_lib)]
 #![no_std]
diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs b/library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memcpy.rs
similarity index 98%
rename from library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs
rename to library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memcpy.rs
index e76e712a246f1..acd12eef64ba5 100644
--- a/library/compiler-builtins/builtins-test/tests/aeabi_memcpy.rs
+++ b/library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memcpy.rs
@@ -2,7 +2,6 @@
     target_arch = "arm",
     not(any(target_env = "gnu", target_env = "musl")),
     target_os = "linux",
-    feature = "mem"
 ))]
 #![feature(compiler_builtins_lib)]
 #![no_std]
diff --git a/library/compiler-builtins/builtins-test/tests/aeabi_memset.rs b/library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memset.rs
similarity index 99%
rename from library/compiler-builtins/builtins-test/tests/aeabi_memset.rs
rename to library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memset.rs
index 8f9f80f969ccb..98d2c6852d14f 100644
--- a/library/compiler-builtins/builtins-test/tests/aeabi_memset.rs
+++ b/library/compiler-builtins/builtins-test-intrinsics/tests/aeabi_memset.rs
@@ -2,7 +2,6 @@
     target_arch = "arm",
     not(any(target_env = "gnu", target_env = "musl")),
     target_os = "linux",
-    feature = "mem"
 ))]
 #![feature(compiler_builtins_lib)]
 #![no_std]
diff --git a/library/compiler-builtins/builtins-test/Cargo.toml b/library/compiler-builtins/builtins-test/Cargo.toml
index b2313bb9d140e..f1a5be415675d 100644
--- a/library/compiler-builtins/builtins-test/Cargo.toml
+++ b/library/compiler-builtins/builtins-test/Cargo.toml
@@ -6,7 +6,7 @@ publish = false
 license = "MIT AND Apache-2.0 WITH LLVM-exception AND (MIT OR Apache-2.0)"
 
 [dependencies]
-compiler_builtins = { workspace = true, features = ["unstable-public-internals"] }
+compiler_builtins = { workspace = true, default-features = false, features = ["unstable-public-internals"] }
 
 # For fuzzing tests we want a deterministic seedable RNG. We also eliminate potential
 # problems with system RNGs on the variety of platforms this crate is tested on.
@@ -23,25 +23,11 @@ gungraun = { workspace = true, optional = true }
 [dev-dependencies]
 paste.workspace = true
 
-[target.'cfg(all(target_arch = "arm", not(any(target_env = "gnu", target_env = "musl")), target_os = "linux"))'.dev-dependencies]
-test = { git = "https://github.com/japaric/utest" }
-utest-cortex-m-qemu = { default-features = false, git = "https://github.com/japaric/utest" }
-utest-macros = { git = "https://github.com/japaric/utest" }
-
 [features]
-default = ["mangled-names"]
+# Defaults should match the defaults in compiler-builtins since we have that
+# dependency with `default-features=false`.
+default = ["compiler_builtins/arch"]
 c = ["compiler_builtins/c"]
-no-asm = ["compiler_builtins/no-asm"]
-mem = ["compiler_builtins/mem"]
-mangled-names = ["compiler_builtins/mangled-names"]
-# Skip tests that rely on f128 symbols being available on the system
-no-sys-f128 = ["no-sys-f128-int-convert", "no-sys-f16-f128-convert"]
-# Some platforms have some f128 functions but everything except integer conversions
-no-sys-f128-int-convert = []
-no-sys-f16-f128-convert = []
-no-sys-f16-f64-convert = []
-# Skip tests that rely on f16 symbols being available on the system
-no-sys-f16 = ["no-sys-f16-f64-convert"]
 
 # Enable icount benchmarks (requires gungraun-runner and valgrind locally)
 icount = ["dep:gungraun"]
@@ -51,11 +37,6 @@ icount = ["dep:gungraun"]
 benchmarking-reports = ["walltime", "criterion/plotters", "criterion/html_reports"]
 walltime = ["dep:criterion"]
 
-# NOTE: benchmarks must be run with `--no-default-features` or with
-# `-p builtins-test`, otherwise the default `compiler-builtins` feature
-# of the `compiler_builtins` crate gets activated, resulting in linker
-# errors.
-
 [[bench]]
 name = "float_add"
 harness = false
diff --git a/library/compiler-builtins/builtins-test/benches/float_add.rs b/library/compiler-builtins/builtins-test/benches/float_add.rs
index 197f90b319da4..2c1d2d9f15520 100644
--- a/library/compiler-builtins/builtins-test/benches/float_add.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_add.rs
@@ -74,7 +74,7 @@ float_bench! {
     crate_fn_ppc: add::__addkf3,
     sys_fn: __addtf3,
     sys_fn_ppc: __addkf3,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: []
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/float_cmp.rs b/library/compiler-builtins/builtins-test/benches/float_cmp.rs
index da29b5d313263..cb13ce18fe4c6 100644
--- a/library/compiler-builtins/builtins-test/benches/float_cmp.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_cmp.rs
@@ -185,7 +185,7 @@ float_bench! {
     crate_fn_ppc: cmp::__gtkf2,
     sys_fn: __gttf2,
     sys_fn_ppc: __gtkf2,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     output_eq: gt_res_eq,
     asm: []
 }
@@ -198,7 +198,7 @@ float_bench! {
     crate_fn_ppc: cmp::__unordkf2,
     sys_fn: __unordtf2,
     sys_fn_ppc: __unordkf2,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: []
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/float_conv.rs b/library/compiler-builtins/builtins-test/benches/float_conv.rs
index 40c13d270ac8e..35255645b2464 100644
--- a/library/compiler-builtins/builtins-test/benches/float_conv.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_conv.rs
@@ -84,7 +84,7 @@ float_bench! {
     crate_fn_ppc: conv::__floatunsikf,
     sys_fn: __floatunsitf,
     sys_fn_ppc: __floatunsikf,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -138,7 +138,7 @@ float_bench! {
     crate_fn_ppc: conv::__floatundikf,
     sys_fn: __floatunditf,
     sys_fn_ppc: __floatundikf,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -168,7 +168,7 @@ float_bench! {
     crate_fn_ppc: conv::__floatuntikf,
     sys_fn: __floatuntitf,
     sys_fn_ppc: __floatuntikf,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -249,7 +249,7 @@ float_bench! {
     crate_fn_ppc: conv::__floatsikf,
     sys_fn: __floatsitf,
     sys_fn_ppc: __floatsikf,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -328,7 +328,7 @@ float_bench! {
     crate_fn_ppc: conv::__floatdikf,
     sys_fn: __floatditf,
     sys_fn_ppc: __floatdikf,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -358,7 +358,7 @@ float_bench! {
     crate_fn_ppc: conv::__floattikf,
     sys_fn: __floattitf,
     sys_fn_ppc: __floattikf,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -473,7 +473,7 @@ float_bench! {
     crate_fn: conv::__fixunstfsi,
     crate_fn_ppc: conv::__fixunskfsi,
     sys_fn: __fixunstfsi,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -484,7 +484,7 @@ float_bench! {
     crate_fn: conv::__fixunstfdi,
     crate_fn_ppc: conv::__fixunskfdi,
     sys_fn: __fixunstfdi,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -495,7 +495,7 @@ float_bench! {
     crate_fn: conv::__fixunstfti,
     crate_fn_ppc: conv::__fixunskfti,
     sys_fn: __fixunstfti,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -610,7 +610,7 @@ float_bench! {
     crate_fn: conv::__fixtfsi,
     crate_fn_ppc: conv::__fixkfsi,
     sys_fn: __fixtfsi,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -621,7 +621,7 @@ float_bench! {
     crate_fn: conv::__fixtfdi,
     crate_fn_ppc: conv::__fixkfdi,
     sys_fn: __fixtfdi,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
@@ -632,7 +632,7 @@ float_bench! {
     crate_fn: conv::__fixtfti,
     crate_fn_ppc: conv::__fixkfti,
     sys_fn: __fixtfti,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: []
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/float_div.rs b/library/compiler-builtins/builtins-test/benches/float_div.rs
index d5b0ad0fd402b..dd2b2c1f940e2 100644
--- a/library/compiler-builtins/builtins-test/benches/float_div.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_div.rs
@@ -74,7 +74,7 @@ float_bench! {
     crate_fn_ppc: div::__divkf3,
     sys_fn: __divtf3,
     sys_fn_ppc: __divkf3,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: []
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/float_extend.rs b/library/compiler-builtins/builtins-test/benches/float_extend.rs
index 939dc60f95f4a..7896d10c8d1aa 100644
--- a/library/compiler-builtins/builtins-test/benches/float_extend.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_extend.rs
@@ -12,7 +12,7 @@ float_bench! {
     sig: (a: f16) -> f32,
     crate_fn: extend::__extendhfsf2,
     sys_fn: __extendhfsf2,
-    sys_available: not(feature = "no-sys-f16"),
+    sys_available: not(no_sys_f16),
     asm: [
         #[cfg(target_arch = "aarch64")] {
             let ret: f32;
@@ -34,7 +34,7 @@ float_bench! {
     sig: (a: f16) -> f64,
     crate_fn: extend::__extendhfdf2,
     sys_fn: __extendhfdf2,
-    sys_available: not(feature = "no-sys-f16-f64-convert"),
+    sys_available: not(no_sys_f16_f64_convert),
     asm: [
         #[cfg(target_arch = "aarch64")] {
             let ret: f64;
@@ -58,7 +58,7 @@ float_bench! {
     crate_fn_ppc: extend::__extendhfkf2,
     sys_fn: __extendhftf2,
     sys_fn_ppc: __extendhfkf2,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: [],
 }
 
@@ -91,7 +91,7 @@ float_bench! {
     crate_fn_ppc: extend::__extendsfkf2,
     sys_fn: __extendsftf2,
     sys_fn_ppc: __extendsfkf2,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: [],
 }
 
@@ -103,7 +103,7 @@ float_bench! {
     crate_fn_ppc: extend::__extenddfkf2,
     sys_fn: __extenddftf2,
     sys_fn_ppc: __extenddfkf2,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: [],
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/float_mul.rs b/library/compiler-builtins/builtins-test/benches/float_mul.rs
index a7a2d34aa0489..54fc6daa9b2a6 100644
--- a/library/compiler-builtins/builtins-test/benches/float_mul.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_mul.rs
@@ -74,7 +74,7 @@ float_bench! {
     crate_fn_ppc: mul::__mulkf3,
     sys_fn: __multf3,
     sys_fn_ppc: __mulkf3,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: []
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/float_pow.rs b/library/compiler-builtins/builtins-test/benches/float_pow.rs
index 64e37dd32416e..d749ae47ddafb 100644
--- a/library/compiler-builtins/builtins-test/benches/float_pow.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_pow.rs
@@ -24,7 +24,7 @@ float_bench! {
 
 // FIXME(f16_f128): can be changed to only `f128_enabled` once `__multf3` and `__divtf3` are
 // distributed by nightly.
-#[cfg(all(f128_enabled, not(feature = "no-sys-f128")))]
+#[cfg(all(f128_enabled, not(no_sys_f128)))]
 float_bench! {
     name: powi_f128,
     sig: (a: f128, b: i32) -> f128,
@@ -32,7 +32,7 @@ float_bench! {
     crate_fn_ppc: pow::__powikf2,
     sys_fn: __powitf2,
     sys_fn_ppc: __powikf2,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: []
 }
 
@@ -42,7 +42,7 @@ pub fn float_pow() {
     powi_f32(&mut criterion);
     powi_f64(&mut criterion);
 
-    #[cfg(all(f128_enabled, not(feature = "no-sys-f128")))]
+    #[cfg(all(f128_enabled, not(no_sys_f128)))]
     powi_f128(&mut criterion);
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/float_sub.rs b/library/compiler-builtins/builtins-test/benches/float_sub.rs
index 8bae294cd56b1..a8b73d7c7719e 100644
--- a/library/compiler-builtins/builtins-test/benches/float_sub.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_sub.rs
@@ -74,7 +74,7 @@ float_bench! {
     crate_fn_ppc: sub::__subkf3,
     sys_fn: __subtf3,
     sys_fn_ppc: __subkf3,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: []
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/float_trunc.rs b/library/compiler-builtins/builtins-test/benches/float_trunc.rs
index 9373f945bb2b8..fdf96fd3d34a4 100644
--- a/library/compiler-builtins/builtins-test/benches/float_trunc.rs
+++ b/library/compiler-builtins/builtins-test/benches/float_trunc.rs
@@ -11,7 +11,7 @@ float_bench! {
     sig: (a: f32) -> f16,
     crate_fn: trunc::__truncsfhf2,
     sys_fn: __truncsfhf2,
-    sys_available: not(feature = "no-sys-f16"),
+    sys_available: not(no_sys_f16),
     asm: [
         #[cfg(target_arch = "aarch64")] {
             let ret: f16;
@@ -33,7 +33,7 @@ float_bench! {
     sig: (a: f64) -> f16,
     crate_fn: trunc::__truncdfhf2,
     sys_fn: __truncdfhf2,
-    sys_available: not(feature = "no-sys-f16-f64-convert"),
+    sys_available: not(no_sys_f16_f64_convert),
     asm: [
         #[cfg(target_arch = "aarch64")] {
             let ret: f16;
@@ -90,7 +90,7 @@ float_bench! {
     crate_fn_ppc: trunc::__trunckfhf2,
     sys_fn: __trunctfhf2,
     sys_fn_ppc: __trunckfhf2,
-    sys_available: not(feature = "no-sys-f16-f128-convert"),
+    sys_available: not(no_sys_f16_f128_convert),
     asm: [],
 }
 
@@ -102,7 +102,7 @@ float_bench! {
     crate_fn_ppc: trunc::__trunckfsf2,
     sys_fn: __trunctfsf2,
     sys_fn_ppc: __trunckfsf2,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: [],
 }
 
@@ -114,7 +114,7 @@ float_bench! {
     crate_fn_ppc: trunc::__trunckfdf2,
     sys_fn: __trunctfdf2,
     sys_fn_ppc: __trunckfdf2,
-    sys_available: not(feature = "no-sys-f128"),
+    sys_available: not(no_sys_f128),
     asm: [],
 }
 
diff --git a/library/compiler-builtins/builtins-test/benches/mem_icount.rs b/library/compiler-builtins/builtins-test/benches/mem_icount.rs
index 37595e8258436..7a3cad09b4044 100644
--- a/library/compiler-builtins/builtins-test/benches/mem_icount.rs
+++ b/library/compiler-builtins/builtins-test/benches/mem_icount.rs
@@ -118,7 +118,7 @@ mod mcpy {
         }
     }
 
-    library_benchmark_group!(name = memcpy; benchmarks = bench_cpy);
+    library_benchmark_group!(name = memcpy, benchmarks = [bench_cpy]);
 }
 
 mod mset {
@@ -167,7 +167,7 @@ mod mset {
         }
     }
 
-    library_benchmark_group!(name = memset; benchmarks = bench_set);
+    library_benchmark_group!(name = memset, benchmarks = [bench_set]);
 }
 
 mod mcmp {
@@ -235,7 +235,7 @@ mod mcmp {
         }
     }
 
-    library_benchmark_group!(name = memcmp; benchmarks = bench_cmp);
+    library_benchmark_group!(name = memcmp, benchmarks = [bench_cmp]);
 }
 
 mod mmove {
@@ -489,7 +489,7 @@ mod mmove {
         }
     }
 
-    library_benchmark_group!(name = memmove; benchmarks = forward_move, backward_move);
+    library_benchmark_group!(name = memmove, benchmarks = [forward_move, backward_move]);
 }
 
 use mcmp::memcmp;
@@ -497,4 +497,4 @@ use mcpy::memcpy;
 use mmove::memmove;
 use mset::memset;
 
-main!(library_benchmark_groups = memcpy, memset, memcmp, memmove);
+main!(library_benchmark_groups = [memcpy, memset, memcmp, memmove]);
diff --git a/library/compiler-builtins/builtins-test/build.rs b/library/compiler-builtins/builtins-test/build.rs
index 5b2dcd12ef86f..133186bc7f57d 100644
--- a/library/compiler-builtins/builtins-test/build.rs
+++ b/library/compiler-builtins/builtins-test/build.rs
@@ -1,12 +1,13 @@
+#[path = "../libm/configure.rs"]
+mod configure;
+
 use std::collections::HashSet;
 
-mod builtins_configure {
-    include!("../compiler-builtins/configure.rs");
-}
+use configure::{Config, Library, set_cfg};
 
 /// Features to enable
 #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-enum Feature {
+enum SetCfg {
     NoSysF128,
     NoSysF128IntConvert,
     NoSysF16,
@@ -14,7 +15,15 @@ enum Feature {
     NoSysF16F128Convert,
 }
 
-impl Feature {
+impl SetCfg {
+    const ALL: &[Self] = &[
+        Self::NoSysF128,
+        Self::NoSysF128IntConvert,
+        Self::NoSysF16,
+        Self::NoSysF16F64Convert,
+        Self::NoSysF16F128Convert,
+    ];
+
     fn implies(self) -> &'static [Self] {
         match self {
             Self::NoSysF128 => [Self::NoSysF128IntConvert, Self::NoSysF16F128Convert].as_slice(),
@@ -24,96 +33,89 @@ impl Feature {
             Self::NoSysF16F128Convert => [].as_slice(),
         }
     }
+
+    fn name(self) -> &'static str {
+        match self {
+            Self::NoSysF128 => "no_sys_f128",
+            Self::NoSysF128IntConvert => "no_sys_f128_int_convert",
+            Self::NoSysF16F64Convert => "no_sys_f16_f64_convert",
+            Self::NoSysF16F128Convert => "no_sys_f16_f128_convert",
+            Self::NoSysF16 => "no_sys_f16",
+        }
+    }
 }
 
 fn main() {
-    println!("cargo::rerun-if-changed=../configure.rs");
+    println!("cargo::rerun-if-changed=../libm/configure.rs");
 
-    let target = builtins_configure::Target::from_env();
-    let mut features = HashSet::new();
+    let cfg = Config::from_env(Library::BuiltinsTest);
+    configure::emit(&cfg);
+
+    let mut to_set = HashSet::new();
 
     // These platforms do not have f128 symbols available in their system libraries, so
     // skip related tests.
-    if target.arch == "arm"
-        || target.vendor == "apple"
-        || target.env == "msvc"
+    if cfg.target_arch == "arm"
+        || cfg.target_vendor == "apple"
+        || cfg.target_env == "msvc"
         // GCC and LLVM disagree on the ABI of `f16` and `f128` with MinGW. See
         // <https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115054>.
-        || (target.os == "windows" && target.env == "gnu")
+        || (cfg.target_os == "windows" && cfg.target_env == "gnu")
         // FIXME(llvm): There is an ABI incompatibility between GCC and Clang on 32-bit x86.
         // See <https://github.com/llvm/llvm-project/issues/77401>.
-        || target.arch == "x86"
+        || cfg.target_arch == "x86"
         // 32-bit PowerPC and 64-bit LE gets code generated that Qemu cannot handle. See
         // <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105635926>.
-        || target.arch == "powerpc"
-        || target.arch == "powerpc64le"
+        || cfg.target_arch == "powerpc"
+        || cfg.target_arch == "powerpc64le"
         // FIXME: We get different results from the builtin functions. See
         // <https://github.com/rust-lang/compiler-builtins/pull/606#issuecomment-2105657287>.
-        || target.arch == "powerpc64"
+        || cfg.target_arch == "powerpc64"
     {
-        features.insert(Feature::NoSysF128);
+        to_set.insert(SetCfg::NoSysF128);
     }
 
-    if target.arch == "x86" {
+    if cfg.target_arch == "x86" {
         // 32-bit x86 does not have `__fixunstfti`/`__fixtfti` but does have everything else
-        features.insert(Feature::NoSysF128IntConvert);
+        to_set.insert(SetCfg::NoSysF128IntConvert);
         // FIXME: 32-bit x86 has a bug in `f128 -> f16` system libraries
-        features.insert(Feature::NoSysF16F128Convert);
+        to_set.insert(SetCfg::NoSysF16F128Convert);
     }
 
     // These platforms do not have f16 symbols available in their system libraries, so
     // skip related tests. Most of these are missing `f16 <-> f32` conversion routines.
-    if (target.arch == "aarch64" && target.os == "linux")
-        || target.arch.starts_with("arm")
-        || target.arch == "powerpc"
-        || target.arch == "powerpc64"
-        || target.arch == "powerpc64le"
-        || target.arch == "loongarch64"
-        || (target.arch == "x86" && !target.has_feature("sse"))
-        || target.os == "windows"
+    if (cfg.target_arch == "aarch64" && cfg.target_os == "linux")
+        || cfg.target_arch.starts_with("arm")
+        || cfg.target_arch == "powerpc"
+        || cfg.target_arch == "powerpc64"
+        || cfg.target_arch == "powerpc64le"
+        || cfg.target_arch == "loongarch64"
+        || (cfg.target_arch == "x86" && !cfg.has_target_feature("sse"))
+        || cfg.target_os == "windows"
         // Linking says "error: function signature mismatch: __extendhfsf2" and seems to
         // think the signature is either `(i32) -> f32` or `(f32) -> f32`. See
         // <https://github.com/llvm/llvm-project/issues/96438>.
-        || target.arch == "wasm32"
-        || target.arch == "wasm64"
+        || cfg.target_arch == "wasm32"
+        || cfg.target_arch == "wasm64"
     {
-        features.insert(Feature::NoSysF16);
+        to_set.insert(SetCfg::NoSysF16);
     }
 
     // These platforms are missing either `__extendhfdf2` or `__truncdfhf2`.
-    if target.vendor == "apple" || target.os == "windows" {
-        features.insert(Feature::NoSysF16F64Convert);
+    if cfg.target_vendor == "apple" || cfg.target_os == "windows" {
+        to_set.insert(SetCfg::NoSysF16F64Convert);
     }
 
     // Add implied features. Collection is required for borrows.
-    features.extend(
-        features
+    to_set.extend(
+        to_set
             .iter()
             .flat_map(|x| x.implies())
             .copied()
             .collect::<Vec<_>>(),
     );
 
-    for feature in features {
-        let (name, warning) = match feature {
-            Feature::NoSysF128 => ("no-sys-f128", "using apfloat fallback for f128"),
-            Feature::NoSysF128IntConvert => (
-                "no-sys-f128-int-convert",
-                "using apfloat fallback for f128 <-> int conversions",
-            ),
-            Feature::NoSysF16F64Convert => (
-                "no-sys-f16-f64-convert",
-                "using apfloat fallback for f16 <-> f64 conversions",
-            ),
-            Feature::NoSysF16F128Convert => (
-                "no-sys-f16-f128-convert",
-                "using apfloat fallback for f16 <-> f128 conversions",
-            ),
-            Feature::NoSysF16 => ("no-sys-f16", "using apfloat fallback for f16"),
-        };
-        println!("cargo:warning={warning}");
-        println!("cargo:rustc-cfg=feature=\"{name}\"");
+    for cfg in SetCfg::ALL {
+        set_cfg(cfg.name(), to_set.contains(cfg));
     }
-
-    builtins_configure::configure_aliases(&target);
 }
diff --git a/library/compiler-builtins/builtins-test/src/bench.rs b/library/compiler-builtins/builtins-test/src/bench.rs
index 4bdcf482cd619..dd03579285cbc 100644
--- a/library/compiler-builtins/builtins-test/src/bench.rs
+++ b/library/compiler-builtins/builtins-test/src/bench.rs
@@ -1,7 +1,7 @@
 use alloc::vec::Vec;
 use core::cell::RefCell;
 
-use compiler_builtins::float::Float;
+use compiler_builtins::support::Float;
 
 /// Fuzz with these many items to ensure equal functions
 pub const CHECK_ITER_ITEMS: u32 = 10_000;
@@ -43,7 +43,7 @@ pub fn skip_sys_checks(test_name: &str) -> bool {
         return true;
     }
 
-    if cfg!(x86_no_sse) && X86_NO_SSE_SKIPPED.contains(&test_name) {
+    if cfg!(x86_no_sse2) && X86_NO_SSE_SKIPPED.contains(&test_name) {
         return true;
     }
 
diff --git a/library/compiler-builtins/builtins-test/src/lib.rs b/library/compiler-builtins/builtins-test/src/lib.rs
index b9ad649f88dd7..70478f5f6d745 100644
--- a/library/compiler-builtins/builtins-test/src/lib.rs
+++ b/library/compiler-builtins/builtins-test/src/lib.rs
@@ -19,8 +19,7 @@
 pub mod bench;
 extern crate alloc;
 
-use compiler_builtins::float::Float;
-use compiler_builtins::int::{Int, MinInt};
+use compiler_builtins::support::{Float, Int, MinInt};
 use rand_xoshiro::Xoshiro128StarStar;
 use rand_xoshiro::rand_core::{Rng, SeedableRng};
 
@@ -245,18 +244,18 @@ fn fuzz_float_step<F: Float>(rng: &mut Xoshiro128StarStar, f: &mut F) {
     let sign = (rng32 & 1) != 0;
 
     // exponent fuzzing. Only 4 bits for the selector needed.
-    let ones = (F::Int::ONE << F::EXP_BITS) - F::Int::ONE;
+    let ones = F::EXP_SAT;
     let r0 = (rng32 >> 1) % F::EXP_BITS;
     let r1 = (rng32 >> 5) % F::EXP_BITS;
     // custom rotate shift. Note that `F::Int` is unsigned, so we can shift right without smearing
     // the sign bit.
     let mask = if r1 == 0 {
-        ones.wrapping_shr(r0)
+        ones >> r0
     } else {
-        let tmp = ones.wrapping_shr(r0);
-        (tmp.wrapping_shl(r1) | tmp.wrapping_shr(F::EXP_BITS - r1)) & ones
+        let tmp = ones >> r0;
+        ((tmp << r1) | (tmp >> (F::EXP_BITS - r1))) & ones
     };
-    let mut exp = (f.to_bits() & F::EXP_MASK) >> F::SIG_BITS;
+    let mut exp = f.ex();
     match (rng32 >> 9) % 4 {
         0 => exp |= mask,
         1 => exp &= mask,
@@ -274,13 +273,13 @@ fn fuzz_float_step<F: Float>(rng: &mut Xoshiro128StarStar, f: &mut F) {
 macro_rules! float_edge_cases {
     ($F:ident, $case:ident, $inner:block) => {
         for exponent in [
-            F::Int::ZERO,
-            F::Int::ONE,
-            F::Int::ONE << (F::EXP_BITS / 2),
-            (F::Int::ONE << (F::EXP_BITS - 1)) - F::Int::ONE,
-            F::Int::ONE << (F::EXP_BITS - 1),
-            (F::Int::ONE << (F::EXP_BITS - 1)) + F::Int::ONE,
-            (F::Int::ONE << F::EXP_BITS) - F::Int::ONE,
+            0,
+            1,
+            1 << (F::EXP_BITS / 2),
+            (1 << (F::EXP_BITS - 1)) - 1,
+            1 << (F::EXP_BITS - 1),
+            (1 << (F::EXP_BITS - 1)) + 1,
+            (1 << F::EXP_BITS) - 1,
         ]
         .iter()
         {
diff --git a/library/compiler-builtins/builtins-test/tests/addsub.rs b/library/compiler-builtins/builtins-test/tests/addsub.rs
index 410967967d2a3..859de3e425538 100644
--- a/library/compiler-builtins/builtins-test/tests/addsub.rs
+++ b/library/compiler-builtins/builtins-test/tests/addsub.rs
@@ -87,7 +87,8 @@ macro_rules! float_sum {
             #[test]
             fn $fn_add() {
                 use core::ops::{Add, Sub};
-                use compiler_builtins::float::{{add::$fn_add, sub::$fn_sub}, Float};
+                use compiler_builtins::float::{add::$fn_add, sub::$fn_sub};
+                use compiler_builtins::support::Float;
 
                 fuzz_float_2(N, |x: $f, y: $f| {
                     let add0 = apfloat_fallback!($f, $apfloat_ty, $sys_available, Add::add, x, y);
@@ -112,7 +113,7 @@ macro_rules! float_sum {
     }
 }
 
-#[cfg(not(x86_no_sse))]
+#[cfg(not(x86_no_sse2))]
 mod float_addsub {
     use super::*;
 
@@ -127,15 +128,15 @@ mod float_addsub {
     }
 
     #[cfg(f128_enabled)]
-    #[cfg(not(x86_no_sse))]
+    #[cfg(not(x86_no_sse2))]
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
     float_sum! {
-        f128, __addtf3, __subtf3, Quad, not(feature = "no-sys-f128");
+        f128, __addtf3, __subtf3, Quad, not(no_sys_f128);
     }
 
     #[cfg(f128_enabled)]
     #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
     float_sum! {
-        f128, __addkf3, __subkf3, Quad, not(feature = "no-sys-f128");
+        f128, __addkf3, __subkf3, Quad, not(no_sys_f128);
     }
 }
diff --git a/library/compiler-builtins/builtins-test/tests/big.rs b/library/compiler-builtins/builtins-test/tests/big.rs
deleted file mode 100644
index d1ae88bd16485..0000000000000
--- a/library/compiler-builtins/builtins-test/tests/big.rs
+++ /dev/null
@@ -1,134 +0,0 @@
-use compiler_builtins::int::{HInt, MinInt, i256, u256};
-
-const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
-
-/// Print a `u256` as hex since we can't add format implementations
-fn hexu(v: u256) -> String {
-    format!(
-        "0x{:016x}{:016x}{:016x}{:016x}",
-        v.0[3], v.0[2], v.0[1], v.0[0]
-    )
-}
-
-#[test]
-fn widen_u128() {
-    assert_eq!(u128::MAX.widen(), u256([u64::MAX, u64::MAX, 0, 0]));
-    assert_eq!(
-        LOHI_SPLIT.widen(),
-        u256([u64::MAX, 0xaaaaaaaaaaaaaaaa, 0, 0])
-    );
-}
-
-#[test]
-fn widen_i128() {
-    assert_eq!((-1i128).widen(), u256::MAX.signed());
-    assert_eq!(
-        (LOHI_SPLIT as i128).widen(),
-        i256([u64::MAX, 0xaaaaaaaaaaaaaaaa, u64::MAX, u64::MAX])
-    );
-    assert_eq!((-1i128).zero_widen().unsigned(), (u128::MAX).widen());
-}
-
-#[test]
-fn widen_mul_u128() {
-    let tests = [
-        (u128::MAX / 2, 2_u128, u256([u64::MAX - 1, u64::MAX, 0, 0])),
-        (u128::MAX, 2_u128, u256([u64::MAX - 1, u64::MAX, 1, 0])),
-        (u128::MAX, u128::MAX, u256([1, 0, u64::MAX - 1, u64::MAX])),
-        (u128::MIN, u128::MIN, u256::ZERO),
-        (1234, 0, u256::ZERO),
-        (0, 1234, u256::ZERO),
-    ];
-
-    let mut errors = Vec::new();
-    for (i, (a, b, exp)) in tests.iter().copied().enumerate() {
-        let res = a.widen_mul(b);
-        let res_z = a.zero_widen_mul(b);
-        assert_eq!(res, res_z);
-        if res != exp {
-            errors.push((i, a, b, exp, res));
-        }
-    }
-
-    for (i, a, b, exp, res) in &errors {
-        eprintln!(
-            "FAILURE ({i}): {a:#034x} * {b:#034x} = {} got {}",
-            hexu(*exp),
-            hexu(*res)
-        );
-    }
-    assert!(errors.is_empty());
-}
-
-#[test]
-fn not_u128() {
-    assert_eq!(!u256::ZERO, u256::MAX);
-}
-
-#[test]
-fn shr_u128() {
-    let only_low = [
-        1,
-        u16::MAX.into(),
-        u32::MAX.into(),
-        u64::MAX.into(),
-        u128::MAX,
-    ];
-
-    let mut errors = Vec::new();
-
-    for a in only_low {
-        for perturb in 0..10 {
-            let a = a.saturating_add(perturb);
-            for shift in 0..128 {
-                let res = a.widen() >> shift;
-                let expected = (a >> shift).widen();
-                if res != expected {
-                    errors.push((a.widen(), shift, res, expected));
-                }
-            }
-        }
-    }
-
-    let check = [
-        (
-            u256::MAX,
-            1,
-            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]),
-        ),
-        (
-            u256::MAX,
-            5,
-            u256([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 5]),
-        ),
-        (u256::MAX, 63, u256([u64::MAX, u64::MAX, u64::MAX, 1])),
-        (u256::MAX, 64, u256([u64::MAX, u64::MAX, u64::MAX, 0])),
-        (u256::MAX, 65, u256([u64::MAX, u64::MAX, u64::MAX >> 1, 0])),
-        (u256::MAX, 127, u256([u64::MAX, u64::MAX, 1, 0])),
-        (u256::MAX, 128, u256([u64::MAX, u64::MAX, 0, 0])),
-        (u256::MAX, 129, u256([u64::MAX, u64::MAX >> 1, 0, 0])),
-        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
-        (u256::MAX, 192, u256([u64::MAX, 0, 0, 0])),
-        (u256::MAX, 193, u256([u64::MAX >> 1, 0, 0, 0])),
-        (u256::MAX, 191, u256([u64::MAX, 1, 0, 0])),
-        (u256::MAX, 254, u256([0b11, 0, 0, 0])),
-        (u256::MAX, 255, u256([1, 0, 0, 0])),
-    ];
-
-    for (input, shift, expected) in check {
-        let res = input >> shift;
-        if res != expected {
-            errors.push((input, shift, res, expected));
-        }
-    }
-
-    for (a, b, res, expected) in &errors {
-        eprintln!(
-            "FAILURE: {} >> {b} = {} got {}",
-            hexu(*a),
-            hexu(*expected),
-            hexu(*res),
-        );
-    }
-    assert!(errors.is_empty());
-}
diff --git a/library/compiler-builtins/builtins-test/tests/cmp.rs b/library/compiler-builtins/builtins-test/tests/cmp.rs
index 4b01b6ca1c7d7..d9360027a9643 100644
--- a/library/compiler-builtins/builtins-test/tests/cmp.rs
+++ b/library/compiler-builtins/builtins-test/tests/cmp.rs
@@ -125,19 +125,19 @@ mod float_comparisons {
 
         fuzz_float_2(N, |x: f128, y: f128| {
             let x_is_nan = apfloat_fallback!(
-                f128, Quad, not(feature = "no-sys-f128"),
+                f128, Quad, not(no_sys_f128),
                 |x: FloatTy| x.is_nan() => no_convert,
                 x
             );
             let y_is_nan = apfloat_fallback!(
-                f128, Quad, not(feature = "no-sys-f128"),
+                f128, Quad, not(no_sys_f128),
                 |x: FloatTy| x.is_nan() => no_convert,
                 y
             );
 
             assert_eq!(__unordtf2(x, y) != 0, x_is_nan || y_is_nan);
 
-            cmp!(f128, x, y, Quad, not(feature = "no-sys-f128"),
+            cmp!(f128, x, y, Quad, not(no_sys_f128),
                 1, __lttf2;
                 1, __letf2;
                 1, __eqtf2;
diff --git a/library/compiler-builtins/builtins-test/tests/conv.rs b/library/compiler-builtins/builtins-test/tests/conv.rs
index 0fd15ad3ee662..e01eb259ac869 100644
--- a/library/compiler-builtins/builtins-test/tests/conv.rs
+++ b/library/compiler-builtins/builtins-test/tests/conv.rs
@@ -6,7 +6,7 @@
 #![allow(unused_macros)]
 
 use builtins_test::*;
-use compiler_builtins::float::Float;
+use compiler_builtins::support::Float;
 use rustc_apfloat::{Float as _, FloatConvert as _};
 
 mod i_to_f {
@@ -18,7 +18,7 @@ mod i_to_f {
                 #[test]
                 fn $fn() {
                     use compiler_builtins::float::conv::$fn;
-                    use compiler_builtins::int::Int;
+                    use compiler_builtins::support::Int;
 
                     fuzz(N, |x: $i_ty| {
                         let f0 = apfloat_fallback!(
@@ -27,7 +27,7 @@ mod i_to_f {
                             // When the builtin is not available, we need to use a different conversion
                             // method (since apfloat doesn't support `as` casting).
                             |x: $i_ty| {
-                                use compiler_builtins::int::MinInt;
+                                use compiler_builtins::support::MinInt;
 
                                 let apf = if <$i_ty>::SIGNED {
                                     FloatTy::from_i128(x.try_into().unwrap()).value
@@ -117,7 +117,7 @@ mod i_to_f {
 
     #[cfg(f128_enabled)]
     #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
-    i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
+    i_to_f! { f128, Quad, not(no_sys_f128_int_convert),
         u32, __floatunsitf;
         i32, __floatsitf;
         u64, __floatunditf;
@@ -128,7 +128,7 @@ mod i_to_f {
 
     #[cfg(f128_enabled)]
     #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
-    i_to_f! { f128, Quad, not(feature = "no-sys-f128-int-convert"),
+    i_to_f! { f128, Quad, not(no_sys_f128_int_convert),
         u32, __floatunsikf;
         i32, __floatsikf;
         u64, __floatundikf;
@@ -155,7 +155,7 @@ mod f_to_i {
                         // When the builtin is not available, we need to use a different conversion
                         // method (since apfloat doesn't support `as` casting).
                         |x: $f_ty| {
-                            use compiler_builtins::int::MinInt;
+                            use compiler_builtins::support::MinInt;
 
                             let apf = FloatTy::from_bits(x.to_bits().into());
                             let bits: usize = <$i_ty>::BITS.try_into().unwrap();
@@ -236,7 +236,7 @@ mod f_to_i {
                 x,
                 f128,
                 Quad,
-                not(feature = "no-sys-f128-int-convert"),
+                not(no_sys_f128_int_convert),
                 u32, __fixunstfsi;
                 u64, __fixunstfdi;
                 u128, __fixunstfti;
@@ -259,7 +259,8 @@ macro_rules! f_to_f {
     ) => {$(
         #[test]
         fn $fn() {
-            use compiler_builtins::float::{$mod::$fn, Float};
+            use compiler_builtins::float::$mod::$fn;
+            use compiler_builtins::support::Float;
             use rustc_apfloat::ieee::{$from_ap_ty, $to_ap_ty};
 
             fuzz_float(N, |x: $from_ty| {
@@ -308,12 +309,12 @@ mod extend {
     )))]
     f_to_f! {
         extend,
-        f16 => f32, Half => Single, __extendhfsf2, not(feature = "no-sys-f16");
-        f16 => f32, Half => Single, __gnu_h2f_ieee, not(feature = "no-sys-f16");
-        f16 => f64, Half => Double, __extendhfdf2, not(feature = "no-sys-f16-f64-convert");
-        f16 => f128, Half => Quad, __extendhftf2, not(feature = "no-sys-f16-f128-convert");
-        f32 => f128, Single => Quad, __extendsftf2, not(feature = "no-sys-f128");
-        f64 => f128, Double => Quad, __extenddftf2, not(feature = "no-sys-f128");
+        f16 => f32, Half => Single, __extendhfsf2, not(no_sys_f16);
+        f16 => f32, Half => Single, __gnu_h2f_ieee, not(no_sys_f16);
+        f16 => f64, Half => Double, __extendhfdf2, not(no_sys_f16_f64_convert);
+        f16 => f128, Half => Quad, __extendhftf2, not(no_sys_f16_f128_convert);
+        f32 => f128, Single => Quad, __extendsftf2, not(no_sys_f128);
+        f64 => f128, Double => Quad, __extenddftf2, not(no_sys_f128);
     }
 
     #[cfg(f128_enabled)]
@@ -321,8 +322,8 @@ mod extend {
     f_to_f! {
         extend,
         // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
-        f32 => f128, Single => Quad, __extendsfkf2, not(feature = "no-sys-f128");
-        f64 => f128, Double => Quad, __extenddfkf2, not(feature = "no-sys-f128");
+        f32 => f128, Single => Quad, __extendsfkf2, not(no_sys_f128);
+        f64 => f128, Double => Quad, __extenddfkf2, not(no_sys_f128);
     }
 }
 
@@ -342,12 +343,12 @@ mod trunc {
     )))]
     f_to_f! {
         trunc,
-        f32 => f16, Single => Half, __truncsfhf2, not(feature = "no-sys-f16");
-        f32 => f16, Single => Half, __gnu_f2h_ieee, not(feature = "no-sys-f16");
-        f64 => f16, Double => Half, __truncdfhf2, not(feature = "no-sys-f16-f64-convert");
-        f128 => f16, Quad => Half, __trunctfhf2, not(feature = "no-sys-f16-f128-convert");
-        f128 => f32, Quad => Single, __trunctfsf2, not(feature = "no-sys-f128");
-        f128 => f64, Quad => Double, __trunctfdf2, not(feature = "no-sys-f128");
+        f32 => f16, Single => Half, __truncsfhf2, not(no_sys_f16);
+        f32 => f16, Single => Half, __gnu_f2h_ieee, not(no_sys_f16);
+        f64 => f16, Double => Half, __truncdfhf2, not(no_sys_f16_f64_convert);
+        f128 => f16, Quad => Half, __trunctfhf2, not(no_sys_f16_f128_convert);
+        f128 => f32, Quad => Single, __trunctfsf2, not(no_sys_f128);
+        f128 => f64, Quad => Double, __trunctfdf2, not(no_sys_f128);
     }
 
     #[cfg(f128_enabled)]
@@ -355,7 +356,7 @@ mod trunc {
     f_to_f! {
         trunc,
         // FIXME(#655): `f16` tests disabled until we can bootstrap symbols
-        f128 => f32, Quad => Single, __trunckfsf2, not(feature = "no-sys-f128");
-        f128 => f64, Quad => Double, __trunckfdf2, not(feature = "no-sys-f128");
+        f128 => f32, Quad => Single, __trunckfsf2, not(no_sys_f128);
+        f128 => f64, Quad => Double, __trunckfdf2, not(no_sys_f128);
     }
 }
diff --git a/library/compiler-builtins/builtins-test/tests/div_rem.rs b/library/compiler-builtins/builtins-test/tests/div_rem.rs
index 4ff86385a9630..5d04fe9c0a9b1 100644
--- a/library/compiler-builtins/builtins-test/tests/div_rem.rs
+++ b/library/compiler-builtins/builtins-test/tests/div_rem.rs
@@ -109,7 +109,8 @@ macro_rules! float {
         $(
             #[test]
             fn $fn() {
-                use compiler_builtins::float::{div::$fn, Float};
+                use compiler_builtins::float::div::$fn;
+                use compiler_builtins::support::Float;
                 use core::ops::Div;
 
                 fuzz_float_2(N, |x: $f, y: $f| {
@@ -138,7 +139,7 @@ macro_rules! float {
     };
 }
 
-#[cfg(not(x86_no_sse))]
+#[cfg(not(x86_no_sse2))]
 mod float_div {
     use super::*;
 
@@ -153,12 +154,12 @@ mod float_div {
         f128, __divtf3, Quad,
         // FIXME(llvm): there is a bug in LLVM rt.
         // See <https://github.com/llvm/llvm-project/issues/91840>.
-        not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
+        not(any(no_sys_f128, all(target_arch = "aarch64", target_os = "linux")));
     }
 
     #[cfg(f128_enabled)]
     #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
     float! {
-        f128, __divkf3, Quad, not(feature = "no-sys-f128");
+        f128, __divkf3, Quad, not(no_sys_f128);
     }
 }
diff --git a/library/compiler-builtins/builtins-test/tests/float_pow.rs b/library/compiler-builtins/builtins-test/tests/float_pow.rs
index 3cea83a255c8c..c808b9efae65f 100644
--- a/library/compiler-builtins/builtins-test/tests/float_pow.rs
+++ b/library/compiler-builtins/builtins-test/tests/float_pow.rs
@@ -1,7 +1,7 @@
 #![allow(unused_macros, unused_features)]
 #![cfg_attr(f128_enabled, feature(f128))]
 
-#[cfg_attr(x86_no_sse, allow(unused))]
+#[cfg_attr(x86_no_sse2, allow(unused))]
 use builtins_test::*;
 
 // This is approximate because of issues related to
@@ -16,7 +16,8 @@ macro_rules! pow {
             #[cfg($sys_available)]
             fn $fn() {
                 use compiler_builtins::float::pow::$fn;
-                use compiler_builtins::float::Float;
+                use compiler_builtins::support::Float;
+
                 fuzz_float_2(N, |x: $f, y: $f| {
                     if !(Float::is_subnormal(x) || Float::is_subnormal(y) || x.is_nan()) {
                         let n = y.to_bits() & !<$f as Float>::SIG_MASK;
@@ -52,7 +53,7 @@ macro_rules! pow {
     };
 }
 
-#[cfg(not(x86_no_sse))] // FIXME(i586): failure for powidf2
+#[cfg(not(x86_no_sse2))] // FIXME(i586): failure for powidf2
 pow! {
     f32, 1e-4, __powisf2, all();
     f64, 1e-12, __powidf2, all();
@@ -61,11 +62,11 @@ pow! {
 #[cfg(f128_enabled)]
 #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
 pow! {
-    f128, 1e-36, __powitf2, not(feature = "no-sys-f128");
+    f128, 1e-36, __powitf2, not(no_sys_f128);
 }
 
 #[cfg(f128_enabled)]
 #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
 pow! {
-    f128, 1e-36, __powikf2, not(feature = "no-sys-f128");
+    f128, 1e-36, __powikf2, not(no_sys_f128);
 }
diff --git a/library/compiler-builtins/builtins-test/tests/lse.rs b/library/compiler-builtins/builtins-test/tests/lse.rs
index d408fe193bcff..20d4d19a328a1 100644
--- a/library/compiler-builtins/builtins-test/tests/lse.rs
+++ b/library/compiler-builtins/builtins-test/tests/lse.rs
@@ -1,12 +1,12 @@
 #![allow(unused_features)]
 #![feature(decl_macro)] // so we can use pub(super)
 #![feature(macro_metavar_expr_concat)]
-#![cfg(all(target_arch = "aarch64", feature = "mangled-names"))]
+#![cfg(target_arch = "aarch64")]
 
 use std::sync::Mutex;
 
 use compiler_builtins::aarch64_outline_atomics::{get_have_lse_atomics, set_have_lse_atomics};
-use compiler_builtins::int::{Int, MinInt};
+use compiler_builtins::support::{Int, MinInt};
 use compiler_builtins::{foreach_bytes, foreach_ordering};
 
 #[track_caller]
diff --git a/library/compiler-builtins/builtins-test/tests/mul.rs b/library/compiler-builtins/builtins-test/tests/mul.rs
index 30516ebaf7866..b16d729be1dd5 100644
--- a/library/compiler-builtins/builtins-test/tests/mul.rs
+++ b/library/compiler-builtins/builtins-test/tests/mul.rs
@@ -96,7 +96,8 @@ macro_rules! float_mul {
         $(
             #[test]
             fn $fn() {
-                use compiler_builtins::float::{mul::$fn, Float};
+                use compiler_builtins::float::mul::$fn;
+                use compiler_builtins::support::Float;
                 use core::ops::Mul;
 
                 fuzz_float_2(N, |x: $f, y: $f| {
@@ -114,7 +115,7 @@ macro_rules! float_mul {
     };
 }
 
-#[cfg(not(x86_no_sse))]
+#[cfg(not(x86_no_sse2))]
 mod float_mul {
     use super::*;
 
@@ -132,7 +133,7 @@ mod float_mul {
 }
 
 #[cfg(f128_enabled)]
-#[cfg(not(x86_no_sse))]
+#[cfg(not(x86_no_sse2))]
 #[cfg(not(any(target_arch = "powerpc", target_arch = "powerpc64")))]
 mod float_mul_f128 {
     use super::*;
@@ -141,7 +142,7 @@ mod float_mul_f128 {
         f128, __multf3, Quad,
         // FIXME(llvm): there is a bug in LLVM rt.
         // See <https://github.com/llvm/llvm-project/issues/91840>.
-        not(any(feature = "no-sys-f128", all(target_arch = "aarch64", target_os = "linux")));
+        not(any(no_sys_f128, all(target_arch = "aarch64", target_os = "linux")));
     }
 }
 
@@ -151,6 +152,6 @@ mod float_mul_f128_ppc {
     use super::*;
 
     float_mul! {
-        f128, __mulkf3, Quad, not(feature = "no-sys-f128");
+        f128, __mulkf3, Quad, not(no_sys_f128);
     }
 }
diff --git a/library/compiler-builtins/ci/bench-icount.sh b/library/compiler-builtins/ci/bench-icount.sh
index 6d92b50a6dae7..cd36303ca316a 100755
--- a/library/compiler-builtins/ci/bench-icount.sh
+++ b/library/compiler-builtins/ci/bench-icount.sh
@@ -26,12 +26,20 @@ tag="$(echo "$target" | cut -d'-' -f1)"
 # after the first run with gungraun.
 [ -d "iai-home" ] && mv "iai-home" "$gungraun_home"
 
+failed="0"
+
 # Run benchmarks once
 function run_icount_benchmarks() {
     cargo_args=(
+        "--target" "$target"
         "--bench" "*icount*"
         "--no-default-features"
-        "--features" "unstable,unstable-float,icount"
+        "--features" "unstable unstable-float icount"
+        # Enable unmangled-names so our compiler-builtins gets used for
+        # intrinsics. This makes performance impacts of c-b changes show up
+        # in libm benchmarks and gives us a better idea of what will happen
+        # in std (e.g. speedups in __addtf3 will show up in fmaf128).
+        "--features" "compiler_builtins/unmangled-names"
     )
 
     gungraun_args=(
@@ -65,13 +73,18 @@ function run_icount_benchmarks() {
         # Disregard regressions after merge
         echo "Benchmarks completed with regressions; ignoring (not in a PR)"
     else
-        ./ci/ci-util.py handle-bench-regressions "$PR_NUMBER"
+        ./ci/ci-util.py handle-bench-regressions "$PR_NUMBER" || failed="1"
     fi
 }
 
 # Run once with softfloats, once with arch instructions enabled
-run_icount_benchmarks --features force-soft-floats -- --save-baseline=softfloat
-run_icount_benchmarks -- --save-baseline=hardfloat
+run_icount_benchmarks                 -- --save-baseline=arch_disabled
+run_icount_benchmarks --features arch -- --save-baseline=arch_enabled
+
+if [ "$failed" != "0" ]; then
+    echo "One or more benchmarks failed"
+    exit 1
+fi
 
 # Name and tar the new baseline
 name="baseline-icount-$tag-$(date -u +'%Y%m%d%H%M')-${GITHUB_SHA:0:12}"
diff --git a/library/compiler-builtins/ci/bench-walltime.sh b/library/compiler-builtins/ci/bench-walltime.sh
index 0393d02dfc452..5d564c97beb35 100755
--- a/library/compiler-builtins/ci/bench-walltime.sh
+++ b/library/compiler-builtins/ci/bench-walltime.sh
@@ -6,4 +6,4 @@
 export LIBM_SEED=benchesbenchesbenchesbencheswoo!
 cargo bench --package libm-test \
     --no-default-features \
-    --features walltime,short-benchmarks,build-musl,libm/force-soft-floats
+    --features walltime,short-benchmarks,build-musl
diff --git a/library/compiler-builtins/ci/ci-util.py b/library/compiler-builtins/ci/ci-util.py
index 392f83c219e7a..f359c59797448 100755
--- a/library/compiler-builtins/ci/ci-util.py
+++ b/library/compiler-builtins/ci/ci-util.py
@@ -361,6 +361,10 @@ def base_name(name: str) -> tuple[str, str]:
         return (name.rstrip("f"), "f32")
     elif name.endswith("f16"):
         return (name.rstrip("f16"), "f16")
+    elif name.endswith("f32"):
+        return (name.rstrip("f32"), "f32")
+    elif name.endswith("f64"):
+        return (name.rstrip("f64"), "f64")
     elif name.endswith("f128"):
         return (name.rstrip("f128"), "f128")
 
diff --git a/library/compiler-builtins/ci/docker/aarch64-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
index 683bd07fd47ef..30a13fc5de910 100644
--- a/library/compiler-builtins/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/aarch64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabi/Dockerfile b/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
index 781abd1b6e888..41ff36a49e3bb 100644
--- a/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
+++ b/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile b/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
index 36ea4827dc52f..1fad72c470f03 100644
--- a/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
+++ b/library/compiler-builtins/ci/docker/arm-unknown-linux-gnueabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile b/library/compiler-builtins/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
index 8b76693b2799e..039ccd5745256 100644
--- a/library/compiler-builtins/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
+++ b/library/compiler-builtins/ci/docker/armv7-unknown-linux-gnueabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/i586-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/i586-unknown-linux-gnu/Dockerfile
index 9125038acbde5..9319e73dd03f0 100644
--- a/library/compiler-builtins/ci/docker/i586-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/i586-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/i686-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/i686-unknown-linux-gnu/Dockerfile
index 9125038acbde5..9319e73dd03f0 100644
--- a/library/compiler-builtins/ci/docker/i686-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/i686-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
index a652235958777..442a13164880c 100644
--- a/library/compiler-builtins/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/loongarch64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/mips-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/mips-unknown-linux-gnu/Dockerfile
index 0913f33c05ce4..9941a8c2736c0 100644
--- a/library/compiler-builtins/ci/docker/mips-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/mips-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile b/library/compiler-builtins/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
index d2f4e484b1aab..c20d0a77b81c3 100644
--- a/library/compiler-builtins/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
+++ b/library/compiler-builtins/ci/docker/mips64-unknown-linux-gnuabi64/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile b/library/compiler-builtins/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
index 873754b2793e9..584f7ffff45a5 100644
--- a/library/compiler-builtins/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
+++ b/library/compiler-builtins/ci/docker/mips64el-unknown-linux-gnuabi64/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/mipsel-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
index 5768b68d6c950..ead99bb9c1132 100644
--- a/library/compiler-builtins/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/mipsel-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/powerpc-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
index c625a4bcd5d7c..74071874ed7cf 100644
--- a/library/compiler-builtins/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/powerpc-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
index 86a7a8cd46e4e..ba4fec7160b64 100644
--- a/library/compiler-builtins/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/powerpc64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
index 722b10b0a7349..e90d4c8812042 100644
--- a/library/compiler-builtins/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/powerpc64le-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
index 7a721ba05416e..96442121bcd9e 100644
--- a/library/compiler-builtins/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/riscv64gc-unknown-linux-gnu/Dockerfile
@@ -1,11 +1,11 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
     apt-get install -y --no-install-recommends \
     gcc libc6-dev qemu-user ca-certificates \
     gcc-riscv64-linux-gnu libc6-dev-riscv64-cross \
-    qemu-system-riscv64
+    qemu-system-riscv
 
 ENV TOOLCHAIN_PREFIX=riscv64-linux-gnu-
 ENV CARGO_TARGET_RISCV64GC_UNKNOWN_LINUX_GNU_LINKER="$TOOLCHAIN_PREFIX"gcc \
diff --git a/library/compiler-builtins/ci/docker/thumbv6m-none-eabi/Dockerfile b/library/compiler-builtins/ci/docker/thumbv6m-none-eabi/Dockerfile
index a1a6b3cf5cfd2..463cce94e5540 100644
--- a/library/compiler-builtins/ci/docker/thumbv6m-none-eabi/Dockerfile
+++ b/library/compiler-builtins/ci/docker/thumbv6m-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/thumbv7em-none-eabi/Dockerfile b/library/compiler-builtins/ci/docker/thumbv7em-none-eabi/Dockerfile
index a1a6b3cf5cfd2..463cce94e5540 100644
--- a/library/compiler-builtins/ci/docker/thumbv7em-none-eabi/Dockerfile
+++ b/library/compiler-builtins/ci/docker/thumbv7em-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/thumbv7em-none-eabihf/Dockerfile b/library/compiler-builtins/ci/docker/thumbv7em-none-eabihf/Dockerfile
index a1a6b3cf5cfd2..463cce94e5540 100644
--- a/library/compiler-builtins/ci/docker/thumbv7em-none-eabihf/Dockerfile
+++ b/library/compiler-builtins/ci/docker/thumbv7em-none-eabihf/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/thumbv7m-none-eabi/Dockerfile b/library/compiler-builtins/ci/docker/thumbv7m-none-eabi/Dockerfile
index a1a6b3cf5cfd2..463cce94e5540 100644
--- a/library/compiler-builtins/ci/docker/thumbv7m-none-eabi/Dockerfile
+++ b/library/compiler-builtins/ci/docker/thumbv7m-none-eabi/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/wasm32-unknown-unknown/Dockerfile b/library/compiler-builtins/ci/docker/wasm32-unknown-unknown/Dockerfile
index b646a72bb37cc..09f35c3b128d0 100644
--- a/library/compiler-builtins/ci/docker/wasm32-unknown-unknown/Dockerfile
+++ b/library/compiler-builtins/ci/docker/wasm32-unknown-unknown/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/docker/x86_64-unknown-linux-gnu/Dockerfile b/library/compiler-builtins/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
index 927515f90f329..103c395ee8496 100644
--- a/library/compiler-builtins/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
+++ b/library/compiler-builtins/ci/docker/x86_64-unknown-linux-gnu/Dockerfile
@@ -1,4 +1,4 @@
-ARG IMAGE=ubuntu:25.10
+ARG IMAGE=ubuntu:26.04
 FROM $IMAGE
 
 RUN apt-get update && \
diff --git a/library/compiler-builtins/ci/download-compiler-rt.sh b/library/compiler-builtins/ci/download-compiler-rt.sh
index bf7f8c2489643..87b337d8255bb 100755
--- a/library/compiler-builtins/ci/download-compiler-rt.sh
+++ b/library/compiler-builtins/ci/download-compiler-rt.sh
@@ -6,5 +6,5 @@ set -eux
 
 rust_llvm_version=20.1-2025-02-13
 
-curl -L -o code.tar.gz "https://github.com/rust-lang/llvm-project/archive/rustc/${rust_llvm_version}.tar.gz"
+curl -L --retry 3 -o code.tar.gz "https://github.com/rust-lang/llvm-project/archive/rustc/${rust_llvm_version}.tar.gz"
 tar xzf code.tar.gz --strip-components 1 llvm-project-rustc-${rust_llvm_version}/compiler-rt
diff --git a/library/compiler-builtins/ci/install-bench-deps.sh b/library/compiler-builtins/ci/install-bench-deps.sh
index 61f4723c0358d..7d73ed0d32f55 100755
--- a/library/compiler-builtins/ci/install-bench-deps.sh
+++ b/library/compiler-builtins/ci/install-bench-deps.sh
@@ -1,10 +1,22 @@
-#!/bin/sh
+#!/bin/bash
 # Install needed dependencies for gungraun.
 
+set -eux
+
+target="${1:-}"
+
+# Needed for gungraun
+deps=(valgrind gdb libc6-dbg)
+
+[[ "$target" = *"i686"* ]] && deps+=(gcc-multilib)
+
 sudo apt-get update
-sudo apt-get install -y valgrind gdb libc6-dbg # Needed for gungraun
+sudo apt-get install -y "${deps[@]}"
+
 rustup update "$BENCHMARK_RUSTC" --no-self-update
 rustup default "$BENCHMARK_RUSTC"
+[ -n "$target" ] && rustup target add "$target"
+
 # Install the version of gungraun-runner that is specified in Cargo.toml
 gungraun_version="$(cargo metadata --format-version=1 --features icount |
     jq -r '.packages[] | select(.name == "gungraun").version')"
diff --git a/library/compiler-builtins/ci/miri.sh b/library/compiler-builtins/ci/miri.sh
index aae474d884638..90b64934db0b5 100755
--- a/library/compiler-builtins/ci/miri.sh
+++ b/library/compiler-builtins/ci/miri.sh
@@ -13,10 +13,11 @@ targets=(
     s390x-unknown-linux-gnu
 )
 for target in "${targets[@]}"; do
-    # Only run the `mem` tests to avoid this taking too long.
+    # Only run the `mem` tests to avoid this taking too long. Disable default
+    # features to turn off `arch` and avoid inline assembly.
     cargo miri test \
         --manifest-path builtins-test/Cargo.toml \
-        --features no-asm \
+        --no-default-features \
         --target "$target" \
         -- mem
 done
diff --git a/library/compiler-builtins/ci/run-docker.sh b/library/compiler-builtins/ci/run-docker.sh
index e65ada271904f..a894677b59f90 100755
--- a/library/compiler-builtins/ci/run-docker.sh
+++ b/library/compiler-builtins/ci/run-docker.sh
@@ -78,6 +78,7 @@ run() {
         -e CI \
         -e CARGO_TARGET_DIR=/builtins-target \
         -e CARGO_TERM_COLOR \
+        -e LIBM_BUILD_VERBOSE \
         -e MAY_SKIP_LIBM_CI \
         -e RUSTFLAGS \
         -e RUST_BACKTRACE \
@@ -97,7 +98,7 @@ if [ "${1:-}" = "--help" ] || [ "$#" -gt 1 ]; then
     usage: ./ci/run-docker.sh [target]
 
     you can also set DOCKER_BASE_IMAGE to use something other than the default
-    ubuntu:25.10 (or rustlang/rust:nightly).
+    ubuntu:26.04 (or rustlang/rust:nightly).
     "
     exit
 fi
diff --git a/library/compiler-builtins/ci/run-extensive.sh b/library/compiler-builtins/ci/run-extensive.sh
index 4ba41a026fab6..9bdf8bcfbadcc 100755
--- a/library/compiler-builtins/ci/run-extensive.sh
+++ b/library/compiler-builtins/ci/run-extensive.sh
@@ -14,7 +14,15 @@ set -x
 test_cmd=(
     cargo test
     --package libm-test
-    --features "build-mpfr,libm/unstable,libm/force-soft-floats"
+    --no-default-features
+    # Don't enable `arch` for extensive tests. Usually anything in asm is
+    # only a single instruction or a small sequence, and we rely on the
+    # vendors to test that for us.
+    #
+    # libm/unstable enables libm/unstable-intrinsics, which means we usually
+    # get the single-instruction ops anyway when we aren't specifically
+    # testing for them.
+    --features "libm-test/build-mpfr libm-test/unstable-float libm/unstable"
     --profile release-checked
 )
 
diff --git a/library/compiler-builtins/ci/run.sh b/library/compiler-builtins/ci/run.sh
index b9a21d555c9e5..93c551dc52ec7 100755
--- a/library/compiler-builtins/ci/run.sh
+++ b/library/compiler-builtins/ci/run.sh
@@ -20,19 +20,45 @@ if [ "${USING_CONTAINER_RUSTC:-}" = 1 ]; then
         rustup target add "$target"
 fi
 
+# If nextest is available, use that
+command -v cargo-nextest && nextest=1 || nextest=0
+if [ "$nextest" = "1" ]; then
+    test_runner=(cargo nextest run --max-fail=20)
+    profile_flag="--cargo-profile"
+
+    # Workaround for https://github.com/nextest-rs/nextest/issues/2066
+    if [ -n "${CARGO_TARGET_DIR:-}" ]; then
+        cfg_file="/tmp/nextest-config.toml"
+        echo "[store]" >> "$cfg_file"
+        echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file"
+        test_runner+=(--config-file "$cfg_file")
+    fi
+
+    # Not all configurations have tests to run on wasm
+    [[ "$target" = *"wasm"* ]] && test_runner+=(--no-tests=warn)
+else
+    test_runner=(cargo test --no-fail-fast)
+    profile_flag="--profile"
+fi
+
 # Test our implementation
 if [ "${BUILD_ONLY:-}" = "1" ]; then
     echo "no tests to run for build-only targets"
 else
-    test_builtins=(cargo test --package builtins-test --no-fail-fast --target "$target")
+    test_builtins=(
+        "${test_runner[@]}"
+        --package builtins-test
+        --target "$target"
+    )
+
     "${test_builtins[@]}"
     "${test_builtins[@]}" --release
     "${test_builtins[@]}" --features c
     "${test_builtins[@]}" --features c --release
-    "${test_builtins[@]}" --features no-asm
-    "${test_builtins[@]}" --features no-asm --release
     "${test_builtins[@]}" --benches
     "${test_builtins[@]}" --benches --release
+    "${test_builtins[@]}" --no-default-features
+    "${test_builtins[@]}" --no-default-features --release
 
     # Validate that having a verbatim path for the target directory works
     # (trivial to regress using `/` in paths to build artifacts rather than
@@ -53,12 +79,14 @@ symcheck+=(-- --build-and-check --target "$target")
 # Executable section checks are meaningless on no-std targets
 [[ "$target" == *"-none"* ]] && symcheck+=(--no-os)
 
-"${symcheck[@]}" -- -p compiler_builtins
-"${symcheck[@]}" -- -p compiler_builtins --release
-"${symcheck[@]}" -- -p compiler_builtins --features c
-"${symcheck[@]}" -- -p compiler_builtins --features c --release
-"${symcheck[@]}" -- -p compiler_builtins --features no-asm
-"${symcheck[@]}" -- -p compiler_builtins --features no-asm --release
+# We only need to check the configurations std may use
+symcheck_cb_args=(-- --package compiler_builtins --features compiler-builtins)
+"${symcheck[@]}" "${symcheck_cb_args[@]}"
+"${symcheck[@]}" "${symcheck_cb_args[@]}" --release
+"${symcheck[@]}" "${symcheck_cb_args[@]}" --features c
+"${symcheck[@]}" "${symcheck_cb_args[@]}" --features c --release
+"${symcheck[@]}" "${symcheck_cb_args[@]}" --no-default-features
+"${symcheck[@]}" "${symcheck_cb_args[@]}" --no-default-features --release
 
 run_intrinsics_test() {
     build_args=(--verbose --manifest-path builtins-test-intrinsics/Cargo.toml)
@@ -100,13 +128,13 @@ mflags=()
 # We enumerate features manually.
 mflags+=(--no-default-features)
 
-# Enable arch-specific routines when available.
-mflags+=(--features arch)
-
 # Always enable `unstable-float` since it expands available API but does not
 # change any implementations.
 mflags+=(--features unstable-float)
 
+# This is a host program that may not run in containers.
+mflags+=(--exclude update-api-list)
+
 # We need to specifically skip tests for musl-math-sys on systems that can't
 # build musl since otherwise `--all` will activate it.
 case "$target" in
@@ -157,28 +185,7 @@ if [ "${BUILD_ONLY:-}" = "1" ]; then
 else
     # symcheck tests need specific env setup, and is already tested above
     mflags+=(--workspace --exclude symbol-check --target "$target")
-    cmd=(cargo test "${mflags[@]}")
-    profile_flag="--profile"
-
-    # If nextest is available, use that
-    command -v cargo-nextest && nextest=1 || nextest=0
-    if [ "$nextest" = "1" ]; then
-        cmd=(cargo nextest run --max-fail=10)
-
-        # Workaround for https://github.com/nextest-rs/nextest/issues/2066
-        if [ -f /.dockerenv ]; then
-            cfg_file="/tmp/nextest-config.toml"
-            echo "[store]" >> "$cfg_file"
-            echo "dir = \"$CARGO_TARGET_DIR/nextest\"" >> "$cfg_file"
-            cmd+=(--config-file "$cfg_file")
-        fi
-
-        # Not all configurations have tests to run on wasm
-        [[ "$target" = *"wasm"* ]] && cmd+=(--no-tests=warn)
-
-        cmd+=("${mflags[@]}")
-        profile_flag="--cargo-profile"
-    fi
+    cmd=("${test_runner[@]}" "${mflags[@]}")
 
     # Test once without intrinsics
     "${cmd[@]}"
@@ -191,15 +198,15 @@ else
     cmd+=(--exclude util --exclude libm-macros)
 
     # Test once with intrinsics enabled
-    "${cmd[@]}" --features unstable-intrinsics
-    "${cmd[@]}" --features unstable-intrinsics --benches
+    "${cmd[@]}" --features arch,unstable-intrinsics
+    "${cmd[@]}" --features arch,unstable-intrinsics --benches
 
     # Test the same in release mode, which also increases coverage. Also ensure
     # the soft float routines are checked.
     "${cmd[@]}" "$profile_flag" release-checked
-    "${cmd[@]}" "$profile_flag" release-checked --features force-soft-floats
-    "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics
-    "${cmd[@]}" "$profile_flag" release-checked --features unstable-intrinsics --benches
+    "${cmd[@]}" "$profile_flag" release-checked --features arch
+    "${cmd[@]}" "$profile_flag" release-checked --features arch,unstable-intrinsics
+    "${cmd[@]}" "$profile_flag" release-checked --features arch,unstable-intrinsics --benches
 
     # Ensure that the routines do not panic.
     #
diff --git a/library/compiler-builtins/compiler-builtins/Cargo.toml b/library/compiler-builtins/compiler-builtins/Cargo.toml
index d9acb8341d483..b2f1443f2eb09 100644
--- a/library/compiler-builtins/compiler-builtins/Cargo.toml
+++ b/library/compiler-builtins/compiler-builtins/Cargo.toml
@@ -34,27 +34,28 @@ core = { path = "../../core", optional = true }
 cc = { version = "1.2", optional = true }
 
 [features]
-default = []
+default = ["arch"]
+
+# Enable architecture-specific features such as SIMD or assembly routines. If
+# disabled, the generic version can be tested on any platform.
+arch = []
 
 # Enable compilation of C code in compiler-rt, filling in some more optimized
 # implementations and also filling in unimplemented intrinsics
 c = ["dep:cc"]
 
-# For implementations where there is both a generic version and a platform-
-# specific version, use the generic version. This is meant to enable testing
-# the generic versions on all platforms.
-no-asm = []
-
 # Flag this library as the unstable compiler-builtins lib. This must be enabled
 # when using as `std`'s dependency.'
-compiler-builtins = ["dep:core"]
+compiler-builtins = ["dep:core", "unmangled-names"]
 
-# Generate memory-related intrinsics like memcpy
+# Enable `no_mangle` symbols for memory-related intrinsics like memcpy. The
+# mangled versions are always available.
 mem = []
 
-# Mangle all names so this can be linked in with other versions or other
-# compiler-rt implementations. Also used for testing
-mangled-names = []
+# Enable `no_mangle` symbols so this crate gets used as the runtime intrinsic
+# implementation. Leave this disabled for testing to avoid conflicting with
+# the system intrinsics.
+unmangled-names = []
 
 # This makes certain traits and function specializations public that
 # are not normally public but are required by the `builtins-test`
diff --git a/library/compiler-builtins/compiler-builtins/build.rs b/library/compiler-builtins/compiler-builtins/build.rs
index 6e1d230e3cd26..d8019277d3ff4 100644
--- a/library/compiler-builtins/compiler-builtins/build.rs
+++ b/library/compiler-builtins/compiler-builtins/build.rs
@@ -1,33 +1,40 @@
+#[path = "../libm/configure.rs"]
 mod configure;
 
 use std::env;
 
-use configure::{Target, configure_aliases};
+use configure::{Config, Library, set_cfg};
 
 fn main() {
-    println!("cargo::rerun-if-changed=build.rs");
-    println!("cargo::rerun-if-changed=configure.rs");
+    let cfg = Config::from_env(Library::CompilerBuiltins);
+
+    // Work around building as part of `builtins-shim`: if only `build.rs` is used, Cargo always
+    // considers the build dirty because `builtins-shim/build.rs` does not exist. If only
+    // `../c-b/build.rs` is used, the same may happen if not built in the workspace.
+    if cfg.manifest_dir.file_name().unwrap() == "builtins-shim" {
+        println!("cargo::rerun-if-changed=../compiler-builtins/build.rs");
+    } else {
+        println!("cargo::rerun-if-changed=build.rs");
+    }
 
-    let target = Target::from_env();
-    let cwd = env::current_dir().unwrap();
+    println!("cargo::rerun-if-changed=../libm/configure.rs");
 
+    configure::emit(&cfg);
     configure_check_cfg();
-    configure_aliases(&target);
-
-    configure_libm(&target);
 
+    let cwd = env::current_dir().unwrap();
     println!("cargo:compiler-rt={}", cwd.join("compiler-rt").display());
 
     println!("cargo::rustc-check-cfg=cfg(kernel_user_helpers)");
     println!("cargo::rustc-check-cfg=cfg(feature, values(\"mem-unaligned\"))");
 
     // Emscripten's runtime includes all the builtins
-    if target.os == "emscripten" {
+    if cfg.target_os == "emscripten" {
         return;
     }
 
     // OpenBSD provides compiler_rt by default, use it instead of rebuilding it from source
-    if target.os == "openbsd" {
+    if cfg.target_os == "openbsd" {
         println!("cargo:rustc-link-search=native=/usr/lib");
         println!("cargo:rustc-link-lib=compiler_rt");
         return;
@@ -35,86 +42,49 @@ fn main() {
 
     // Forcibly enable memory intrinsics on wasm & SGX as we don't have a libc to
     // provide them.
-    if (target.triple.contains("wasm") && !target.triple.contains("wasi"))
-        || (target.triple.contains("sgx") && target.triple.contains("fortanix"))
-        || target.triple.contains("-none")
-        || target.triple.contains("nvptx")
-        || target.triple.contains("uefi")
-        || target.triple.contains("xous")
+    if (cfg.target_triple.contains("wasm") && !cfg.target_triple.contains("wasi"))
+        || (cfg.target_triple.contains("sgx") && cfg.target_triple.contains("fortanix"))
+        || cfg.target_triple.contains("-none")
+        || cfg.target_triple.contains("nvptx")
+        || cfg.target_triple.contains("uefi")
+        || cfg.target_triple.contains("xous")
     {
         println!("cargo:rustc-cfg=feature=\"mem\"");
     }
 
     // These targets have hardware unaligned access support.
-    if target.arch.contains("x86_64")
-        || target.arch.contains("x86")
-        || target.arch.contains("aarch64")
-        || target.arch.contains("bpf")
-    {
-        println!("cargo:rustc-cfg=feature=\"mem-unaligned\"");
-    }
+    let mem_unaligned = cfg.target_arch.contains("x86_64")
+        || cfg.target_arch.contains("x86")
+        || cfg.target_arch.contains("aarch64")
+        || cfg.target_arch.contains("bpf");
+    set_cfg("mem_unaligned", mem_unaligned);
 
     // NOTE we are going to assume that llvm-target, what determines our codegen option, matches the
     // target triple. This is usually correct for our built-in targets but can break in presence of
     // custom targets, which can have arbitrary names.
-    let llvm_target = target.triple.split('-').collect::<Vec<_>>();
+    let llvm_target = cfg.target_triple.split('-').collect::<Vec<_>>();
 
     // Build missing intrinsics from compiler-rt C source code. If we're
     // mangling names though we assume that we're also in test mode so we don't
     // build anything and we rely on the upstream implementation of compiler-rt
     // functions
-    if !cfg!(feature = "mangled-names") && cfg!(feature = "c") {
+    if cfg!(feature = "unmangled-names") && cfg!(feature = "c") {
         // Don't use a C compiler for these targets:
         //
         // * nvptx - everything is bitcode, not compatible with mixed C/Rust
-        if !target.arch.contains("nvptx") {
+        if !cfg.target_arch.contains("nvptx") {
             #[cfg(feature = "c")]
-            c::compile(&llvm_target, &target);
+            c::compile(&llvm_target, &cfg);
         }
     }
 
     // Only emit the ARM Linux atomic emulation on pre-ARMv6 architectures. This
     // includes the old androideabi. It is deprecated but it is available as a
     // rustc target (arm-linux-androideabi).
-    if llvm_target[0] == "armv4t"
+    let kernel_user_helpers = llvm_target[0] == "armv4t"
         || llvm_target[0] == "armv5te"
-        || target.triple == "arm-linux-androideabi"
-    {
-        println!("cargo:rustc-cfg=kernel_user_helpers")
-    }
-}
-
-/// Run configuration for `libm` since it is included directly.
-///
-/// Much of this is copied from `libm/configure.rs`.
-fn configure_libm(target: &Target) {
-    println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)");
-    println!("cargo:rustc-check-cfg=cfg(arch_enabled)");
-    println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
-    println!("cargo:rustc-check-cfg=cfg(feature, values(\"unstable-public-internals\"))");
-
-    // Always use intrinsics
-    println!("cargo:rustc-cfg=intrinsics_enabled");
-
-    // The arch module may contain assembly.
-    if !cfg!(feature = "no-asm") {
-        println!("cargo:rustc-cfg=arch_enabled");
-    }
-
-    println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
-    if !matches!(target.opt_level.as_str(), "0" | "1") {
-        println!("cargo:rustc-cfg=optimizations_enabled");
-    }
-
-    println!(
-        "cargo:rustc-env=CFG_CARGO_FEATURES={:?}",
-        target.cargo_features
-    );
-    println!("cargo:rustc-env=CFG_OPT_LEVEL={}", target.opt_level);
-    println!("cargo:rustc-env=CFG_TARGET_FEATURES={:?}", target.features);
-
-    // Activate libm's unstable features to make full use of Nightly.
-    println!("cargo:rustc-cfg=feature=\"unstable-intrinsics\"");
+        || cfg.target_triple == "arm-linux-androideabi";
+    set_cfg("kernel_user_helpers", kernel_user_helpers);
 }
 
 /// Emit directives for features we expect to support that aren't in `Cargo.toml`.
@@ -176,10 +146,6 @@ fn configure_check_cfg() {
     // Rustc is unaware of sparc target features, but this does show up from
     // `rustc --print target-features --target sparc64-unknown-linux-gnu`.
     println!("cargo::rustc-check-cfg=cfg(target_feature, values(\"vis3\"))");
-
-    // FIXME: these come from libm and should be changed there
-    println!("cargo::rustc-check-cfg=cfg(feature, values(\"checked\"))");
-    println!("cargo::rustc-check-cfg=cfg(assert_no_panic)");
 }
 
 #[cfg(feature = "c")]
@@ -190,7 +156,7 @@ mod c {
     use std::io::Write;
     use std::path::{Path, PathBuf};
 
-    use super::Target;
+    use super::Config;
 
     struct Sources {
         // SYMBOL -> PATH TO SOURCE
@@ -232,17 +198,17 @@ mod c {
     }
 
     /// Compile intrinsics from the compiler-rt C source code
-    pub fn compile(llvm_target: &[&str], target: &Target) {
+    pub fn compile(llvm_target: &[&str], cfg: &Config) {
         let mut consider_float_intrinsics = true;
-        let cfg = &mut cc::Build::new();
+        let build = &mut cc::Build::new();
 
         // AArch64 GCCs exit with an error condition when they encounter any kind of floating point
         // code if the `nofp` and/or `nosimd` compiler flags have been set.
         //
         // Therefore, evaluate if those flags are present and set a boolean that causes any
         // compiler-rt intrinsics that contain floating point source to be excluded for this target.
-        if target.arch == "aarch64" {
-            let cflags_key = String::from("CFLAGS_") + &(target.triple.replace("-", "_"));
+        if cfg.target_arch == "aarch64" {
+            let cflags_key = String::from("CFLAGS_") + &(cfg.target_triple.replace("-", "_"));
             if let Ok(cflags_value) = env::var(cflags_key) {
                 if cflags_value.contains("+nofp") || cflags_value.contains("+nosimd") {
                     consider_float_intrinsics = false;
@@ -256,22 +222,22 @@ mod c {
         // support `_Float16` on all targets (whereas Rust does). However, define the macro
         // anyway to prevent issues like rust#118813 and rust#123885 silently reoccuring if more
         // `f16` intrinsics get accidentally added here in the future.
-        cfg.define("COMPILER_RT_HAS_FLOAT16", None);
+        build.define("COMPILER_RT_HAS_FLOAT16", None);
 
-        cfg.warnings(false);
+        build.warnings(false);
 
-        if target.env == "msvc" {
+        if cfg.target_env == "msvc" {
             // Don't pull in extra libraries on MSVC
-            cfg.flag("/Zl");
+            build.flag("/Zl");
 
             // Emulate C99 and C++11's __func__ for MSVC prior to 2013 CTP
-            cfg.define("__func__", Some("__FUNCTION__"));
+            build.define("__func__", Some("__FUNCTION__"));
         } else {
             // Turn off various features of gcc and such, mostly copying
             // compiler-rt's build system already
-            cfg.flag("-fno-builtin");
-            cfg.flag("-fvisibility=hidden");
-            cfg.flag("-ffreestanding");
+            build.flag("-fno-builtin");
+            build.flag("-fvisibility=hidden");
+            build.flag("-ffreestanding");
             // Avoid the following warning appearing once **per file**:
             // clang: warning: optimization flag '-fomit-frame-pointer' is not supported for target 'armv7' [-Wignored-optimization-argument]
             //
@@ -280,17 +246,17 @@ mod c {
             // `check_cxx_compiler_flag(-fomit-frame-pointer COMPILER_RT_HAS_FOMIT_FRAME_POINTER_FLAG)`
             //
             // in https://github.com/rust-lang/compiler-rt/blob/c8fbcb3/cmake/config-ix.cmake#L19.
-            cfg.flag_if_supported("-fomit-frame-pointer");
-            cfg.define("VISIBILITY_HIDDEN", None);
+            build.flag_if_supported("-fomit-frame-pointer");
+            build.define("VISIBILITY_HIDDEN", None);
 
-            if let "aarch64" | "arm64ec" = target.arch.as_str() {
+            if let "aarch64" | "arm64ec" = cfg.target_arch.as_str() {
                 // FIXME(llvm20): Older GCCs on A64 fail to build with
                 // -Werror=implicit-function-declaration due to a compiler-rt bug.
                 // With a newer LLVM we should be able to enable the flag everywhere.
                 // https://github.com/llvm/llvm-project/commit/8aa9d6206ce55bdaaf422839c351fbd63f033b89
             } else {
                 // Avoid implicitly creating references to undefined functions
-                cfg.flag("-Werror=implicit-function-declaration");
+                build.flag("-Werror=implicit-function-declaration");
             }
         }
 
@@ -299,14 +265,14 @@ mod c {
         // at odds with compiling with `-ffreestanding`, as the header
         // may be incompatible or not present. Create a minimal stub
         // header to use instead.
-        if target.os == "uefi" {
+        if cfg.target_os == "uefi" {
             let out_dir = PathBuf::from(env::var("OUT_DIR").unwrap());
             let include_dir = out_dir.join("include");
             if !include_dir.exists() {
                 fs::create_dir(&include_dir).unwrap();
             }
             fs::write(include_dir.join("stdlib.h"), "#include <stddef.h>").unwrap();
-            cfg.flag(&format!("-I{}", include_dir.to_str().unwrap()));
+            build.flag(&format!("-I{}", include_dir.to_str().unwrap()));
         }
 
         let mut sources = Sources::new();
@@ -344,7 +310,7 @@ mod c {
 
         // On iOS and 32-bit OSX these are all just empty intrinsics, no need to
         // include them.
-        if target.vendor != "apple" || target.arch != "x86" {
+        if cfg.target_vendor != "apple" || cfg.target_arch != "x86" {
             sources.extend(&[
                 ("__absvti2", "absvti2.c"),
                 ("__addvti3", "addvti3.c"),
@@ -363,7 +329,7 @@ mod c {
             }
         }
 
-        if target.vendor == "apple" {
+        if cfg.target_vendor == "apple" {
             sources.extend(&[
                 ("atomic_flag_clear", "atomic_flag_clear.c"),
                 ("atomic_flag_clear_explicit", "atomic_flag_clear_explicit.c"),
@@ -377,8 +343,8 @@ mod c {
             ]);
         }
 
-        if target.env != "msvc" {
-            if target.arch == "x86" {
+        if cfg.target_env != "msvc" {
+            if cfg.target_arch == "x86" {
                 sources.extend(&[
                     ("__ashldi3", "i386/ashldi3.S"),
                     ("__ashrdi3", "i386/ashrdi3.S"),
@@ -392,7 +358,7 @@ mod c {
             }
         }
 
-        if target.arch == "arm" && target.vendor != "apple" && target.env != "msvc" {
+        if cfg.target_arch == "arm" && cfg.target_vendor != "apple" && cfg.target_env != "msvc" {
             sources.extend(&[
                 ("__aeabi_div0", "arm/aeabi_div0.c"),
                 ("__aeabi_drsub", "arm/aeabi_drsub.c"),
@@ -412,7 +378,7 @@ mod c {
                 ("__umodsi3", "arm/umodsi3.S"),
             ]);
 
-            if target.os == "freebsd" {
+            if cfg.target_os == "freebsd" {
                 sources.extend(&[("__clear_cache", "clear_cache.c")]);
             }
 
@@ -484,31 +450,36 @@ mod c {
             ]);
         }
 
-        if (target.arch == "aarch64" || target.arch == "arm64ec") && consider_float_intrinsics {
+        if (cfg.target_arch == "aarch64" || cfg.target_arch == "arm64ec")
+            && consider_float_intrinsics
+        {
             sources.extend(&[
                 ("__fe_getround", "fp_mode.c"),
                 ("__fe_raise_inexact", "fp_mode.c"),
             ]);
 
-            if target.os != "windows" && target.os != "cygwin" {
+            if cfg.target_os != "windows" && cfg.target_os != "cygwin" {
                 sources.extend(&[("__multc3", "multc3.c")]);
             }
         }
 
-        if target.arch == "mips" || target.arch == "riscv32" || target.arch == "riscv64" {
+        if cfg.target_arch == "mips" || cfg.target_arch == "riscv32" || cfg.target_arch == "riscv64"
+        {
             sources.extend(&[("__bswapsi2", "bswapsi2.c")]);
         }
 
-        if target.arch == "mips64" {
+        if cfg.target_arch == "mips64" {
             sources.extend(&[("__fe_getround", "fp_mode.c")]);
         }
 
-        if target.arch == "loongarch64" {
+        if cfg.target_arch == "loongarch64" {
             sources.extend(&[("__fe_getround", "fp_mode.c")]);
         }
 
         // Remove the assembly implementations that won't compile for the target
-        if llvm_target[0] == "thumbv6m" || llvm_target[0] == "thumbv8m.base" || target.os == "uefi"
+        if llvm_target[0] == "thumbv6m"
+            || llvm_target[0] == "thumbv8m.base"
+            || cfg.target_os == "uefi"
         {
             let mut to_remove = Vec::new();
             for (k, v) in sources.map.iter() {
@@ -524,19 +495,19 @@ mod c {
         }
 
         // Android and Cygwin uses emulated TLS so we need a runtime support function.
-        if target.os == "android" || target.os == "cygwin" {
+        if cfg.target_os == "android" || cfg.target_os == "cygwin" {
             sources.extend(&[("__emutls_get_address", "emutls.c")]);
         }
 
         // Work around a bug in the NDK headers (fixed in
         // https://r.android.com/2038949 which will be released in a future
         // NDK version) by providing a definition of LONG_BIT.
-        if target.os == "android" {
-            cfg.define("LONG_BIT", "(8 * sizeof(long))");
+        if cfg.target_os == "android" {
+            build.define("LONG_BIT", "(8 * sizeof(long))");
         }
 
         // OpenHarmony also uses emulated TLS.
-        if target.env == "ohos" {
+        if cfg.target_env == "ohos" {
             sources.extend(&[("__emutls_get_address", "emutls.c")]);
         }
 
@@ -568,16 +539,16 @@ mod c {
         // Support deterministic builds by remapping the __FILE__ prefix if the
         // compiler supports it.  This fixes the nondeterminism caused by the
         // use of that macro in lib/builtins/int_util.h in compiler-rt.
-        cfg.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display()));
+        build.flag_if_supported(&format!("-ffile-prefix-map={}=.", root.display()));
 
         // Include out-of-line atomics for aarch64, which are all generated by supplying different
         // sets of flags to the same source file.
         // Note: Out-of-line aarch64 atomics are not supported by the msvc toolchain (#430) and
         // on uefi.
         let src_dir = root.join("lib/builtins");
-        if target.arch == "aarch64" && target.env != "msvc" && target.os != "uefi" {
+        if cfg.target_arch == "aarch64" && cfg.target_env != "msvc" && cfg.target_os != "uefi" {
             // See below for why we're building these as separate libraries.
-            build_aarch64_out_of_line_atomics_libraries(&src_dir, cfg, link_against_prebuilt_rt);
+            build_aarch64_out_of_line_atomics_libraries(&src_dir, build, link_against_prebuilt_rt);
 
             // Some run-time CPU feature detection is necessary, as well.
             let cpu_model_src = if src_dir.join("cpu_model.c").exists() {
@@ -592,7 +563,7 @@ mod c {
         for (sym, src) in sources.map.iter() {
             let src = src_dir.join(src);
             if !link_against_prebuilt_rt && added_sources.insert(src.clone()) {
-                cfg.file(&src);
+                build.file(&src);
                 println!("cargo:rerun-if-changed={}", src.display());
             }
             println!("cargo:rustc-cfg={}=\"optimized-c\"", sym);
@@ -616,7 +587,7 @@ mod c {
                 );
             }
         } else {
-            cfg.compile("libcompiler-rt.a");
+            build.compile("libcompiler-rt.a");
         }
     }
 
diff --git a/library/compiler-builtins/compiler-builtins/configure.rs b/library/compiler-builtins/compiler-builtins/configure.rs
deleted file mode 100644
index f16da6b58f812..0000000000000
--- a/library/compiler-builtins/compiler-builtins/configure.rs
+++ /dev/null
@@ -1,107 +0,0 @@
-// Configuration that is shared between `compiler_builtins` and `builtins_test`.
-
-use std::{env, str};
-
-#[derive(Debug)]
-#[allow(dead_code)]
-pub struct Target {
-    pub triple: String,
-    pub triple_split: Vec<String>,
-    pub opt_level: String,
-    pub cargo_features: Vec<String>,
-    pub os: String,
-    pub arch: String,
-    pub vendor: String,
-    pub env: String,
-    pub pointer_width: u8,
-    pub little_endian: bool,
-    pub features: Vec<String>,
-    pub reliable_f128: bool,
-    pub reliable_f16: bool,
-}
-
-impl Target {
-    pub fn from_env() -> Self {
-        let triple = env::var("TARGET").unwrap();
-        let triple_split = triple.split('-').map(ToOwned::to_owned).collect();
-        let little_endian = match env::var("CARGO_CFG_TARGET_ENDIAN").unwrap().as_str() {
-            "little" => true,
-            "big" => false,
-            x => panic!("unknown endian {x}"),
-        };
-        let cargo_features = env::vars()
-            .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned))
-            .map(|s| s.to_lowercase().replace("_", "-"))
-            .collect();
-
-        Self {
-            triple,
-            triple_split,
-            os: env::var("CARGO_CFG_TARGET_OS").unwrap(),
-            opt_level: env::var("OPT_LEVEL").unwrap(),
-            cargo_features,
-            arch: env::var("CARGO_CFG_TARGET_ARCH").unwrap(),
-            vendor: env::var("CARGO_CFG_TARGET_VENDOR").unwrap(),
-            env: env::var("CARGO_CFG_TARGET_ENV").unwrap(),
-            pointer_width: env::var("CARGO_CFG_TARGET_POINTER_WIDTH")
-                .unwrap()
-                .parse()
-                .unwrap(),
-            little_endian,
-            features: env::var("CARGO_CFG_TARGET_FEATURE")
-                .unwrap_or_default()
-                .split(",")
-                .map(ToOwned::to_owned)
-                .collect(),
-            // Note that these are unstable options, so only show up with the nightly compiler or
-            // with `RUSTC_BOOTSTRAP=1` (which is required to use the types anyway).
-            reliable_f128: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F128").is_some(),
-            reliable_f16: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F16").is_some(),
-        }
-    }
-
-    #[allow(dead_code)]
-    pub fn has_feature(&self, feature: &str) -> bool {
-        self.features.iter().any(|f| f == feature)
-    }
-}
-
-pub fn configure_aliases(target: &Target) {
-    // To compile builtins-test-intrinsics for thumb targets, where there is no libc
-    println!("cargo::rustc-check-cfg=cfg(thumb)");
-    if target.triple_split[0].starts_with("thumb") {
-        println!("cargo:rustc-cfg=thumb")
-    }
-
-    // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because
-    // these targets do not have full Thumb-2 support but only original Thumb-1.
-    // We have to cfg our code accordingly.
-    println!("cargo::rustc-check-cfg=cfg(thumb_1)");
-    if target.triple_split[0] == "thumbv6m" || target.triple_split[0] == "thumbv8m.base" {
-        println!("cargo:rustc-cfg=thumb_1")
-    }
-
-    // Config shorthands
-    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
-    if target.arch == "x86" && !target.features.iter().any(|f| f == "sse") {
-        // Shorthand to detect i586 targets
-        println!("cargo:rustc-cfg=x86_no_sse");
-    }
-
-    /* Not all backends support `f16` and `f128` to the same level on all architectures, so we
-     * need to disable things if the compiler may crash. See configuration at:
-     * * https://github.com/rust-lang/rust/blob/c65dccabacdfd6c8a7f7439eba13422fdd89b91e/compiler/rustc_codegen_llvm/src/llvm_util.rs#L367-L432
-     * * https://github.com/rust-lang/rustc_codegen_gcc/blob/4b5c44b14166083eef8d71f15f5ea1f53fc976a0/src/lib.rs#L496-L507
-     * * https://github.com/rust-lang/rustc_codegen_cranelift/blob/c713ffab3c6e28ab4b4dd4e392330f786ea657ad/src/lib.rs#L196-L226
-     */
-
-    println!("cargo::rustc-check-cfg=cfg(f16_enabled)");
-    if target.reliable_f16 {
-        println!("cargo::rustc-cfg=f16_enabled");
-    }
-
-    println!("cargo::rustc-check-cfg=cfg(f128_enabled)");
-    if target.reliable_f128 {
-        println!("cargo::rustc-cfg=f128_enabled");
-    }
-}
diff --git a/library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs b/library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs
index 100b67150772a..367768ecb60d6 100644
--- a/library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs
+++ b/library/compiler-builtins/compiler-builtins/src/aarch64_outline_atomics.rs
@@ -35,14 +35,14 @@ intrinsics! {
 }
 
 /// Function to enable/disable LSE. To be used only for testing purposes.
-#[cfg(feature = "mangled-names")]
+#[cfg(feature = "unstable-public-internals")]
 pub unsafe fn set_have_lse_atomics(has_lse: bool) {
     let lse_flag = if has_lse { 1 } else { 0 };
     HAVE_LSE_ATOMICS.store(lse_flag, Ordering::Relaxed);
 }
 
 /// Function to obtain whether LSE is enabled or not. To be used only for testing purposes.
-#[cfg(feature = "mangled-names")]
+#[cfg(feature = "unstable-public-internals")]
 pub fn get_have_lse_atomics() -> bool {
     HAVE_LSE_ATOMICS.load(Ordering::Relaxed) != 0
 }
diff --git a/library/compiler-builtins/compiler-builtins/src/float/add.rs b/library/compiler-builtins/compiler-builtins/src/float/add.rs
index acdcd2ebe3133..69de07372f16e 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/add.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/add.rs
@@ -1,5 +1,4 @@
-use crate::float::Float;
-use crate::int::{CastFrom, CastInto, Int, MinInt};
+use crate::support::{CastFrom, CastInto, Float, Int, MinInt};
 
 /// Returns `a + b`
 fn add<F: Float>(a: F, b: F) -> F
diff --git a/library/compiler-builtins/compiler-builtins/src/float/cmp.rs b/library/compiler-builtins/compiler-builtins/src/float/cmp.rs
index 8ab39c2b5914d..a59365a2bdb8b 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/cmp.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/cmp.rs
@@ -1,21 +1,28 @@
 #![allow(unreachable_code)]
 
-use crate::float::Float;
-use crate::int::MinInt;
-use crate::support::cfg_if;
-
-// Taken from LLVM config:
-// https://github.com/llvm/llvm-project/blob/0cf3c437c18ed27d9663d87804a9a15ff6874af2/compiler-rt/lib/builtins/fp_compare_impl.inc#L11-L27
+use crate::support::{Float, MinInt, cfg_if};
+
+// These definitions should be consistent with LLVM's definition from `getCmpLibcallReturnType`,
+// compiler-rt's definitions [1], GCC's `CMPtype` [2], and `libgcc`. To find the definitions
+// in GCC, there are a few things to grep for:
+//
+// * `default_libgcc_cmp_return_mode` for the default (word sized)
+// * `TARGET_LIBGCC_CMP_RETURN_MODE` for the target hook to override the default
+// * `# *define *CMPtype` as a last resort, for overrides that don't use the hook (AVR)
+//
+// [1]: https://github.com/llvm/llvm-project/blob/0cf3c437c18ed27d9663d87804a9a15ff6874af2/compiler-rt/lib/builtins/fp_compare_impl.inc#L11-L27
+// [2]: https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
 cfg_if! {
-    if #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec"))] {
+    if #[cfg(any(target_arch = "aarch64", target_arch = "arm64ec", target_family = "wasm"))] {
         // Aarch64 uses `int` rather than a pointer-sized value.
+        // `getCmpLibcallReturnType` for WASM is always set to i32.
         pub type CmpResult = i32;
     } else if #[cfg(target_arch = "avr")] {
         // AVR uses a single byte.
         pub type CmpResult = i8;
     } else {
-        // In compiler-rt, LLP64 ABIs use `long long` and everything else uses `long`. In effect,
-        // this means the return value is always pointer-sized.
+        // The default is word-sized. In LLVM's compiler-rt, this is done by using `long long` on
+        // LLP64 ABIs and `long` on everything else.
         pub type CmpResult = isize;
     }
 }
@@ -28,8 +35,18 @@ enum Result {
     Unordered,
 }
 
+/// Conversions to match GCC intrinsics [1].
+///
+/// * `unord`: nonzero if either NaN, 0 otherwise
+/// * `eq`, `ne`: 0 if a == b and both YaN, nonzero otherwise
+/// * `ge`, `gt`, `lt`, `le`: return an int result that provides the same comparison to 0 if both
+///   YaN and the comparison matches. E.g. if a >= b, `ge` returns an `x >= 0`.
+///
+/// The separate map functions are only needed to handle the unordered case.
+///
+/// [1]: https://gcc.gnu.org/onlinedocs/gccint/Soft-float-library-routines.html#Comparison-functions-1
 impl Result {
-    fn to_le_abi(self) -> CmpResult {
+    fn to_default_cmp_result(self) -> CmpResult {
         match self {
             Result::Less => -1,
             Result::Equal => 0,
@@ -38,7 +55,7 @@ impl Result {
         }
     }
 
-    fn to_ge_abi(self) -> CmpResult {
+    fn to_gt_ge_cmp_result(self) -> CmpResult {
         match self {
             Result::Less => -1,
             Result::Equal => 0,
@@ -118,11 +135,11 @@ fn unord<F: Float>(a: F, b: F) -> bool {
 #[cfg(f16_enabled)]
 intrinsics! {
     pub extern "C" fn __lehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __gehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_ge_abi()
+        cmp(a, b).to_gt_ge_cmp_result()
     }
 
     pub extern "C" fn __unordhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
@@ -130,29 +147,29 @@ intrinsics! {
     }
 
     pub extern "C" fn __eqhf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __lthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __nehf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __gthf2(a: f16, b: f16) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_ge_abi()
+        cmp(a, b).to_gt_ge_cmp_result()
     }
 }
 
 intrinsics! {
     pub extern "C" fn __lesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __gesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_ge_abi()
+        cmp(a, b).to_gt_ge_cmp_result()
     }
 
     #[arm_aeabi_alias = __aeabi_fcmpun]
@@ -161,27 +178,27 @@ intrinsics! {
     }
 
     pub extern "C" fn __eqsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __ltsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __nesf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __gtsf2(a: f32, b: f32) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_ge_abi()
+        cmp(a, b).to_gt_ge_cmp_result()
     }
 
     pub extern "C" fn __ledf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __gedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_ge_abi()
+        cmp(a, b).to_gt_ge_cmp_result()
     }
 
     #[arm_aeabi_alias = __aeabi_dcmpun]
@@ -190,19 +207,19 @@ intrinsics! {
     }
 
     pub extern "C" fn __eqdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __ltdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __nedf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     pub extern "C" fn __gtdf2(a: f64, b: f64) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_ge_abi()
+        cmp(a, b).to_gt_ge_cmp_result()
     }
 }
 
@@ -210,12 +227,12 @@ intrinsics! {
 intrinsics! {
     #[ppc_alias = __lekf2]
     pub extern "C" fn __letf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     #[ppc_alias = __gekf2]
     pub extern "C" fn __getf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_ge_abi()
+        cmp(a, b).to_gt_ge_cmp_result()
     }
 
     #[ppc_alias = __unordkf2]
@@ -225,22 +242,22 @@ intrinsics! {
 
     #[ppc_alias = __eqkf2]
     pub extern "C" fn __eqtf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     #[ppc_alias = __ltkf2]
     pub extern "C" fn __lttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     #[ppc_alias = __nekf2]
     pub extern "C" fn __netf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_le_abi()
+        cmp(a, b).to_default_cmp_result()
     }
 
     #[ppc_alias = __gtkf2]
     pub extern "C" fn __gttf2(a: f128, b: f128) -> crate::float::cmp::CmpResult {
-        cmp(a, b).to_ge_abi()
+        cmp(a, b).to_gt_ge_cmp_result()
     }
 }
 
diff --git a/library/compiler-builtins/compiler-builtins/src/float/conv.rs b/library/compiler-builtins/compiler-builtins/src/float/conv.rs
index 75ea7ce02424a..7310cd4ec4f02 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/conv.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/conv.rs
@@ -1,7 +1,6 @@
 use core::ops::Neg;
 
-use super::Float;
-use crate::int::{CastFrom, CastInto, Int, MinInt};
+use crate::support::{CastFrom, CastInto, Float, Int, MinInt};
 
 /// Conversions from integers to floats.
 ///
diff --git a/library/compiler-builtins/compiler-builtins/src/float/div.rs b/library/compiler-builtins/compiler-builtins/src/float/div.rs
index fc1fc085105a7..419d8ad5e7061 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/div.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/div.rs
@@ -82,9 +82,7 @@
 use core::mem::size_of;
 use core::ops;
 
-use super::HalfRep;
-use crate::float::Float;
-use crate::int::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
+use crate::support::{CastFrom, CastInto, DInt, Float, HInt, HalfRep, Int, MinInt};
 
 fn div<F: Float>(a: F, b: F) -> F
 where
diff --git a/library/compiler-builtins/compiler-builtins/src/float/extend.rs b/library/compiler-builtins/compiler-builtins/src/float/extend.rs
index c4f1fe30e0ea8..58038ce57f834 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/extend.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/extend.rs
@@ -1,5 +1,4 @@
-use crate::float::Float;
-use crate::int::{CastInto, Int, MinInt};
+use crate::support::{CastInto, Float, Int, MinInt};
 
 /// Generic conversion from a narrower to a wider IEEE-754 floating-point type
 fn extend<F: Float, R: Float>(a: F) -> R
diff --git a/library/compiler-builtins/compiler-builtins/src/float/mod.rs b/library/compiler-builtins/compiler-builtins/src/float/mod.rs
index 4a379d0d3575b..15318c4928804 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/mod.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/mod.rs
@@ -6,10 +6,4 @@ pub mod extend;
 pub mod mul;
 pub mod pow;
 pub mod sub;
-pub(crate) mod traits;
 pub mod trunc;
-
-#[cfg(not(feature = "unstable-public-internals"))]
-pub(crate) use traits::{Float, HalfRep};
-#[cfg(feature = "unstable-public-internals")]
-pub use traits::{Float, HalfRep};
diff --git a/library/compiler-builtins/compiler-builtins/src/float/mul.rs b/library/compiler-builtins/compiler-builtins/src/float/mul.rs
index 49a2414eb5c69..ffba2dc41f8a0 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/mul.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/mul.rs
@@ -1,5 +1,4 @@
-use crate::float::Float;
-use crate::int::{CastInto, DInt, HInt, Int, MinInt};
+use crate::support::{CastInto, DInt, Float, HInt, Int, MinInt};
 
 fn mul<F: Float>(a: F, b: F) -> F
 where
diff --git a/library/compiler-builtins/compiler-builtins/src/float/pow.rs b/library/compiler-builtins/compiler-builtins/src/float/pow.rs
index 6997a9c213c59..2c92971d31397 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/pow.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/pow.rs
@@ -1,5 +1,4 @@
-use crate::float::Float;
-use crate::int::Int;
+use crate::support::{Float, Int};
 
 /// Returns `a` raised to the power `b`
 fn pow<F: Float>(a: F, b: i32) -> F {
diff --git a/library/compiler-builtins/compiler-builtins/src/float/sub.rs b/library/compiler-builtins/compiler-builtins/src/float/sub.rs
index 48ef33b0b826f..11dd3b77d5d1c 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/sub.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/sub.rs
@@ -1,4 +1,4 @@
-use crate::float::Float;
+use crate::support::Float;
 
 intrinsics! {
     #[cfg(f16_enabled)]
diff --git a/library/compiler-builtins/compiler-builtins/src/float/traits.rs b/library/compiler-builtins/compiler-builtins/src/float/traits.rs
deleted file mode 100644
index a30d20900b1c4..0000000000000
--- a/library/compiler-builtins/compiler-builtins/src/float/traits.rs
+++ /dev/null
@@ -1,189 +0,0 @@
-use core::ops;
-
-use crate::int::{DInt, Int, MinInt};
-
-/// Wrapper to extract the integer type half of the float's size
-pub type HalfRep<F> = <<F as Float>::Int as DInt>::H;
-
-/// Trait for some basic operations on floats
-#[allow(dead_code)]
-pub trait Float:
-    Copy
-    + core::fmt::Debug
-    + PartialEq
-    + PartialOrd
-    + ops::AddAssign
-    + ops::MulAssign
-    + ops::Add<Output = Self>
-    + ops::Sub<Output = Self>
-    + ops::Div<Output = Self>
-    + ops::Rem<Output = Self>
-{
-    /// A uint of the same width as the float
-    type Int: Int<OtherSign = Self::SignedInt, Unsigned = Self::Int>;
-
-    /// A int of the same width as the float
-    type SignedInt: Int + MinInt<OtherSign = Self::Int, Unsigned = Self::Int>;
-
-    /// An int capable of containing the exponent bits plus a sign bit. This is signed.
-    type ExpInt: Int;
-
-    const ZERO: Self;
-    const ONE: Self;
-
-    /// The bitwidth of the float type.
-    const BITS: u32;
-
-    /// The bitwidth of the significand.
-    const SIG_BITS: u32;
-
-    /// The bitwidth of the exponent.
-    const EXP_BITS: u32 = Self::BITS - Self::SIG_BITS - 1;
-
-    /// The saturated (maximum bitpattern) value of the exponent, i.e. the infinite
-    /// representation.
-    ///
-    /// This is in the rightmost position, use `EXP_MASK` for the shifted value.
-    const EXP_SAT: u32 = (1 << Self::EXP_BITS) - 1;
-
-    /// The exponent bias value.
-    const EXP_BIAS: u32 = Self::EXP_SAT >> 1;
-
-    /// A mask for the sign bit.
-    const SIGN_MASK: Self::Int;
-
-    /// A mask for the significand.
-    const SIG_MASK: Self::Int;
-
-    /// The implicit bit of the float format.
-    const IMPLICIT_BIT: Self::Int;
-
-    /// A mask for the exponent.
-    const EXP_MASK: Self::Int;
-
-    /// Returns `self` transmuted to `Self::Int`
-    fn to_bits(self) -> Self::Int;
-
-    /// Returns `self` transmuted to `Self::SignedInt`
-    fn to_bits_signed(self) -> Self::SignedInt;
-
-    /// Checks if two floats have the same bit representation. *Except* for NaNs! NaN can be
-    /// represented in multiple different ways. This method returns `true` if two NaNs are
-    /// compared.
-    fn eq_repr(self, rhs: Self) -> bool;
-
-    /// Returns true if the sign is negative
-    fn is_sign_negative(self) -> bool;
-
-    /// Returns the exponent, not adjusting for bias.
-    fn exp(self) -> Self::ExpInt;
-
-    /// Returns the significand with no implicit bit (or the "fractional" part)
-    fn frac(self) -> Self::Int;
-
-    /// Returns the significand with implicit bit
-    fn imp_frac(self) -> Self::Int;
-
-    /// Returns a `Self::Int` transmuted back to `Self`
-    fn from_bits(a: Self::Int) -> Self;
-
-    /// Constructs a `Self` from its parts. Inputs are treated as bits and shifted into position.
-    fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self;
-
-    fn abs(self) -> Self {
-        let abs_mask = !Self::SIGN_MASK;
-        Self::from_bits(self.to_bits() & abs_mask)
-    }
-
-    /// Returns (normalized exponent, normalized significand)
-    fn normalize(significand: Self::Int) -> (i32, Self::Int);
-
-    /// Returns if `self` is subnormal
-    fn is_subnormal(self) -> bool;
-}
-
-macro_rules! float_impl {
-    ($ty:ident, $ity:ident, $sity:ident, $expty:ident, $bits:expr, $significand_bits:expr) => {
-        impl Float for $ty {
-            type Int = $ity;
-            type SignedInt = $sity;
-            type ExpInt = $expty;
-
-            const ZERO: Self = 0.0;
-            const ONE: Self = 1.0;
-
-            const BITS: u32 = $bits;
-            const SIG_BITS: u32 = $significand_bits;
-
-            const SIGN_MASK: Self::Int = 1 << (Self::BITS - 1);
-            const SIG_MASK: Self::Int = (1 << Self::SIG_BITS) - 1;
-            const IMPLICIT_BIT: Self::Int = 1 << Self::SIG_BITS;
-            const EXP_MASK: Self::Int = !(Self::SIGN_MASK | Self::SIG_MASK);
-
-            fn to_bits(self) -> Self::Int {
-                self.to_bits()
-            }
-            fn to_bits_signed(self) -> Self::SignedInt {
-                self.to_bits() as Self::SignedInt
-            }
-            fn eq_repr(self, rhs: Self) -> bool {
-                #[cfg(feature = "mangled-names")]
-                fn is_nan(x: $ty) -> bool {
-                    // When using mangled-names, the "real" compiler-builtins might not have the
-                    // necessary builtin (__unordtf2) to test whether `f128` is NaN.
-                    // FIXME(f16_f128): Remove once the nightly toolchain has the __unordtf2 builtin
-                    // x is NaN if all the bits of the exponent are set and the significand is non-0
-                    x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
-                }
-                #[cfg(not(feature = "mangled-names"))]
-                fn is_nan(x: $ty) -> bool {
-                    x.is_nan()
-                }
-                if is_nan(self) && is_nan(rhs) {
-                    true
-                } else {
-                    self.to_bits() == rhs.to_bits()
-                }
-            }
-            fn is_sign_negative(self) -> bool {
-                self.is_sign_negative()
-            }
-            fn exp(self) -> Self::ExpInt {
-                ((self.to_bits() & Self::EXP_MASK) >> Self::SIG_BITS) as Self::ExpInt
-            }
-            fn frac(self) -> Self::Int {
-                self.to_bits() & Self::SIG_MASK
-            }
-            fn imp_frac(self) -> Self::Int {
-                self.frac() | Self::IMPLICIT_BIT
-            }
-            fn from_bits(a: Self::Int) -> Self {
-                Self::from_bits(a)
-            }
-            fn from_parts(negative: bool, exponent: Self::Int, significand: Self::Int) -> Self {
-                Self::from_bits(
-                    ((negative as Self::Int) << (Self::BITS - 1))
-                        | ((exponent << Self::SIG_BITS) & Self::EXP_MASK)
-                        | (significand & Self::SIG_MASK),
-                )
-            }
-            fn normalize(significand: Self::Int) -> (i32, Self::Int) {
-                let shift = significand.leading_zeros().wrapping_sub(Self::EXP_BITS);
-                (
-                    1i32.wrapping_sub(shift as i32),
-                    significand << shift as Self::Int,
-                )
-            }
-            fn is_subnormal(self) -> bool {
-                (self.to_bits() & Self::EXP_MASK) == Self::Int::ZERO
-            }
-        }
-    };
-}
-
-#[cfg(f16_enabled)]
-float_impl!(f16, u16, i16, i8, 16, 10);
-float_impl!(f32, u32, i32, i16, 32, 23);
-float_impl!(f64, u64, i64, i16, 64, 52);
-#[cfg(f128_enabled)]
-float_impl!(f128, u128, i128, i16, 128, 112);
diff --git a/library/compiler-builtins/compiler-builtins/src/float/trunc.rs b/library/compiler-builtins/compiler-builtins/src/float/trunc.rs
index 93db5d8bbdeb1..1a88b0649fda3 100644
--- a/library/compiler-builtins/compiler-builtins/src/float/trunc.rs
+++ b/library/compiler-builtins/compiler-builtins/src/float/trunc.rs
@@ -1,5 +1,4 @@
-use crate::float::Float;
-use crate::int::{CastInto, Int, MinInt};
+use crate::support::{CastInto, Float, Int, MinInt};
 
 fn trunc<F: Float, R: Float>(a: F) -> R
 where
diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon.rs b/library/compiler-builtins/compiler-builtins/src/hexagon.rs
index a5c7b4dfdda91..46062fc3700d8 100644
--- a/library/compiler-builtins/compiler-builtins/src/hexagon.rs
+++ b/library/compiler-builtins/compiler-builtins/src/hexagon.rs
@@ -1,7 +1,66 @@
 use core::arch::global_asm;
 
+// Hexagon L1 cache line size in bytes (Hexagon PRM sections 5.10.3-5.10.4).
+const CACHE_LINE_SIZE: usize = 32;
+
+intrinsics! {
+    pub unsafe extern "C" fn __clear_cache(start: *mut u8, end: *mut u8) {
+        // Hexagon has separate instruction and data caches.
+        let mask = !(CACHE_LINE_SIZE - 1);
+        let start_line = start.addr() & mask;
+        let end_addr = end.addr();
+
+        // Clean and invalidate data cache to push new code to memory and
+        // invalidate stale lines in the L2 cache.
+        let mut addr = start_line;
+        while addr < end_addr {
+            unsafe {
+                core::arch::asm!(
+                    "dccleaninva({addr})",
+                    addr = in(reg) addr,
+                    options(nostack, preserves_flags),
+                );
+            }
+            addr += CACHE_LINE_SIZE;
+        }
+
+        // Invalidate instruction cache so it re-fetches from memory.
+        addr = start_line;
+        while addr < end_addr {
+            unsafe {
+                core::arch::asm!(
+                    "icinva({addr})",
+                    addr = in(reg) addr,
+                    options(nostack, preserves_flags),
+                );
+            }
+            addr += CACHE_LINE_SIZE;
+        }
+
+        // Instruction sync barrier ensures subsequent fetches see the new code.
+        unsafe {
+            core::arch::asm!("isync", options(nostack, preserves_flags));
+        }
+    }
+}
+
 global_asm!(include_str!("hexagon/func_macro.s"), options(raw));
 
+global_asm!(
+    include_str!("hexagon/common_entry_exit_abi1.s"),
+    options(raw)
+);
+
+global_asm!(
+    include_str!("hexagon/common_entry_exit_abi2.s"),
+    options(raw)
+);
+
+global_asm!(
+    include_str!("hexagon/common_entry_exit_legacy.s"),
+    options(raw)
+);
+
 global_asm!(include_str!("hexagon/dfaddsub.s"), options(raw));
 
 global_asm!(include_str!("hexagon/dfdiv.s"), options(raw));
diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_abi1.s b/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_abi1.s
new file mode 100644
index 0000000000000..61425abaad5ae
--- /dev/null
+++ b/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_abi1.s
@@ -0,0 +1,42 @@
+
+FUNCTION_BEGIN __save_r24_through_r27
+		memd(fp+#-16) = r27:26
+FALLTHROUGH_TAIL_CALL __save_r24_through_r27 __save_r24_through_r25
+	{
+		memd(fp+#-8) = r25:24
+		jumpr lr
+	}
+FUNCTION_END __save_r24_through_r25
+
+
+FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe_before_tailcall
+		r27:26 = memd(fp+#-16)
+FALLTHROUGH_TAIL_CALL __restore_r24_through_r27_and_deallocframe_before_tailcall __restore_r24_through_r25_and_deallocframe_before_tailcall
+	{
+		r25:24 = memd(fp+#-8)
+		deallocframe
+		jumpr lr
+	}
+FUNCTION_END __restore_r24_through_r25_and_deallocframe_before_tailcall
+
+
+FUNCTION_BEGIN __restore_r24_through_r27_and_deallocframe
+	{
+		lr = memw(fp+#4)
+		r27:26 = memd(fp+#-16)
+	}
+	{
+		r25:24 = memd(fp+#-8)
+		deallocframe
+		jumpr lr
+	}
+FUNCTION_END __restore_r24_through_r27_and_deallocframe
+
+
+FUNCTION_BEGIN __restore_r24_through_r25_and_deallocframe
+	{
+		r25:24 = memd(fp+#-8)
+		deallocframe
+	}
+		jumpr lr
+FUNCTION_END __restore_r24_through_r25_and_deallocframe
diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_abi2.s b/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_abi2.s
new file mode 100644
index 0000000000000..8734310fc3758
--- /dev/null
+++ b/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_abi2.s
@@ -0,0 +1,237 @@
+
+	.macro ABI2_FUNCTION_BEGIN name
+	.p2align 2
+	.section .text.\name,"ax",@progbits
+	.globl \name
+	.type  \name, @function
+\name:
+	.endm
+
+	.macro ABI2_FUNCTION_END name
+	.size  \name, . - \name
+	.endm
+
+
+ABI2_FUNCTION_BEGIN __save_r16_through_r27
+        {
+                memd(fp+#-48) = r27:26
+                memd(fp+#-40) = r25:24
+        }
+        {
+                memd(fp+#-32) = r23:22
+                memd(fp+#-24) = r21:20
+        }
+        {
+                memd(fp+#-16) = r19:18
+                memd(fp+#-8) = r17:16
+                jumpr lr
+        }
+ABI2_FUNCTION_END __save_r16_through_r27
+
+ABI2_FUNCTION_BEGIN __save_r16_through_r25
+        {
+                memd(fp+#-40) = r25:24
+                memd(fp+#-32) = r23:22
+        }
+        {
+                memd(fp+#-24) = r21:20
+                memd(fp+#-16) = r19:18
+        }
+        {
+                memd(fp+#-8) = r17:16
+                jumpr lr
+        }
+ABI2_FUNCTION_END __save_r16_through_r25
+
+ABI2_FUNCTION_BEGIN __save_r16_through_r23
+        {
+                memd(fp+#-32) = r23:22
+                memd(fp+#-24) = r21:20
+        }
+        {
+                memd(fp+#-16) = r19:18
+                memd(fp+#-8) = r17:16
+                jumpr lr
+        }
+ABI2_FUNCTION_END __save_r16_through_r23
+
+ABI2_FUNCTION_BEGIN __save_r16_through_r21
+        {
+                memd(fp+#-24) = r21:20
+                memd(fp+#-16) = r19:18
+        }
+        {
+                memd(fp+#-8) = r17:16
+                jumpr lr
+        }
+ABI2_FUNCTION_END __save_r16_through_r21
+
+ABI2_FUNCTION_BEGIN __save_r16_through_r19
+        {
+                memd(fp+#-16) = r19:18
+                memd(fp+#-8) = r17:16
+                jumpr lr
+        }
+ABI2_FUNCTION_END __save_r16_through_r19
+
+ABI2_FUNCTION_BEGIN __save_r16_through_r17
+        {
+                memd(fp+#-8) = r17:16
+                jumpr lr
+        }
+ABI2_FUNCTION_END __save_r16_through_r17
+
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe_before_tailcall
+                r27:26 = memd(fp+#-48)
+        {
+                r25:24 = memd(fp+#-40)
+                r23:22 = memd(fp+#-32)
+        }
+        {
+                r21:20 = memd(fp+#-24)
+                r19:18 = memd(fp+#-16)
+        }
+        {
+                r17:16 = memd(fp+#-8)
+                deallocframe
+                jumpr lr
+        }
+ABI2_FUNCTION_END __restore_r16_through_r27_and_deallocframe_before_tailcall
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe_before_tailcall
+        {
+                r25:24 = memd(fp+#-40)
+                r23:22 = memd(fp+#-32)
+        }
+        {
+                r21:20 = memd(fp+#-24)
+                r19:18 = memd(fp+#-16)
+        }
+        {
+                r17:16 = memd(fp+#-8)
+                deallocframe
+                jumpr lr
+        }
+ABI2_FUNCTION_END __restore_r16_through_r25_and_deallocframe_before_tailcall
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe_before_tailcall
+        {
+                r23:22 = memd(fp+#-32)
+                r21:20 = memd(fp+#-24)
+        }
+                r19:18 = memd(fp+#-16)
+        {
+                r17:16 = memd(fp+#-8)
+                deallocframe
+                jumpr lr
+        }
+ABI2_FUNCTION_END __restore_r16_through_r23_and_deallocframe_before_tailcall
+
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe_before_tailcall
+        {
+                r21:20 = memd(fp+#-24)
+                r19:18 = memd(fp+#-16)
+        }
+        {
+                r17:16 = memd(fp+#-8)
+                deallocframe
+                jumpr lr
+        }
+ABI2_FUNCTION_END __restore_r16_through_r21_and_deallocframe_before_tailcall
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe_before_tailcall
+                r19:18 = memd(fp+#-16)
+        {
+                r17:16 = memd(fp+#-8)
+                deallocframe
+                jumpr lr
+        }
+ABI2_FUNCTION_END __restore_r16_through_r19_and_deallocframe_before_tailcall
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe_before_tailcall
+        {
+                r17:16 = memd(fp+#-8)
+                deallocframe
+                jumpr lr
+        }
+ABI2_FUNCTION_END __restore_r16_through_r17_and_deallocframe_before_tailcall
+
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r27_and_deallocframe
+                r27:26 = memd(fp+#-48)
+        {
+                r25:24 = memd(fp+#-40)
+                r23:22 = memd(fp+#-32)
+        }
+        {
+                r21:20 = memd(fp+#-24)
+                r19:18 = memd(fp+#-16)
+        }
+	{
+		r17:16 = memd(fp+#-8)
+		dealloc_return
+	}
+ABI2_FUNCTION_END __restore_r16_through_r27_and_deallocframe
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r25_and_deallocframe
+        {
+                r25:24 = memd(fp+#-40)
+                r23:22 = memd(fp+#-32)
+        }
+        {
+                r21:20 = memd(fp+#-24)
+                r19:18 = memd(fp+#-16)
+        }
+	{
+		r17:16 = memd(fp+#-8)
+		dealloc_return
+	}
+ABI2_FUNCTION_END __restore_r16_through_r25_and_deallocframe
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r23_and_deallocframe
+        {
+                r23:22 = memd(fp+#-32)
+        }
+        {
+                r21:20 = memd(fp+#-24)
+                r19:18 = memd(fp+#-16)
+        }
+	{
+		r17:16 = memd(fp+#-8)
+		dealloc_return
+	}
+ABI2_FUNCTION_END __restore_r16_through_r23_and_deallocframe
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r21_and_deallocframe
+        {
+                r21:20 = memd(fp+#-24)
+                r19:18 = memd(fp+#-16)
+        }
+	{
+		r17:16 = memd(fp+#-8)
+		dealloc_return
+	}
+ABI2_FUNCTION_END __restore_r16_through_r21_and_deallocframe
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r19_and_deallocframe
+	{
+                r19:18 = memd(fp+#-16)
+		r17:16 = memd(fp+#-8)
+        }
+        {
+		dealloc_return
+	}
+ABI2_FUNCTION_END __restore_r16_through_r19_and_deallocframe
+
+ABI2_FUNCTION_BEGIN __restore_r16_through_r17_and_deallocframe
+	{
+		r17:16 = memd(fp+#-8)
+		dealloc_return
+	}
+ABI2_FUNCTION_END __restore_r16_through_r17_and_deallocframe
+
+ABI2_FUNCTION_BEGIN __deallocframe
+        dealloc_return
+ABI2_FUNCTION_END __deallocframe
diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_legacy.s b/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_legacy.s
new file mode 100644
index 0000000000000..4c539ea91f4af
--- /dev/null
+++ b/library/compiler-builtins/compiler-builtins/src/hexagon/common_entry_exit_legacy.s
@@ -0,0 +1,92 @@
+
+FUNCTION_BEGIN __save_r27_through_r16
+		memd(fp+#-48) = r17:16
+FALLTHROUGH_TAIL_CALL __save_r27_through_r16 __save_r27_through_r18
+		memd(fp+#-40) = r19:18
+FALLTHROUGH_TAIL_CALL __save_r27_through_r18 __save_r27_through_r20
+		memd(fp+#-32) = r21:20
+FALLTHROUGH_TAIL_CALL __save_r27_through_r20 __save_r27_through_r22
+		memd(fp+#-24) = r23:22
+FALLTHROUGH_TAIL_CALL __save_r27_through_r22 __save_r27_through_r24
+		memd(fp+#-16) = r25:24
+	{
+		memd(fp+#-8) = r27:26
+		jumpr lr
+	}
+FUNCTION_END __save_r27_through_r24
+
+
+FUNCTION_BEGIN __restore_r27_through_r20_and_deallocframe_before_sibcall
+	{
+		r21:20 = memd(fp+#-32)
+		r23:22 = memd(fp+#-24)
+	}
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe_before_sibcall __restore_r27_through_r24_and_deallocframe_before_sibcall
+	{
+		r25:24 = memd(fp+#-16)
+		jump __restore_r27_through_r26_and_deallocframe_before_sibcall
+	}
+FUNCTION_END __restore_r27_through_r24_and_deallocframe_before_sibcall
+
+
+FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe_before_sibcall
+		r17:16 = memd(fp+#-48)
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe_before_sibcall __restore_r27_through_r18_and_deallocframe_before_sibcall
+	{
+		r19:18 = memd(fp+#-40)
+		r21:20 = memd(fp+#-32)
+	}
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe_before_sibcall __restore_r27_through_r22_and_deallocframe_before_sibcall
+	{
+		r23:22 = memd(fp+#-24)
+		r25:24 = memd(fp+#-16)
+	}
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe_before_sibcall __restore_r27_through_r26_and_deallocframe_before_sibcall
+	{
+		r27:26 = memd(fp+#-8)
+		deallocframe
+		jumpr lr
+	}
+FUNCTION_END __restore_r27_through_r26_and_deallocframe_before_sibcall
+
+
+FUNCTION_BEGIN __restore_r27_through_r16_and_deallocframe
+	{
+		r17:16 = memd(fp+#-48)
+		r19:18 = memd(fp+#-40)
+	}
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r16_and_deallocframe __restore_r27_through_r20_and_deallocframe
+	{
+		r21:20 = memd(fp+#-32)
+		r23:22 = memd(fp+#-24)
+	}
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r20_and_deallocframe __restore_r27_through_r24_and_deallocframe
+	{
+		lr = memw(fp+#4)
+		r25:24 = memd(fp+#-16)
+	}
+	{
+		r27:26 = memd(fp+#-8)
+		deallocframe
+		jumpr lr
+	}
+FUNCTION_END __restore_r27_through_r24_and_deallocframe
+
+
+FUNCTION_BEGIN __restore_r27_through_r18_and_deallocframe
+	{
+		r19:18 = memd(fp+#-40)
+		r21:20 = memd(fp+#-32)
+	}
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r18_and_deallocframe __restore_r27_through_r22_and_deallocframe
+	{
+		r23:22 = memd(fp+#-24)
+		r25:24 = memd(fp+#-16)
+	}
+FALLTHROUGH_TAIL_CALL __restore_r27_through_r22_and_deallocframe __restore_r27_through_r26_and_deallocframe
+	{
+		r27:26 = memd(fp+#-8)
+		deallocframe
+	}
+		jumpr lr
+FUNCTION_END __restore_r27_through_r26_and_deallocframe
diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/func_macro.s b/library/compiler-builtins/compiler-builtins/src/hexagon/func_macro.s
index 9a1e11aebcb50..f5514cb7f057b 100644
--- a/library/compiler-builtins/compiler-builtins/src/hexagon/func_macro.s
+++ b/library/compiler-builtins/compiler-builtins/src/hexagon/func_macro.s
@@ -10,3 +10,11 @@
  .size \name, . - \name
  .endm
 
+ .macro FALLTHROUGH_TAIL_CALL name0 name1
+ .size \name0, . - \name0
+ .globl \name1
+ .type \name1, @function
+ .falign
+\name1:
+ .endm
+
diff --git a/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_likely_aligned.s b/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_likely_aligned.s
index 7e9b62f6a791c..416675833192e 100644
--- a/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_likely_aligned.s
+++ b/library/compiler-builtins/compiler-builtins/src/hexagon/memcpy_likely_aligned.s
@@ -31,6 +31,35 @@ FUNCTION_BEGIN __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
  }
 FUNCTION_END __hexagon_memcpy_likely_aligned_min32bytes_mult8bytes
 
+FUNCTION_BEGIN __hexagon_memcpy_dwloop
+ {
+  r5:4 = memd(r1)
+  r3 = #-3
+ }
+ {
+  memd(r0++#8) = r5:4
+  r5:4 = memd(r1+#8)
+  r3 += lsr(r2,#3)
+ }
+ {
+  memd(r0++#8) = r5:4
+  r5:4 = memd(r1+#16)
+  r1 = add(r1,#24)
+  loop0(1f,r3)
+ }
+ .falign
+1:
+ {
+  memd(r0++#8) = r5:4
+  r5:4 = memd(r1++#8)
+ }:endloop0
+ {
+  memd(r0) = r5:4
+  r0 -= add(r2,#-8)
+  jumpr r31
+ }
+FUNCTION_END __hexagon_memcpy_dwloop
+
 .Lmemcpy_call:
 
  jump memcpy@PLT
diff --git a/library/compiler-builtins/compiler-builtins/src/int/addsub.rs b/library/compiler-builtins/compiler-builtins/src/int/addsub.rs
index b2b21fc2c4401..ab5e8e1b5d873 100644
--- a/library/compiler-builtins/compiler-builtins/src/int/addsub.rs
+++ b/library/compiler-builtins/compiler-builtins/src/int/addsub.rs
@@ -1,4 +1,4 @@
-use crate::int::{DInt, Int, MinInt};
+use crate::support::{DInt, Int, MinInt};
 
 trait UAddSub: DInt + Int {
     fn uadd(self, other: Self) -> Self {
diff --git a/library/compiler-builtins/compiler-builtins/src/int/big.rs b/library/compiler-builtins/compiler-builtins/src/int/big.rs
deleted file mode 100644
index 8e06009090c09..0000000000000
--- a/library/compiler-builtins/compiler-builtins/src/int/big.rs
+++ /dev/null
@@ -1,295 +0,0 @@
-//! Integers used for wide operations, larger than `u128`.
-
-#![allow(unused)]
-
-use core::{fmt, ops};
-
-use crate::int::{DInt, HInt, Int, MinInt};
-
-const WORD_LO_MASK: u64 = 0x00000000ffffffff;
-const WORD_HI_MASK: u64 = 0xffffffff00000000;
-const WORD_FULL_MASK: u64 = 0xffffffffffffffff;
-const U128_LO_MASK: u128 = u64::MAX as u128;
-const U128_HI_MASK: u128 = (u64::MAX as u128) << 64;
-
-/// A 256-bit unsigned integer represented as 4 64-bit limbs.
-///
-/// Each limb is a native-endian number, but the array is little-limb-endian.
-#[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
-pub struct u256(pub [u64; 4]);
-
-impl u256 {
-    pub const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX]);
-
-    /// Reinterpret as a signed integer
-    pub fn signed(self) -> i256 {
-        i256(self.0)
-    }
-}
-
-/// A 256-bit signed integer represented as 4 64-bit limbs.
-///
-/// Each limb is a native-endian number, but the array is little-limb-endian.
-#[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
-pub struct i256(pub [u64; 4]);
-
-impl i256 {
-    /// Reinterpret as an unsigned integer
-    pub fn unsigned(self) -> u256 {
-        u256(self.0)
-    }
-}
-
-impl MinInt for u256 {
-    type OtherSign = i256;
-
-    type Unsigned = u256;
-
-    const SIGNED: bool = false;
-    const BITS: u32 = 256;
-    const ZERO: Self = Self([0u64; 4]);
-    const ONE: Self = Self([1, 0, 0, 0]);
-    const MIN: Self = Self([0u64; 4]);
-    const MAX: Self = Self([u64::MAX; 4]);
-}
-
-impl MinInt for i256 {
-    type OtherSign = u256;
-
-    type Unsigned = u256;
-
-    const SIGNED: bool = false;
-    const BITS: u32 = 256;
-    const ZERO: Self = Self([0u64; 4]);
-    const ONE: Self = Self([1, 0, 0, 0]);
-    const MIN: Self = Self([0, 0, 0, 1 << 63]);
-    const MAX: Self = Self([u64::MAX, u64::MAX, u64::MAX, u64::MAX >> 1]);
-}
-
-macro_rules! impl_common {
-    ($ty:ty) => {
-        impl ops::BitOr for $ty {
-            type Output = Self;
-
-            fn bitor(mut self, rhs: Self) -> Self::Output {
-                self.0[0] |= rhs.0[0];
-                self.0[1] |= rhs.0[1];
-                self.0[2] |= rhs.0[2];
-                self.0[3] |= rhs.0[3];
-                self
-            }
-        }
-
-        impl ops::Not for $ty {
-            type Output = Self;
-
-            fn not(self) -> Self::Output {
-                Self([!self.0[0], !self.0[1], !self.0[2], !self.0[3]])
-            }
-        }
-
-        impl ops::Shl<u32> for $ty {
-            type Output = Self;
-
-            fn shl(self, rhs: u32) -> Self::Output {
-                unimplemented!("only used to meet trait bounds")
-            }
-        }
-    };
-}
-
-impl_common!(i256);
-impl_common!(u256);
-
-impl ops::Shr<u32> for u256 {
-    type Output = Self;
-
-    fn shr(self, rhs: u32) -> Self::Output {
-        assert!(rhs < Self::BITS, "attempted to shift right with overflow");
-
-        if rhs == 0 {
-            return self;
-        }
-
-        let mut ret = self;
-        let byte_shift = rhs / 64;
-        let bit_shift = rhs % 64;
-
-        for idx in 0..4 {
-            let base_idx = idx + byte_shift as usize;
-
-            let Some(base) = ret.0.get(base_idx) else {
-                ret.0[idx] = 0;
-                continue;
-            };
-
-            let mut new_val = base >> bit_shift;
-
-            if let Some(new) = ret.0.get(base_idx + 1) {
-                new_val |= new.overflowing_shl(64 - bit_shift).0;
-            }
-
-            ret.0[idx] = new_val;
-        }
-
-        ret
-    }
-}
-
-macro_rules! word {
-    (1, $val:expr) => {
-        (($val >> (32 * 3)) & Self::from(WORD_LO_MASK)) as u64
-    };
-    (2, $val:expr) => {
-        (($val >> (32 * 2)) & Self::from(WORD_LO_MASK)) as u64
-    };
-    (3, $val:expr) => {
-        (($val >> (32 * 1)) & Self::from(WORD_LO_MASK)) as u64
-    };
-    (4, $val:expr) => {
-        (($val >> (32 * 0)) & Self::from(WORD_LO_MASK)) as u64
-    };
-}
-
-impl HInt for u128 {
-    type D = u256;
-
-    fn widen(self) -> Self::D {
-        let w0 = self & u128::from(u64::MAX);
-        let w1 = (self >> u64::BITS) & u128::from(u64::MAX);
-        u256([w0 as u64, w1 as u64, 0, 0])
-    }
-
-    fn zero_widen(self) -> Self::D {
-        self.widen()
-    }
-
-    fn zero_widen_mul(self, rhs: Self) -> Self::D {
-        let product11: u64 = word!(1, self) * word!(1, rhs);
-        let product12: u64 = word!(1, self) * word!(2, rhs);
-        let product13: u64 = word!(1, self) * word!(3, rhs);
-        let product14: u64 = word!(1, self) * word!(4, rhs);
-        let product21: u64 = word!(2, self) * word!(1, rhs);
-        let product22: u64 = word!(2, self) * word!(2, rhs);
-        let product23: u64 = word!(2, self) * word!(3, rhs);
-        let product24: u64 = word!(2, self) * word!(4, rhs);
-        let product31: u64 = word!(3, self) * word!(1, rhs);
-        let product32: u64 = word!(3, self) * word!(2, rhs);
-        let product33: u64 = word!(3, self) * word!(3, rhs);
-        let product34: u64 = word!(3, self) * word!(4, rhs);
-        let product41: u64 = word!(4, self) * word!(1, rhs);
-        let product42: u64 = word!(4, self) * word!(2, rhs);
-        let product43: u64 = word!(4, self) * word!(3, rhs);
-        let product44: u64 = word!(4, self) * word!(4, rhs);
-
-        let sum0: u128 = u128::from(product44);
-        let sum1: u128 = u128::from(product34) + u128::from(product43);
-        let sum2: u128 = u128::from(product24) + u128::from(product33) + u128::from(product42);
-        let sum3: u128 = u128::from(product14)
-            + u128::from(product23)
-            + u128::from(product32)
-            + u128::from(product41);
-        let sum4: u128 = u128::from(product13) + u128::from(product22) + u128::from(product31);
-        let sum5: u128 = u128::from(product12) + u128::from(product21);
-        let sum6: u128 = u128::from(product11);
-
-        let r0: u128 =
-            (sum0 & u128::from(WORD_FULL_MASK)) + ((sum1 & u128::from(WORD_LO_MASK)) << 32);
-        let r1: u128 = (sum0 >> 64)
-            + ((sum1 >> 32) & u128::from(WORD_FULL_MASK))
-            + (sum2 & u128::from(WORD_FULL_MASK))
-            + ((sum3 << 32) & u128::from(WORD_HI_MASK));
-
-        let (lo, carry) = r0.overflowing_add(r1 << 64);
-        let hi = (r1 >> 64)
-            + (sum1 >> 96)
-            + (sum2 >> 64)
-            + (sum3 >> 32)
-            + sum4
-            + (sum5 << 32)
-            + (sum6 << 64)
-            + u128::from(carry);
-
-        u256([
-            (lo & U128_LO_MASK) as u64,
-            ((lo >> 64) & U128_LO_MASK) as u64,
-            (hi & U128_LO_MASK) as u64,
-            ((hi >> 64) & U128_LO_MASK) as u64,
-        ])
-    }
-
-    fn widen_mul(self, rhs: Self) -> Self::D {
-        self.zero_widen_mul(rhs)
-    }
-
-    fn widen_hi(self) -> Self::D {
-        self.widen() << <Self as MinInt>::BITS
-    }
-}
-
-impl HInt for i128 {
-    type D = i256;
-
-    fn widen(self) -> Self::D {
-        let mut ret = self.unsigned().zero_widen().signed();
-        if self.is_negative() {
-            ret.0[2] = u64::MAX;
-            ret.0[3] = u64::MAX;
-        }
-        ret
-    }
-
-    fn zero_widen(self) -> Self::D {
-        self.unsigned().zero_widen().signed()
-    }
-
-    fn zero_widen_mul(self, rhs: Self) -> Self::D {
-        self.unsigned().zero_widen_mul(rhs.unsigned()).signed()
-    }
-
-    fn widen_mul(self, rhs: Self) -> Self::D {
-        unimplemented!("signed i128 widening multiply is not used")
-    }
-
-    fn widen_hi(self) -> Self::D {
-        self.widen() << <Self as MinInt>::BITS
-    }
-}
-
-impl DInt for u256 {
-    type H = u128;
-
-    fn lo(self) -> Self::H {
-        let mut tmp = [0u8; 16];
-        tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
-        tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
-        u128::from_le_bytes(tmp)
-    }
-
-    fn hi(self) -> Self::H {
-        let mut tmp = [0u8; 16];
-        tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
-        tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
-        u128::from_le_bytes(tmp)
-    }
-}
-
-impl DInt for i256 {
-    type H = i128;
-
-    fn lo(self) -> Self::H {
-        let mut tmp = [0u8; 16];
-        tmp[..8].copy_from_slice(&self.0[0].to_le_bytes());
-        tmp[8..].copy_from_slice(&self.0[1].to_le_bytes());
-        i128::from_le_bytes(tmp)
-    }
-
-    fn hi(self) -> Self::H {
-        let mut tmp = [0u8; 16];
-        tmp[..8].copy_from_slice(&self.0[2].to_le_bytes());
-        tmp[8..].copy_from_slice(&self.0[3].to_le_bytes());
-        i128::from_le_bytes(tmp)
-    }
-}
diff --git a/library/compiler-builtins/compiler-builtins/src/int/leading_zeros.rs b/library/compiler-builtins/compiler-builtins/src/int/leading_zeros.rs
index aa5cb39935ad8..3b920ecff6b20 100644
--- a/library/compiler-builtins/compiler-builtins/src/int/leading_zeros.rs
+++ b/library/compiler-builtins/compiler-builtins/src/int/leading_zeros.rs
@@ -9,7 +9,7 @@ pub use implementation::{leading_zeros_default, leading_zeros_riscv};
 pub(crate) use implementation::{leading_zeros_default, leading_zeros_riscv};
 
 mod implementation {
-    use crate::int::{CastFrom, Int};
+    use crate::support::{CastFrom, Int};
 
     /// Returns the number of leading binary zeros in `x`.
     #[allow(dead_code)]
diff --git a/library/compiler-builtins/compiler-builtins/src/int/mod.rs b/library/compiler-builtins/compiler-builtins/src/int/mod.rs
index 518ccb23f8009..cd4011a933815 100644
--- a/library/compiler-builtins/compiler-builtins/src/int/mod.rs
+++ b/library/compiler-builtins/compiler-builtins/src/int/mod.rs
@@ -1,18 +1,10 @@
 mod specialized_div_rem;
 
 pub mod addsub;
-mod big;
 pub mod bswap;
 pub mod leading_zeros;
 pub mod mul;
 pub mod sdiv;
 pub mod shift;
 pub mod trailing_zeros;
-mod traits;
 pub mod udiv;
-
-pub use big::{i256, u256};
-#[cfg(not(feature = "unstable-public-internals"))]
-pub(crate) use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
-#[cfg(feature = "unstable-public-internals")]
-pub use traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt};
diff --git a/library/compiler-builtins/compiler-builtins/src/int/mul.rs b/library/compiler-builtins/compiler-builtins/src/int/mul.rs
index 040c69342d148..9331ab60a4fc1 100644
--- a/library/compiler-builtins/compiler-builtins/src/int/mul.rs
+++ b/library/compiler-builtins/compiler-builtins/src/int/mul.rs
@@ -1,4 +1,4 @@
-use crate::int::{DInt, HInt, Int};
+use crate::support::{DInt, HInt, Int};
 
 trait Mul: DInt + Int
 where
@@ -98,11 +98,63 @@ impl_signed_mulo!(i64_overflowing_mul, i64, u64);
 impl_signed_mulo!(i128_overflowing_mul, i128, u128);
 
 intrinsics! {
+    // Ancient Egyptian/Ethiopian/Russian multiplication method
+    // see https://en.wikipedia.org/wiki/Ancient_Egyptian_multiplication
+    //
+    // This is a long-available stock algorithm; e.g. it is documented in
+    // Knuth's "The Art of Computer Programming" volume 2 (under the section
+    // "Evaluation of Powers") since at least the 2nd edition (1981).
+    //
+    // The main attraction of this method is that it implements (software)
+    // multiplication atop four simple operations: doubling, halving, checking
+    // if a value is even/odd, and addition. This is *not* considered to be the
+    // fastest multiplication method, but it may be amongst the simplest (and
+    // smallest with respect to code size).
+    //
+    // for reference, see also implementation from gcc
+    // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c
+    //
+    // and from LLVM (in relatively readable RISC-V assembly):
+    // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/riscv/int_mul_impl.inc
+    #[cfg(any(target_arch = "riscv32", target_arch = "riscv64", target_arch = "m68k"))]
+    pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 {
+        let (mut a, mut b) = (a, b);
+        let mut r: u32 = 0;
+
+        while a > 0 {
+            if a & 1 > 0 {
+                r = r.wrapping_add(b);
+            }
+            a >>= 1;
+            b <<= 1;
+        }
+
+        r
+    }
+
     #[maybe_use_optimized_c_shim]
     #[arm_aeabi_alias = __aeabi_lmul]
-    #[cfg(any(not(any(target_arch = "riscv32", target_arch = "riscv64")), target_feature = "m"))]
     pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 {
-        a.mul(b)
+        #[cfg(all(any(target_arch = "riscv32", target_arch = "riscv64"), not(target_feature = "m")))]
+        {
+            let (mut a, mut b) = (a, b);
+            let mut r: u64 = 0;
+
+            while a > 0 {
+                if a & 1 > 0 {
+                    r = r.wrapping_add(b);
+                }
+                a >>= 1;
+                b <<= 1;
+            }
+
+            r
+        }
+
+        #[cfg(not(all(any(target_arch = "riscv32", target_arch = "riscv64"), not(target_feature = "m"))))]
+        {
+            a.mul(b)
+        }
     }
 
     pub extern "C" fn __multi3(a: i128, b: i128) -> i128 {
@@ -139,4 +191,5 @@ intrinsics! {
         *oflow = o.into();
         mul
     }
+
 }
diff --git a/library/compiler-builtins/compiler-builtins/src/int/shift.rs b/library/compiler-builtins/compiler-builtins/src/int/shift.rs
index a85c1b33d6714..f5cac9ad4e367 100644
--- a/library/compiler-builtins/compiler-builtins/src/int/shift.rs
+++ b/library/compiler-builtins/compiler-builtins/src/int/shift.rs
@@ -1,4 +1,4 @@
-use crate::int::{DInt, HInt, Int, MinInt};
+use crate::support::{DInt, HInt, Int, MinInt};
 
 trait Ashl: DInt {
     /// Returns `a << b`, requires `b < Self::BITS`
diff --git a/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/mod.rs b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/mod.rs
index 5ffe1f59b4db6..902c191d65285 100644
--- a/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/mod.rs
+++ b/library/compiler-builtins/compiler-builtins/src/int/specialized_div_rem/mod.rs
@@ -144,7 +144,7 @@ fn u64_by_u64_div_rem(duo: u64, div: u64) -> (u64, u64) {
         target_family = "wasm",
         not(any(target_pointer_width = "16", target_pointer_width = "32")),
     ),
-    not(all(not(feature = "no-asm"), target_arch = "x86_64")),
+    not(all(feature = "arch", target_arch = "x86_64")),
     not(any(target_arch = "sparc", target_arch = "sparc64"))
 ))]
 impl_trifecta!(
@@ -165,7 +165,7 @@ impl_trifecta!(
         target_family = "wasm",
         not(any(target_pointer_width = "16", target_pointer_width = "32")),
     )),
-    not(all(not(feature = "no-asm"), target_arch = "x86_64")),
+    not(all(feature = "arch", target_arch = "x86_64")),
     not(any(target_arch = "sparc", target_arch = "sparc64"))
 ))]
 impl_delegate!(
@@ -186,7 +186,7 @@ impl_delegate!(
 ///
 /// If the quotient does not fit in a `u64`, a floating point exception occurs.
 /// If `div == 0`, then a division by zero exception occurs.
-#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))]
+#[cfg(all(feature = "arch", target_arch = "x86_64"))]
 #[inline]
 unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
     let duo_lo = duo as u64;
@@ -208,7 +208,7 @@ unsafe fn u128_by_u64_div_rem(duo: u128, div: u64) -> (u64, u64) {
 }
 
 // use `asymmetric` instead of `trifecta` on x86_64
-#[cfg(all(not(feature = "no-asm"), target_arch = "x86_64"))]
+#[cfg(all(feature = "arch", target_arch = "x86_64"))]
 impl_asymmetric!(
     u128_div_rem,
     zero_div_fn,
@@ -237,7 +237,7 @@ fn u32_by_u32_div_rem(duo: u32, div: u32) -> (u32, u32) {
 // When not on x86 and the pointer width is not 64, use `delegate` since the division size is larger
 // than register size.
 #[cfg(all(
-    not(all(not(feature = "no-asm"), target_arch = "x86")),
+    not(all(feature = "arch", target_arch = "x86")),
     not(target_pointer_width = "64")
 ))]
 impl_delegate!(
@@ -254,7 +254,7 @@ impl_delegate!(
 
 // When not on x86 and the pointer width is 64, use `binary_long`.
 #[cfg(all(
-    not(all(not(feature = "no-asm"), target_arch = "x86")),
+    not(all(feature = "arch", target_arch = "x86")),
     target_pointer_width = "64"
 ))]
 impl_binary_long!(
@@ -272,7 +272,7 @@ impl_binary_long!(
 ///
 /// If the quotient does not fit in a `u32`, a floating point exception occurs.
 /// If `div == 0`, then a division by zero exception occurs.
-#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))]
+#[cfg(all(feature = "arch", target_arch = "x86"))]
 #[inline]
 unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) {
     let duo_lo = duo as u32;
@@ -294,7 +294,7 @@ unsafe fn u64_by_u32_div_rem(duo: u64, div: u32) -> (u32, u32) {
 }
 
 // use `asymmetric` instead of `delegate` on x86
-#[cfg(all(not(feature = "no-asm"), target_arch = "x86"))]
+#[cfg(all(feature = "arch", target_arch = "x86"))]
 impl_asymmetric!(
     u64_div_rem,
     zero_div_fn,
diff --git a/library/compiler-builtins/compiler-builtins/src/int/trailing_zeros.rs b/library/compiler-builtins/compiler-builtins/src/int/trailing_zeros.rs
index 1b0ae5b73ad24..f42e9926dbb11 100644
--- a/library/compiler-builtins/compiler-builtins/src/int/trailing_zeros.rs
+++ b/library/compiler-builtins/compiler-builtins/src/int/trailing_zeros.rs
@@ -4,7 +4,7 @@ pub use implementation::trailing_zeros;
 pub(crate) use implementation::trailing_zeros;
 
 mod implementation {
-    use crate::int::{CastFrom, Int};
+    use crate::support::{CastFrom, Int};
 
     /// Returns number of trailing binary zeros in `x`.
     #[allow(dead_code)]
diff --git a/library/compiler-builtins/compiler-builtins/src/int/traits.rs b/library/compiler-builtins/compiler-builtins/src/int/traits.rs
deleted file mode 100644
index 25b9718ad53fb..0000000000000
--- a/library/compiler-builtins/compiler-builtins/src/int/traits.rs
+++ /dev/null
@@ -1,99 +0,0 @@
-pub use crate::support::{CastFrom, CastInto, Int, MinInt};
-
-/// Trait for integers twice the bit width of another integer. This is implemented for all
-/// primitives except for `u8`, because there is not a smaller primitive.
-pub trait DInt: MinInt {
-    /// Integer that is half the bit width of the integer this trait is implemented for
-    type H: HInt<D = Self>;
-
-    /// Returns the low half of `self`
-    fn lo(self) -> Self::H;
-    /// Returns the high half of `self`
-    fn hi(self) -> Self::H;
-    /// Returns the low and high halves of `self` as a tuple
-    fn lo_hi(self) -> (Self::H, Self::H) {
-        (self.lo(), self.hi())
-    }
-    /// Constructs an integer using lower and higher half parts
-    fn from_lo_hi(lo: Self::H, hi: Self::H) -> Self {
-        lo.zero_widen() | hi.widen_hi()
-    }
-}
-
-/// Trait for integers half the bit width of another integer. This is implemented for all
-/// primitives except for `u128`, because it there is not a larger primitive.
-pub trait HInt: Int {
-    /// Integer that is double the bit width of the integer this trait is implemented for
-    type D: DInt<H = Self> + MinInt;
-
-    // NB: some of the below methods could have default implementations (e.g. `widen_hi`), but for
-    // unknown reasons this can cause infinite recursion when optimizations are disabled. See
-    // <https://github.com/rust-lang/compiler-builtins/pull/707> for context.
-
-    /// Widens (using default extension) the integer to have double bit width
-    fn widen(self) -> Self::D;
-    /// Widens (zero extension only) the integer to have double bit width. This is needed to get
-    /// around problems with associated type bounds (such as `Int<Othersign: DInt>`) being unstable
-    fn zero_widen(self) -> Self::D;
-    /// Widens the integer to have double bit width and shifts the integer into the higher bits
-    fn widen_hi(self) -> Self::D;
-    /// Widening multiplication with zero widening. This cannot overflow.
-    fn zero_widen_mul(self, rhs: Self) -> Self::D;
-    /// Widening multiplication. This cannot overflow.
-    fn widen_mul(self, rhs: Self) -> Self::D;
-}
-
-macro_rules! impl_d_int {
-    ($($X:ident $D:ident),*) => {
-        $(
-            impl DInt for $D {
-                type H = $X;
-
-                fn lo(self) -> Self::H {
-                    self as $X
-                }
-                fn hi(self) -> Self::H {
-                    (self >> <$X as MinInt>::BITS) as $X
-                }
-            }
-        )*
-    };
-}
-
-macro_rules! impl_h_int {
-    ($($H:ident $uH:ident $X:ident),*) => {
-        $(
-            impl HInt for $H {
-                type D = $X;
-
-                fn widen(self) -> Self::D {
-                    self as $X
-                }
-                fn zero_widen(self) -> Self::D {
-                    (self as $uH) as $X
-                }
-                fn zero_widen_mul(self, rhs: Self) -> Self::D {
-                    self.zero_widen().wrapping_mul(rhs.zero_widen())
-                }
-                fn widen_mul(self, rhs: Self) -> Self::D {
-                    self.widen().wrapping_mul(rhs.widen())
-                }
-                fn widen_hi(self) -> Self::D {
-                    (self as $X) << <Self as MinInt>::BITS
-                }
-            }
-        )*
-    };
-}
-
-impl_d_int!(u8 u16, u16 u32, u32 u64, u64 u128, i8 i16, i16 i32, i32 i64, i64 i128);
-impl_h_int!(
-    u8 u8 u16,
-    u16 u16 u32,
-    u32 u32 u64,
-    u64 u64 u128,
-    i8 u8 i16,
-    i16 u16 i32,
-    i32 u32 i64,
-    i64 u64 i128
-);
diff --git a/library/compiler-builtins/compiler-builtins/src/lib.rs b/library/compiler-builtins/compiler-builtins/src/lib.rs
index 07960222f20f0..9e847206caf19 100644
--- a/library/compiler-builtins/compiler-builtins/src/lib.rs
+++ b/library/compiler-builtins/compiler-builtins/src/lib.rs
@@ -48,6 +48,9 @@ pub mod mem;
 pub mod sync;
 
 // `libm` expects its `support` module to be available in the crate root.
+#[cfg(feature = "unstable-public-internals")]
+pub use math::libm_math::support;
+#[cfg(not(feature = "unstable-public-internals"))]
 use math::libm_math::support;
 
 #[cfg(target_arch = "arm")]
@@ -60,7 +63,10 @@ pub mod aarch64;
 // in the builtins-test tests. So this is a way of enabling the module during testing.
 #[cfg(all(
     target_arch = "aarch64",
-    any(target_feature = "outline-atomics", feature = "mangled-names")
+    any(
+        target_feature = "outline-atomics",
+        feature = "unstable-public-internals"
+    )
 ))]
 pub mod aarch64_outline_atomics;
 
@@ -70,9 +76,6 @@ pub mod avr;
 #[cfg(target_arch = "hexagon")]
 pub mod hexagon;
 
-#[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))]
-pub mod riscv;
-
 #[cfg(target_arch = "x86")]
 pub mod x86;
 
diff --git a/library/compiler-builtins/compiler-builtins/src/macros.rs b/library/compiler-builtins/compiler-builtins/src/macros.rs
index 203cd0949ac52..c5e49f7780641 100644
--- a/library/compiler-builtins/compiler-builtins/src/macros.rs
+++ b/library/compiler-builtins/compiler-builtins/src/macros.rs
@@ -254,7 +254,7 @@ macro_rules! intrinsics {
             $($body)*
         }
 
-        #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))]
+        #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), feature = "unmangled-names"))]
         mod $name {
             #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
@@ -290,7 +290,7 @@ macro_rules! intrinsics {
             $($body)*
         }
 
-        #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), not(feature = "mangled-names")))]
+        #[cfg(all(target_vendor = "apple", any(target_arch = "x86", target_arch = "x86_64"), feature = "unmangled-names"))]
         mod $name {
             #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
@@ -331,7 +331,7 @@ macro_rules! intrinsics {
             $($body)*
         }
 
-        #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))]
+        #[cfg(all(target_arch = "arm", feature = "unmangled-names"))]
         mod $name {
             #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
@@ -341,7 +341,7 @@ macro_rules! intrinsics {
             }
         }
 
-        #[cfg(all(target_arch = "arm", not(feature = "mangled-names")))]
+        #[cfg(all(target_arch = "arm", feature = "unmangled-names"))]
         mod $alias {
             #[unsafe(no_mangle)]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
@@ -392,7 +392,7 @@ macro_rules! intrinsics {
         intrinsics!($($rest)*);
     );
 
-    // C mem* functions are only generated when the "mem" feature is enabled.
+    // C mem* functions are only exposed via `no_mangle` when the "mem" feature is enabled.
     (
         #[mem_builtin]
         $(#[$($attr:tt)*])*
@@ -407,7 +407,7 @@ macro_rules! intrinsics {
             $($body)*
         }
 
-        #[cfg(all(feature = "mem", not(feature = "mangled-names")))]
+        #[cfg(all(feature = "mem", feature = "unmangled-names"))]
         mod $name {
             $(#[$($attr)*])*
             #[unsafe(no_mangle)]
@@ -435,7 +435,7 @@ macro_rules! intrinsics {
         pub mod $name {
             #[unsafe(naked)]
             $(#[$($attr)*])*
-            #[cfg_attr(not(feature = "mangled-names"), unsafe(no_mangle))]
+            #[cfg_attr(feature = "unmangled-names", unsafe(no_mangle))]
             #[cfg_attr(not(any(all(windows, target_env = "gnu"), target_os = "cygwin")), linkage = "weak")]
             pub unsafe extern $abi fn $name( $($argname: $ty),* ) $(-> $ret)? {
                 $($body)*
@@ -470,7 +470,7 @@ macro_rules! intrinsics {
             $($body)*
         }
 
-        #[cfg(not(feature = "mangled-names"))]
+        #[cfg(feature = "unmangled-names")]
         mod $name {
             $(#[$($attr)*])*
             #[unsafe(no_mangle)]
diff --git a/library/compiler-builtins/compiler-builtins/src/mem/impls.rs b/library/compiler-builtins/compiler-builtins/src/mem/impls.rs
index 9681f5d6dac6e..c7cfa6fb4618e 100644
--- a/library/compiler-builtins/compiler-builtins/src/mem/impls.rs
+++ b/library/compiler-builtins/compiler-builtins/src/mem/impls.rs
@@ -9,7 +9,7 @@
 // ptr::add in these loops will wrap. And if compiler-builtins is compiled with cfg(ub_checks),
 // this will fail a UB check at runtime.
 //
-// Since this scenario is UB, we are within our rights hit this check and halt execution...
+// Since this scenario is UB, we are within our rights to hit this check and halt execution...
 // But we are also within our rights to try to make it work.
 // We use wrapping_add/wrapping_sub for pointer arithmetic in this module in an attempt to support
 // this use. Of course this is not a guarantee that such use will work, it just means that this
@@ -35,7 +35,7 @@ const WORD_COPY_THRESHOLD: usize = if 2 * WORD_SIZE > 16 {
     16
 };
 
-#[cfg(feature = "mem-unaligned")]
+#[cfg(mem_unaligned)]
 unsafe fn read_usize_unaligned(x: *const usize) -> usize {
     // Do not use `core::ptr::read_unaligned` here, since it calls `copy_nonoverlapping` which
     // is translated to memcpy in LLVM.
@@ -46,7 +46,7 @@ unsafe fn read_usize_unaligned(x: *const usize) -> usize {
 /// Loads a `T`-sized chunk from `src` into `dst` at offset `offset`, if that does not exceed
 /// `load_sz`. The offset pointers must both be `T`-aligned. Returns the new offset, advanced by the
 /// chunk size if a load happened.
-#[cfg(not(feature = "mem-unaligned"))]
+#[cfg(not(mem_unaligned))]
 #[inline(always)]
 unsafe fn load_chunk_aligned<T: Copy>(
     src: *const usize,
@@ -66,7 +66,7 @@ unsafe fn load_chunk_aligned<T: Copy>(
 /// Load `load_sz` many bytes from `src`, which must be usize-aligned. Acts as if we did a `usize`
 /// read with the out-of-bounds part filled with 0s.
 /// `load_sz` be strictly less than `WORD_SIZE`.
-#[cfg(not(feature = "mem-unaligned"))]
+#[cfg(not(mem_unaligned))]
 #[inline(always)]
 unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
@@ -88,7 +88,7 @@ unsafe fn load_aligned_partial(src: *const usize, load_sz: usize) -> usize {
 /// `usize`-aligned. The bytes are returned as the *last* bytes of the return value, i.e., this acts
 /// as if we had done a `usize` read from `src`, with the out-of-bounds part filled with 0s.
 /// `load_sz` be strictly less than `WORD_SIZE`.
-#[cfg(not(feature = "mem-unaligned"))]
+#[cfg(not(mem_unaligned))]
 #[inline(always)]
 unsafe fn load_aligned_end_partial(src: *const usize, load_sz: usize) -> usize {
     debug_assert!(load_sz < WORD_SIZE);
@@ -136,7 +136,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
 
     /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
     /// `src` *must not* be `usize`-aligned.
-    #[cfg(not(feature = "mem-unaligned"))]
+    #[cfg(not(mem_unaligned))]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
         debug_assert!(n > 0 && n % WORD_SIZE == 0);
@@ -185,7 +185,7 @@ pub unsafe fn copy_forward(mut dest: *mut u8, mut src: *const u8, mut n: usize)
 
     /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
     /// `src` *must not* be `usize`-aligned.
-    #[cfg(feature = "mem-unaligned")]
+    #[cfg(mem_unaligned)]
     #[inline(always)]
     unsafe fn copy_forward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
         let mut dest_usize = dest as *mut usize;
@@ -252,7 +252,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
 
     /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
     /// `src` *must not* be `usize`-aligned.
-    #[cfg(not(feature = "mem-unaligned"))]
+    #[cfg(not(mem_unaligned))]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
         debug_assert!(n > 0 && n % WORD_SIZE == 0);
@@ -301,7 +301,7 @@ pub unsafe fn copy_backward(dest: *mut u8, src: *const u8, mut n: usize) {
 
     /// `n` is in units of bytes, but must be a multiple of the word size and must not be 0.
     /// `src` *must not* be `usize`-aligned.
-    #[cfg(feature = "mem-unaligned")]
+    #[cfg(mem_unaligned)]
     #[inline(always)]
     unsafe fn copy_backward_misaligned_words(dest: *mut u8, src: *const u8, n: usize) {
         let mut dest_usize = dest as *mut usize;
diff --git a/library/compiler-builtins/compiler-builtins/src/mem/mod.rs b/library/compiler-builtins/compiler-builtins/src/mem/mod.rs
index a227f60a2949b..ac41cd33416f6 100644
--- a/library/compiler-builtins/compiler-builtins/src/mem/mod.rs
+++ b/library/compiler-builtins/compiler-builtins/src/mem/mod.rs
@@ -4,10 +4,7 @@
 #![allow(unsafe_op_in_unsafe_fn)]
 
 // memcpy/memmove/memset have optimized implementations on some architectures
-#[cfg_attr(
-    all(not(feature = "no-asm"), target_arch = "x86_64"),
-    path = "x86_64.rs"
-)]
+#[cfg_attr(all(feature = "arch", target_arch = "x86_64"), path = "x86_64.rs")]
 mod impls;
 
 intrinsics! {
diff --git a/library/compiler-builtins/compiler-builtins/src/probestack.rs b/library/compiler-builtins/compiler-builtins/src/probestack.rs
index 1cab64ea113c5..c4a2eeb0e0178 100644
--- a/library/compiler-builtins/compiler-builtins/src/probestack.rs
+++ b/library/compiler-builtins/compiler-builtins/src/probestack.rs
@@ -41,7 +41,7 @@
 //! probes on any other architecture like ARM or PowerPC64. LLVM I'm sure would
 //! be more than welcome to accept such a change!
 
-#![cfg(not(feature = "mangled-names"))]
+#![cfg(feature = "unmangled-names")]
 // Windows and Cygwin already has builtins to do this.
 #![cfg(not(any(windows, target_os = "cygwin")))]
 // We only define stack probing for these architectures today.
diff --git a/library/compiler-builtins/compiler-builtins/src/riscv.rs b/library/compiler-builtins/compiler-builtins/src/riscv.rs
deleted file mode 100644
index bf31255334193..0000000000000
--- a/library/compiler-builtins/compiler-builtins/src/riscv.rs
+++ /dev/null
@@ -1,50 +0,0 @@
-intrinsics! {
-    // Ancient Egyptian/Ethiopian/Russian multiplication method
-    // see https://en.wikipedia.org/wiki/Ancient_Egyptian_multiplication
-    //
-    // This is a long-available stock algorithm; e.g. it is documented in
-    // Knuth's "The Art of Computer Programming" volume 2 (under the section
-    // "Evaluation of Powers") since at least the 2nd edition (1981).
-    //
-    // The main attraction of this method is that it implements (software)
-    // multiplication atop four simple operations: doubling, halving, checking
-    // if a value is even/odd, and addition. This is *not* considered to be the
-    // fastest multiplication method, but it may be amongst the simplest (and
-    // smallest with respect to code size).
-    //
-    // for reference, see also implementation from gcc
-    // https://raw.githubusercontent.com/gcc-mirror/gcc/master/libgcc/config/epiphany/mulsi3.c
-    //
-    // and from LLVM (in relatively readable RISC-V assembly):
-    // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/riscv/int_mul_impl.inc
-    pub extern "C" fn __mulsi3(a: u32, b: u32) -> u32 {
-        let (mut a, mut b) = (a, b);
-        let mut r: u32 = 0;
-
-        while a > 0 {
-            if a & 1 > 0 {
-                r = r.wrapping_add(b);
-            }
-            a >>= 1;
-            b <<= 1;
-        }
-
-        r
-    }
-
-    #[cfg(not(target_feature = "m"))]
-    pub extern "C" fn __muldi3(a: u64, b: u64) -> u64 {
-        let (mut a, mut b) = (a, b);
-        let mut r: u64 = 0;
-
-        while a > 0 {
-            if a & 1 > 0 {
-                r = r.wrapping_add(b);
-            }
-            a >>= 1;
-            b <<= 1;
-        }
-
-        r
-    }
-}
diff --git a/library/compiler-builtins/crates/api-list-common/Cargo.toml b/library/compiler-builtins/crates/api-list-common/Cargo.toml
new file mode 100644
index 0000000000000..bf9262eb828ff
--- /dev/null
+++ b/library/compiler-builtins/crates/api-list-common/Cargo.toml
@@ -0,0 +1,6 @@
+[package]
+name = "api-list-common"
+version = "0.1.0"
+edition = "2024"
+publish = false
+license = "MIT OR Apache-2.0"
diff --git a/library/compiler-builtins/crates/api-list-common/src/lib.rs b/library/compiler-builtins/crates/api-list-common/src/lib.rs
new file mode 100644
index 0000000000000..3e5868e752bcb
--- /dev/null
+++ b/library/compiler-builtins/crates/api-list-common/src/lib.rs
@@ -0,0 +1,1557 @@
+//! A list of API we have available, shared among various test crates.
+
+use std::fmt;
+use std::sync::LazyLock;
+
+/// Convenient structure for defining items, gets expanded into a flat structure.
+struct NestedOp {
+    rust_sig: Signature,
+    c_sig: Option<Signature>,
+    fn_list: &'static [&'static str],
+    scope: OpScope,
+}
+
+/// Indicate where a function is defined and whether it is public or private.
+#[derive(Clone, Copy, Debug)]
+pub enum OpScope {
+    /// Part of `libm`'s public API.
+    LibmPublic,
+    /// Functions internal to `libm`, e.g. `rem_pio2`.
+    LibmPrivate,
+    /// Functions part of the public API for `compiler-builtins`.
+    BuiltinsPublic,
+}
+
+impl OpScope {
+    /// Where we should look for functions of this scope.
+    pub const fn path_root(self) -> &'static str {
+        match self {
+            OpScope::LibmPublic => "libm",
+            OpScope::LibmPrivate => todo!(),
+            OpScope::BuiltinsPublic => "crate::builtins_wrapper",
+        }
+    }
+
+    pub fn defined_in_compiler_builtins(self) -> bool {
+        match self {
+            OpScope::LibmPublic | OpScope::LibmPrivate => false,
+            OpScope::BuiltinsPublic => true,
+        }
+    }
+}
+
+/// We need a flat list to work with most of the time, but define things as a more convenient
+/// nested list.
+const ALL_OPERATIONS_NESTED: &[NestedOp] = &[
+    /********************************
+     * compiler-builtins operations *
+     ********************************/
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F16, Ty::F16],
+            returns: &[Ty::F16],
+        },
+        c_sig: None,
+        fn_list: &["addf16", "mulf16", "subf16"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32, Ty::F32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["addf32", "divf32", "mulf32", "subf32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64, Ty::F64],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["addf64", "divf64", "mulf64", "subf64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128, Ty::F128],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["addf128", "divf128", "mulf128", "subf128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32, Ty::I32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["powif32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64, Ty::I32],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["powif64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128, Ty::I32],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["powif128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    /* Comparison */
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F16, Ty::F16],
+            returns: &[Ty::Bool],
+        },
+        c_sig: None,
+        fn_list: &[
+            "eqf16", "gef16", "gtf16", "lef16", "ltf16", "nef16", "unordf16",
+        ],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32, Ty::F32],
+            returns: &[Ty::Bool],
+        },
+        c_sig: None,
+        fn_list: &[
+            "eqf32", "gef32", "gtf32", "lef32", "ltf32", "nef32", "unordf32",
+        ],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64, Ty::F64],
+            returns: &[Ty::Bool],
+        },
+        c_sig: None,
+        fn_list: &[
+            "eqf64", "gef64", "gtf64", "lef64", "ltf64", "nef64", "unordf64",
+        ],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128, Ty::F128],
+            returns: &[Ty::Bool],
+        },
+        c_sig: None,
+        fn_list: &[
+            "eqf128",
+            "gef128",
+            "gtf128",
+            "lef128",
+            "ltf128",
+            "nef128",
+            "unordf128",
+        ],
+        scope: OpScope::BuiltinsPublic,
+    },
+    /* conversion */
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F16],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["extend_f16_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F16],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["extend_f16_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F16],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["extend_f16_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["extend_f32_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["extend_f32_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["extend_f64_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::F16],
+        },
+        c_sig: None,
+        fn_list: &["narrow_f32_f16"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::F16],
+        },
+        c_sig: None,
+        fn_list: &["narrow_f64_f16"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::F16],
+        },
+        c_sig: None,
+        fn_list: &["narrow_f128_f16"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["narrow_f64_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["narrow_f128_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["narrow_f128_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f32_i32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::I64],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f32_i64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::I128],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f32_i128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f64_i32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::I64],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f64_i64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::I128],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f64_i128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f128_i32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::I64],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f128_i64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::I128],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f128_i128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::U32],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f32_u32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::U64],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f32_u64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::U128],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f32_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::U32],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f64_u32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::U64],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f64_u64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::U128],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f64_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::U32],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f128_u32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::U64],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f128_u64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::U128],
+        },
+        c_sig: None,
+        fn_list: &["ftoi_f128_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["itof_i32_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I64],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["itof_i64_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I128],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["itof_i128_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I32],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["itof_i32_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I64],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["itof_i64_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I128],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["itof_i128_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I32],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["itof_i32_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I64],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["itof_i64_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I128],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["itof_i128_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["itof_u32_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U64],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["itof_u64_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["itof_u128_f32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U32],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["itof_u32_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U64],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["itof_u64_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["itof_u128_f64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U32],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["itof_u32_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U64],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["itof_u64_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["itof_u128_f128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    /* int arithmetic */
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I32, Ty::I32],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["idiv_i32", "imod_i32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I32, Ty::I32],
+            returns: &[Ty::I32, Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["idivmod_i32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I32, Ty::I32],
+            returns: &[Ty::I32, Ty::Bool],
+        },
+        c_sig: None,
+        fn_list: &["imulo_i32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U32, Ty::U32],
+            returns: &[Ty::U32],
+        },
+        c_sig: None,
+        fn_list: &["idiv_u32", "imod_u32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U32, Ty::U32],
+            returns: &[Ty::U32, Ty::U32],
+        },
+        c_sig: None,
+        fn_list: &["idivmod_u32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I64, Ty::I64],
+            returns: &[Ty::I64],
+        },
+        c_sig: None,
+        fn_list: &["idiv_i64", "imod_i64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I64, Ty::I64],
+            returns: &[Ty::I64, Ty::I64],
+        },
+        c_sig: None,
+        fn_list: &["idivmod_i64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U64, Ty::U64],
+            returns: &[Ty::U64],
+        },
+        c_sig: None,
+        fn_list: &["idiv_u64", "imod_u64", "imul_u64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U64, Ty::U64],
+            returns: &[Ty::U64, Ty::U64],
+        },
+        c_sig: None,
+        fn_list: &["idivmod_u64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I64, Ty::I64],
+            returns: &[Ty::I64, Ty::Bool],
+        },
+        c_sig: None,
+        fn_list: &["imulo_i64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I128, Ty::I128],
+            returns: &[Ty::I128],
+        },
+        c_sig: None,
+        fn_list: &[
+            "iadd_i128",
+            "idiv_i128",
+            "imod_i128",
+            "imul_i128",
+            "isub_i128",
+        ],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I128, Ty::I128],
+            returns: &[Ty::I128, Ty::I128],
+        },
+        c_sig: None,
+        fn_list: &["idivmod_i128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I128, Ty::I128],
+            returns: &[Ty::I128, Ty::Bool],
+        },
+        c_sig: None,
+        fn_list: &["iaddo_i128", "imulo_i128", "isubo_i128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128, Ty::U128],
+            returns: &[Ty::U128],
+        },
+        c_sig: None,
+        fn_list: &["iadd_u128", "idiv_u128", "imod_u128", "isub_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128, Ty::U128],
+            returns: &[Ty::U128, Ty::U128],
+        },
+        c_sig: None,
+        fn_list: &["idivmod_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128, Ty::U128],
+            returns: &[Ty::U128, Ty::Bool],
+        },
+        c_sig: None,
+        fn_list: &["iaddo_u128", "imulo_u128", "isubo_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    /* int shifts */
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U32, Ty::U32],
+            returns: &[Ty::U32],
+        },
+        c_sig: None,
+        fn_list: &["ashl_u32", "lshr_u32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U64, Ty::U32],
+            returns: &[Ty::U64],
+        },
+        c_sig: None,
+        fn_list: &["ashl_u64", "lshr_u64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128, Ty::U32],
+            returns: &[Ty::U128],
+        },
+        c_sig: None,
+        fn_list: &["ashl_u128", "lshr_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I32, Ty::U32],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["ashr_i32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I64, Ty::U32],
+            returns: &[Ty::I64],
+        },
+        c_sig: None,
+        fn_list: &["ashr_i64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::I128, Ty::U32],
+            returns: &[Ty::I128],
+        },
+        c_sig: None,
+        fn_list: &["ashr_i128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    /* int bitwise ops */
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U32],
+            returns: &[Ty::USize],
+        },
+        c_sig: None,
+        fn_list: &["leading_zeros_u32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U64],
+            returns: &[Ty::USize],
+        },
+        c_sig: None,
+        fn_list: &["leading_zeros_u64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128],
+            returns: &[Ty::USize],
+        },
+        c_sig: None,
+        fn_list: &["leading_zeros_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U32],
+            returns: &[Ty::USize],
+        },
+        c_sig: None,
+        fn_list: &["trailing_zeros_u32"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U64],
+            returns: &[Ty::USize],
+        },
+        c_sig: None,
+        fn_list: &["trailing_zeros_u64"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    NestedOp {
+        rust_sig: Signature {
+            args: &[Ty::U128],
+            returns: &[Ty::USize],
+        },
+        c_sig: None,
+        fn_list: &["trailing_zeros_u128"],
+        scope: OpScope::BuiltinsPublic,
+    },
+    /*******************
+     * libm operations *
+     *******************/
+    NestedOp {
+        // `fn(f16) -> f16`
+        rust_sig: Signature {
+            args: &[Ty::F16],
+            returns: &[Ty::F16],
+        },
+        c_sig: None,
+        fn_list: &[
+            "ceilf16",
+            "fabsf16",
+            "floorf16",
+            "rintf16",
+            "roundevenf16",
+            "roundf16",
+            "sqrtf16",
+            "truncf16",
+        ],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `fn(f32) -> f32`
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &[
+            "acosf",
+            "acoshf",
+            "asinf",
+            "asinhf",
+            "atanf",
+            "atanhf",
+            "cbrtf",
+            "ceilf",
+            "cosf",
+            "coshf",
+            "erfcf",
+            "erff",
+            "exp10f",
+            "exp2f",
+            "expf",
+            "expm1f",
+            "fabsf",
+            "floorf",
+            "j0f",
+            "j1f",
+            "lgammaf",
+            "log10f",
+            "log1pf",
+            "log2f",
+            "logf",
+            "rintf",
+            "roundevenf",
+            "roundf",
+            "sinf",
+            "sinhf",
+            "sqrtf",
+            "tanf",
+            "tanhf",
+            "tgammaf",
+            "truncf",
+            "y0f",
+            "y1f",
+        ],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64) -> f64`
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &[
+            "acos",
+            "acosh",
+            "asin",
+            "asinh",
+            "atan",
+            "atanh",
+            "cbrt",
+            "ceil",
+            "cos",
+            "cosh",
+            "erf",
+            "erfc",
+            "exp",
+            "exp10",
+            "exp2",
+            "expm1",
+            "fabs",
+            "floor",
+            "j0",
+            "j1",
+            "lgamma",
+            "log",
+            "log10",
+            "log1p",
+            "log2",
+            "rint",
+            "round",
+            "roundeven",
+            "sin",
+            "sinh",
+            "sqrt",
+            "tan",
+            "tanh",
+            "tgamma",
+            "trunc",
+            "y0",
+            "y1",
+        ],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `fn(f128) -> f128`
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &[
+            "ceilf128",
+            "fabsf128",
+            "floorf128",
+            "rintf128",
+            "roundevenf128",
+            "roundf128",
+            "sqrtf128",
+            "truncf128",
+        ],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f16, f16) -> f16`
+        rust_sig: Signature {
+            args: &[Ty::F16, Ty::F16],
+            returns: &[Ty::F16],
+        },
+        c_sig: None,
+        fn_list: &[
+            "copysignf16",
+            "fdimf16",
+            "fmaxf16",
+            "fmaximum_numf16",
+            "fmaximumf16",
+            "fminf16",
+            "fminimum_numf16",
+            "fminimumf16",
+            "fmodf16",
+        ],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f32, f32) -> f32`
+        rust_sig: Signature {
+            args: &[Ty::F32, Ty::F32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &[
+            "atan2f",
+            "copysignf",
+            "fdimf",
+            "fmaxf",
+            "fmaximum_numf",
+            "fmaximumf",
+            "fminf",
+            "fminimum_numf",
+            "fminimumf",
+            "fmodf",
+            "hypotf",
+            "nextafterf",
+            "powf",
+            "remainderf",
+        ],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64, f64) -> f64`
+        rust_sig: Signature {
+            args: &[Ty::F64, Ty::F64],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &[
+            "atan2",
+            "copysign",
+            "fdim",
+            "fmax",
+            "fmaximum",
+            "fmaximum_num",
+            "fmin",
+            "fminimum",
+            "fminimum_num",
+            "fmod",
+            "hypot",
+            "nextafter",
+            "pow",
+            "remainder",
+        ],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f128, f128) -> f128`
+        rust_sig: Signature {
+            args: &[Ty::F128, Ty::F128],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &[
+            "copysignf128",
+            "fdimf128",
+            "fmaxf128",
+            "fmaximum_numf128",
+            "fmaximumf128",
+            "fminf128",
+            "fminimum_numf128",
+            "fminimumf128",
+            "fmodf128",
+        ],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f32, f32, f32) -> f32`
+        rust_sig: Signature {
+            args: &[Ty::F32, Ty::F32, Ty::F32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["fmaf"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64, f64, f64) -> f64`
+        rust_sig: Signature {
+            args: &[Ty::F64, Ty::F64, Ty::F64],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["fma"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f128, f128, f128) -> f128`
+        rust_sig: Signature {
+            args: &[Ty::F128, Ty::F128, Ty::F128],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["fmaf128"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f16) -> i32`
+        rust_sig: Signature {
+            args: &[Ty::F16],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["ilogbf16"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f32) -> i32`
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["ilogbf"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64) -> i32`
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["ilogb"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f128) -> i32`
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::I32],
+        },
+        c_sig: None,
+        fn_list: &["ilogbf128"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(i32, f32) -> f32`
+        rust_sig: Signature {
+            args: &[Ty::I32, Ty::F32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["jnf", "ynf"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(i32, f64) -> f64`
+        rust_sig: Signature {
+            args: &[Ty::I32, Ty::F64],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["jn", "yn"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f16, i32) -> f16`
+        rust_sig: Signature {
+            args: &[Ty::F16, Ty::I32],
+            returns: &[Ty::F16],
+        },
+        c_sig: None,
+        fn_list: &["ldexpf16", "scalbnf16"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f32, i32) -> f32`
+        rust_sig: Signature {
+            args: &[Ty::F32, Ty::I32],
+            returns: &[Ty::F32],
+        },
+        c_sig: None,
+        fn_list: &["ldexpf", "scalbnf"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64, i64) -> f64`
+        rust_sig: Signature {
+            args: &[Ty::F64, Ty::I32],
+            returns: &[Ty::F64],
+        },
+        c_sig: None,
+        fn_list: &["ldexp", "scalbn"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f128, i32) -> f128`
+        rust_sig: Signature {
+            args: &[Ty::F128, Ty::I32],
+            returns: &[Ty::F128],
+        },
+        c_sig: None,
+        fn_list: &["ldexpf128", "scalbnf128"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::F32, Ty::F32],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F32, Ty::MutF32],
+            returns: &[Ty::F32],
+        }),
+        fn_list: &["modff"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64, &mut f64) -> f64` as  `(f64) -> (f64, f64)`
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::F64, Ty::F64],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F64, Ty::MutF64],
+            returns: &[Ty::F64],
+        }),
+        fn_list: &["modf"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f16, &mut c_int) -> f16` as `(f16) -> (f16, i32)`
+        rust_sig: Signature {
+            args: &[Ty::F16],
+            returns: &[Ty::F16, Ty::I32],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F16, Ty::MutCInt],
+            returns: &[Ty::F16],
+        }),
+        fn_list: &["frexpf16"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)`
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::F32, Ty::I32],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F32, Ty::MutCInt],
+            returns: &[Ty::F32],
+        }),
+        fn_list: &["frexpf", "lgammaf_r"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)`
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::F64, Ty::I32],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F64, Ty::MutCInt],
+            returns: &[Ty::F64],
+        }),
+        fn_list: &["frexp", "lgamma_r"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f128, &mut c_int) -> f128` as `(f128) -> (f128, i32)`
+        rust_sig: Signature {
+            args: &[Ty::F128],
+            returns: &[Ty::F128, Ty::I32],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F128, Ty::MutCInt],
+            returns: &[Ty::F128],
+        }),
+        fn_list: &["frexpf128"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)`
+        rust_sig: Signature {
+            args: &[Ty::F32, Ty::F32],
+            returns: &[Ty::F32, Ty::I32],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F32, Ty::F32, Ty::MutCInt],
+            returns: &[Ty::F32],
+        }),
+        fn_list: &["remquof"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)`
+        rust_sig: Signature {
+            args: &[Ty::F64, Ty::F64],
+            returns: &[Ty::F64, Ty::I32],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F64, Ty::F64, Ty::MutCInt],
+            returns: &[Ty::F64],
+        }),
+        fn_list: &["remquo"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)`
+        rust_sig: Signature {
+            args: &[Ty::F32],
+            returns: &[Ty::F32, Ty::F32],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F32, Ty::MutF32, Ty::MutF32],
+            returns: &[],
+        }),
+        fn_list: &["sincosf"],
+        scope: OpScope::LibmPublic,
+    },
+    NestedOp {
+        // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)`
+        rust_sig: Signature {
+            args: &[Ty::F64],
+            returns: &[Ty::F64, Ty::F64],
+        },
+        c_sig: Some(Signature {
+            args: &[Ty::F64, Ty::MutF64, Ty::MutF64],
+            returns: &[],
+        }),
+        fn_list: &["sincos"],
+        scope: OpScope::LibmPublic,
+    },
+];
+
+/// A type used in a function signature.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Ty {
+    F16,
+    F32,
+    F64,
+    F128,
+    I32,
+    I64,
+    I128,
+    U32,
+    U64,
+    U128,
+    USize,
+    Bool,
+    CInt,
+    MutF16,
+    MutF32,
+    MutF64,
+    MutF128,
+    MutI32,
+    MutCInt,
+}
+
+impl Ty {
+    /// The number of bits needed to represent this type's possible values. That is,
+    /// `log2(variant_count)`.
+    pub fn effective_bits(self) -> u32 {
+        match self {
+            Ty::Bool => 1,
+            Ty::F16 | Ty::MutF16 => 16,
+            Ty::F32 | Ty::I32 | Ty::U32 | Ty::MutF32 | Ty::MutI32 => 32,
+            Ty::F64 | Ty::I64 | Ty::U64 | Ty::MutF64 => 64,
+            Ty::F128 | Ty::I128 | Ty::U128 | Ty::MutF128 => 128,
+            Ty::USize => usize::BITS,
+            // Assume we're not testing on a 16-bit system
+            Ty::CInt | Ty::MutCInt => 32,
+        }
+    }
+
+    /// How to group functions that mostly have this kind of input.
+    fn group(self) -> Group {
+        match self {
+            Ty::F16 | Ty::MutF16 => Group::F16,
+            Ty::F32 | Ty::MutF32 => Group::F32,
+            Ty::F64 | Ty::MutF64 => Group::F64,
+            Ty::F128 | Ty::MutF128 => Group::F128,
+            Ty::I32
+            | Ty::I64
+            | Ty::I128
+            | Ty::U32
+            | Ty::U64
+            | Ty::U128
+            | Ty::USize
+            | Ty::Bool
+            | Ty::CInt
+            | Ty::MutI32
+            | Ty::MutCInt => Group::Integer,
+        }
+    }
+
+    fn is_float(self) -> bool {
+        match self {
+            Ty::F16
+            | Ty::F32
+            | Ty::F64
+            | Ty::F128
+            | Ty::MutF16
+            | Ty::MutF32
+            | Ty::MutF64
+            | Ty::MutF128 => true,
+            Ty::I32
+            | Ty::I64
+            | Ty::I128
+            | Ty::U32
+            | Ty::U64
+            | Ty::U128
+            | Ty::USize
+            | Ty::Bool
+            | Ty::CInt
+            | Ty::MutI32
+            | Ty::MutCInt => false,
+        }
+    }
+}
+
+/// How a function should get grouped for things like extensive tests.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum Group {
+    F16,
+    F32,
+    F64,
+    F128,
+    Integer,
+}
+
+impl fmt::Display for Ty {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let s = match self {
+            Ty::F16 => "f16",
+            Ty::F32 => "f32",
+            Ty::F64 => "f64",
+            Ty::F128 => "f128",
+            Ty::I32 => "i32",
+            Ty::I64 => "i64",
+            Ty::I128 => "i128",
+            Ty::U32 => "u32",
+            Ty::U64 => "u64",
+            Ty::U128 => "u128",
+            Ty::USize => "usize",
+            Ty::Bool => "bool",
+            Ty::CInt => "::core::ffi::c_int",
+            Ty::MutF16 => "&mut f16",
+            Ty::MutF32 => "&mut f32",
+            Ty::MutF64 => "&mut f64",
+            Ty::MutF128 => "&mut f128",
+            Ty::MutI32 => "&mut i32",
+            Ty::MutCInt => "&mut ::core::ffi::c_int",
+        };
+        f.write_str(s)
+    }
+}
+
+/// Representation of e.g. `(f32, f32) -> f32`
+#[derive(Debug, Clone)]
+pub struct Signature {
+    pub args: &'static [Ty],
+    pub returns: &'static [Ty],
+}
+
+/// Combined information about a function implementation.
+#[derive(Debug, Clone)]
+pub struct MathOpInfo {
+    pub name: &'static str,
+    /// How this function should be grouped when needed. Based on the first float argument.
+    pub group: Group,
+    /// Function signature for C implementations
+    pub c_sig: Signature,
+    /// Function signature for Rust implementations
+    pub rust_sig: Signature,
+    /// Indicate what crate this function is defined in and whether it is public or private.
+    pub scope: OpScope,
+    /// The path to this function, including crate but excluding the function itself.
+    pub path: String,
+}
+
+/// A flat representation of `ALL_FUNCTIONS`.
+pub static ALL_OPERATIONS: LazyLock<Vec<MathOpInfo>> = LazyLock::new(|| {
+    let mut ret = Vec::new();
+
+    for op in ALL_OPERATIONS_NESTED {
+        let fn_names = op.fn_list;
+        for name in fn_names {
+            // Locate the first float argument or return value, fall back to whatever the first
+            // argument is if there are no floats.
+            let group_ty = op
+                .rust_sig
+                .args
+                .iter()
+                .chain(op.rust_sig.returns.iter())
+                .find(|arg| arg.is_float())
+                .unwrap_or(&op.rust_sig.args[0]);
+            let group = group_ty.group();
+
+            let api = MathOpInfo {
+                name,
+                group,
+                rust_sig: op.rust_sig.clone(),
+                c_sig: op.c_sig.clone().unwrap_or_else(|| op.rust_sig.clone()),
+                scope: op.scope,
+                path: format!("{}::{name}", op.scope.path_root()),
+            };
+            ret.push(api);
+        }
+    }
+
+    ret.sort_by_key(|item| item.name);
+    ret
+});
+
+#[cfg(test)]
+mod tests {
+    use std::collections::HashSet;
+
+    pub use super::*;
+
+    #[test]
+    fn sorted_fn_list() {
+        for op in ALL_OPERATIONS_NESTED {
+            if !op.fn_list.is_sorted() {
+                let mut sorted = (*op.fn_list).to_owned();
+                sorted.sort_unstable();
+                panic!(
+                    "names list is not sorted: {:?}\nExpected: {sorted:?}",
+                    op.fn_list
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn no_duplicates_in_list() {
+        let mut names = HashSet::new();
+        let mut paths = HashSet::new();
+        for item in &*ALL_OPERATIONS {
+            let new_name = names.insert(item.name);
+            assert!(new_name, "duplicate name `{item:?}`");
+            let new_path = paths.insert(&item.path);
+            assert!(new_path, "duplicate path`{item:?}`");
+        }
+    }
+}
diff --git a/library/compiler-builtins/crates/libm-macros/Cargo.toml b/library/compiler-builtins/crates/libm-macros/Cargo.toml
index f99a92e21c709..920cc32104943 100644
--- a/library/compiler-builtins/crates/libm-macros/Cargo.toml
+++ b/library/compiler-builtins/crates/libm-macros/Cargo.toml
@@ -9,6 +9,7 @@ license = "MIT OR Apache-2.0"
 proc-macro = true
 
 [dependencies]
+api-list-common.workspace = true
 heck.workspace = true
 proc-macro2.workspace = true
 quote.workspace = true
diff --git a/library/compiler-builtins/crates/libm-macros/src/enums.rs b/library/compiler-builtins/crates/libm-macros/src/enums.rs
index b4646f984d471..f15bdaa63821a 100644
--- a/library/compiler-builtins/crates/libm-macros/src/enums.rs
+++ b/library/compiler-builtins/crates/libm-macros/src/enums.rs
@@ -93,11 +93,6 @@ pub fn function_enum(
                     #( #base_arms, )*
                 }
             }
-
-            /// Return information about this operation.
-            pub fn math_op(self) -> &'static crate::op::MathOpInfo {
-                crate::op::ALL_OPERATIONS.iter().find(|op| op.name == self.as_str()).unwrap()
-            }
         }
     };
 
diff --git a/library/compiler-builtins/crates/libm-macros/src/lib.rs b/library/compiler-builtins/crates/libm-macros/src/lib.rs
index 7efa1488f570e..7bafd0ce8e1d4 100644
--- a/library/compiler-builtins/crates/libm-macros/src/lib.rs
+++ b/library/compiler-builtins/crates/libm-macros/src/lib.rs
@@ -1,18 +1,17 @@
 mod enums;
 mod parse;
-mod shared;
 
+use api_list_common::{ALL_OPERATIONS, Group, MathOpInfo, Ty};
 use parse::{Invocation, StructuredInput};
 use proc_macro as pm;
 use proc_macro2::{self as pm2, Span};
 use quote::{ToTokens, quote};
-pub(crate) use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty};
 use syn::spanned::Spanned;
 use syn::visit_mut::VisitMut;
-use syn::{Ident, ItemEnum};
+use syn::{Ident, ItemEnum, PathArguments, PathSegment};
 
 const KNOWN_TYPES: &[&str] = &[
-    "FTy", "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet", "public",
+    "CFn", "CArgs", "CRet", "RustFn", "RustArgs", "RustRet", "path",
 ];
 
 /// Populate an enum with a variant representing function. Names are in upper camel case.
@@ -66,8 +65,6 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p
 ///     (
 ///         // Name of that function
 ///         fn_name: $fn_name:ident,
-///         // The basic float type for this function (e.g. `f32`, `f64`)
-///         FTy: $FTy:ty,
 ///         // Function signature of the C version (e.g. `fn(f32, &mut f32) -> f32`)
 ///         CFn: $CFn:ty,
 ///         // A tuple representing the C version's arguments (e.g. `(f32, &mut f32)`)
@@ -80,8 +77,8 @@ pub fn base_name_enum(attributes: pm::TokenStream, tokens: pm::TokenStream) -> p
 ///         RustArgs: $RustArgs:ty,
 ///         // The Rust version's return type (e.g. `(f32, f32)`)
 ///         RustRet: $RustRet:ty,
-///         // True if this is part of `libm`'s public API
-///         public: $public:expr,
+///         // Path to the function, e.g. `libm::fma` or `crate::builtins_wrapper::addf32`.
+///         path: $path:path,
 ///         // Attributes for the current function, if any
 ///         attrs: [$($attr:meta),*],
 ///         // Extra tokens passed directly (if any)
@@ -143,11 +140,12 @@ pub fn for_each_function(tokens: pm::TokenStream) -> pm::TokenStream {
 fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static MathOpInfo>> {
     // Replace magic mappers with a list of relevant functions.
     if let Some(map) = &mut input.fn_extra {
-        for (name, ty) in [
-            ("ALL_F16", FloatTy::F16),
-            ("ALL_F32", FloatTy::F32),
-            ("ALL_F64", FloatTy::F64),
-            ("ALL_F128", FloatTy::F128),
+        for (name, group) in [
+            ("ALL_F16", Group::F16),
+            ("ALL_F32", Group::F32),
+            ("ALL_F64", Group::F64),
+            ("ALL_F128", Group::F128),
+            ("ALL_INT", Group::Integer),
         ] {
             let Some(k) = map.keys().find(|key| *key == name) else {
                 continue;
@@ -156,7 +154,19 @@ fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static MathOpInfo>
             let key = k.clone();
             let val = map.remove(&key).unwrap();
 
-            for op in ALL_OPERATIONS.iter().filter(|op| op.float_ty == ty) {
+            for op in ALL_OPERATIONS.iter().filter(|op| op.group == group) {
+                map.insert(Ident::new(op.name, key.span()), val.clone());
+            }
+        }
+
+        if let Some(k) = map.keys().find(|key| *key == "ALL_BUILTINS") {
+            let key = k.clone();
+            let val = map.remove(&key).unwrap();
+
+            for op in ALL_OPERATIONS
+                .iter()
+                .filter(|op| op.scope.defined_in_compiler_builtins())
+            {
                 map.insert(Ident::new(op.name, key.span()), val.clone());
             }
         }
@@ -220,8 +230,23 @@ fn validate(input: &mut StructuredInput) -> syn::Result<Vec<&'static MathOpInfo>
         }
 
         // Omit f16 and f128 functions if requested
-        if input.skip_f16_f128 && (func.float_ty == FloatTy::F16 || func.float_ty == FloatTy::F128)
-        {
+        if input.skip_f16_f128 {
+            if matches!(func.group, Group::F16 | Group::F128) {
+                continue;
+            }
+
+            if func
+                .rust_sig
+                .args
+                .iter()
+                .chain(func.rust_sig.returns.iter())
+                .any(|ty| matches!(ty, Ty::F16 | Ty::F128))
+            {
+                continue;
+            }
+        }
+
+        if input.skip_builtins && func.scope.defined_in_compiler_builtins() {
             continue;
         }
 
@@ -356,25 +381,33 @@ fn expand(input: StructuredInput, fn_list: &[&MathOpInfo]) -> syn::Result<pm2::T
             None => pm2::TokenStream::new(),
         };
 
-        let base_fty = func.float_ty;
-        let c_args = &func.c_sig.args;
-        let c_ret = &func.c_sig.returns;
-        let rust_args = &func.rust_sig.args;
-        let rust_ret = &func.rust_sig.returns;
-        let public = func.public;
+        let path = syn::Path {
+            leading_colon: None,
+            segments: func
+                .path
+                .split("::")
+                .map(|pseg| PathSegment {
+                    ident: Ident::new(pseg, Span::call_site()),
+                    arguments: PathArguments::None,
+                })
+                .collect(),
+        };
 
         let mut ty_fields = Vec::new();
         for ty in &input.emit_types {
+            let c_args = func.c_sig.args.iter().copied().map(ty_to_tokens);
+            let c_ret = func.c_sig.returns.iter().copied().map(ty_to_tokens);
+            let rust_args = func.rust_sig.args.iter().copied().map(ty_to_tokens);
+            let rust_ret = func.rust_sig.returns.iter().copied().map(ty_to_tokens);
             let field = match ty.to_string().as_str() {
-                "FTy" => quote! { FTy: #base_fty, },
                 "CFn" => quote! { CFn: fn( #(#c_args),* ,) -> ( #(#c_ret),* ), },
                 "CArgs" => quote! { CArgs: ( #(#c_args),* ,), },
                 "CRet" => quote! { CRet: ( #(#c_ret),* ), },
                 "RustFn" => quote! { RustFn: fn( #(#rust_args),* ,) -> ( #(#rust_ret),* ), },
                 "RustArgs" => quote! { RustArgs: ( #(#rust_args),* ,), },
                 "RustRet" => quote! { RustRet: ( #(#rust_ret),* ), },
-                "public" => quote! { public: #public, },
-                _ => unreachable!("checked in validation"),
+                "path" => quote! { path: #path, },
+                _ => unreachable!("fields should be checked in validation"),
             };
             ty_fields.push(field);
         }
@@ -450,7 +483,7 @@ impl VisitMut for MacroReplace {
 /// Return the unsuffixed version of a function name; e.g. `abs` and `absf` both return `abs`,
 /// `lgamma_r` and `lgammaf_r` both return `lgamma_r`.
 fn base_name(name: &str) -> &str {
-    let known_mappings = &[
+    let known_mappings = [
         ("erff", "erf"),
         ("erf", "erf"),
         ("lgammaf_r", "lgamma_r"),
@@ -458,45 +491,55 @@ fn base_name(name: &str) -> &str {
         ("modf", "modf"),
     ];
 
-    match known_mappings.iter().find(|known| known.0 == name) {
-        Some(found) => found.1,
-        None => name
-            .strip_suffix("f")
-            .or_else(|| name.strip_suffix("f16"))
-            .or_else(|| name.strip_suffix("f128"))
-            .unwrap_or(name),
+    if let Some(found) = known_mappings.iter().find(|known| known.0 == name) {
+        return found.1;
     }
-}
 
-impl ToTokens for Ty {
-    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
-        let ts = match self {
-            Ty::F16 => quote! { f16 },
-            Ty::F32 => quote! { f32 },
-            Ty::F64 => quote! { f64 },
-            Ty::F128 => quote! { f128 },
-            Ty::I32 => quote! { i32 },
-            Ty::CInt => quote! { ::core::ffi::c_int },
-            Ty::MutF16 => quote! { &'a mut f16 },
-            Ty::MutF32 => quote! { &'a mut f32 },
-            Ty::MutF64 => quote! { &'a mut f64 },
-            Ty::MutF128 => quote! { &'a mut f128 },
-            Ty::MutI32 => quote! { &'a mut i32 },
-            Ty::MutCInt => quote! { &'a mut core::ffi::c_int },
-        };
+    // Attempt to strip unambiguous suffixes first. This is repeated so e.g.
+    // `extend_f32_f64` turns into `extend`.
+    let strip = [
+        "_f16", "_f32", "_f64", "_f128", "_i32", "_i64", "_i128", "_u32", "_u64", "_u128", "f16",
+        "f32", "f64", "f128",
+    ];
 
-        tokens.extend(ts);
+    let mut any_found = false;
+    let mut ret = name;
+
+    for sfx in strip {
+        if let Some(stripped) = ret.strip_suffix(sfx) {
+            ret = stripped;
+            any_found = true;
+        }
+    }
+
+    // Only if no suffix was stripped, try stripping the C-style float suffix.
+    if !any_found && let Some(stripped) = ret.strip_suffix("f") {
+        ret = stripped;
     }
+
+    ret
 }
-impl ToTokens for FloatTy {
-    fn to_tokens(&self, tokens: &mut pm2::TokenStream) {
-        let ts = match self {
-            FloatTy::F16 => quote! { f16 },
-            FloatTy::F32 => quote! { f32 },
-            FloatTy::F64 => quote! { f64 },
-            FloatTy::F128 => quote! { f128 },
-        };
 
-        tokens.extend(ts);
+fn ty_to_tokens(ty: Ty) -> pm2::TokenStream {
+    match ty {
+        Ty::F16 => quote! { f16 },
+        Ty::F32 => quote! { f32 },
+        Ty::F64 => quote! { f64 },
+        Ty::F128 => quote! { f128 },
+        Ty::I32 => quote! { i32 },
+        Ty::I64 => quote! { i64 },
+        Ty::I128 => quote! { i128 },
+        Ty::U32 => quote! { u32 },
+        Ty::U64 => quote! { u64 },
+        Ty::U128 => quote! { u128 },
+        Ty::USize => quote! { usize },
+        Ty::Bool => quote! { bool },
+        Ty::CInt => quote! { ::core::ffi::c_int },
+        Ty::MutF16 => quote! { &'a mut f16 },
+        Ty::MutF32 => quote! { &'a mut f32 },
+        Ty::MutF64 => quote! { &'a mut f64 },
+        Ty::MutF128 => quote! { &'a mut f128 },
+        Ty::MutI32 => quote! { &'a mut i32 },
+        Ty::MutCInt => quote! { &'a mut core::ffi::c_int },
     }
 }
diff --git a/library/compiler-builtins/crates/libm-macros/src/parse.rs b/library/compiler-builtins/crates/libm-macros/src/parse.rs
index 4876f3ef7263a..97067b876bda6 100644
--- a/library/compiler-builtins/crates/libm-macros/src/parse.rs
+++ b/library/compiler-builtins/crates/libm-macros/src/parse.rs
@@ -50,9 +50,13 @@ pub struct StructuredInput {
     pub emit_types: Vec<Ident>,
     /// Skip these functions
     pub skip: Vec<Ident>,
-    /// If true, omit f16 and f128 functions that aren't present in other libraries.
+    /// If true, omit f16 and f128 functions that may not be present in libraries we test
+    /// against (e.g. musl).
     pub skip_f16_f128: bool,
-    /// Invoke only for these functions
+    /// If true, omit functions that are defined in `compiler-builtins` and are not present
+    /// in libraries we test against (e.g. musl).
+    pub skip_builtins: bool,
+    /// Invoke only for the functions listed here.
     pub only: Option<Vec<Ident>>,
     /// Attributes that get applied to specific functions
     pub attributes: Option<Vec<AttributeMap>>,
@@ -73,6 +77,7 @@ impl StructuredInput {
         let emit_types_expr = expect_field(&mut map, "emit_types").ok();
         let skip_expr = expect_field(&mut map, "skip").ok();
         let skip_f16_f128 = expect_field(&mut map, "skip_f16_f128").ok();
+        let skip_builtins = expect_field(&mut map, "skip_builtins").ok();
         let only_expr = expect_field(&mut map, "only").ok();
         let attr_expr = expect_field(&mut map, "attributes").ok();
         let extra = expect_field(&mut map, "extra").ok();
@@ -101,6 +106,11 @@ impl StructuredInput {
             None => false,
         };
 
+        let skip_builtins = match skip_builtins {
+            Some(expr) => expect_litbool(expr)?.value,
+            None => false,
+        };
+
         let only_span = only_expr.as_ref().map(|expr| expr.span());
         let only = match only_expr {
             Some(expr) => Some(Parser::parse2(parse_ident_array, expr.into_token_stream())?),
@@ -131,6 +141,7 @@ impl StructuredInput {
             emit_types,
             skip,
             skip_f16_f128,
+            skip_builtins,
             only,
             only_span,
             attributes,
diff --git a/library/compiler-builtins/crates/libm-macros/src/shared.rs b/library/compiler-builtins/crates/libm-macros/src/shared.rs
deleted file mode 100644
index ee1feed7c35eb..0000000000000
--- a/library/compiler-builtins/crates/libm-macros/src/shared.rs
+++ /dev/null
@@ -1,640 +0,0 @@
-/* List of all functions that is shared between `libm-macros` and `libm-test`. */
-
-use std::fmt;
-use std::sync::LazyLock;
-
-struct NestedOp {
-    float_ty: FloatTy,
-    rust_sig: Signature,
-    c_sig: Option<Signature>,
-    fn_list: &'static [&'static str],
-    public: bool,
-}
-
-/// We need a flat list to work with most of the time, but define things as a more convenient
-/// nested list.
-const ALL_OPERATIONS_NESTED: &[NestedOp] = &[
-    NestedOp {
-        // `fn(f16) -> f16`
-        float_ty: FloatTy::F16,
-        rust_sig: Signature {
-            args: &[Ty::F16],
-            returns: &[Ty::F16],
-        },
-        c_sig: None,
-        fn_list: &[
-            "ceilf16",
-            "fabsf16",
-            "floorf16",
-            "rintf16",
-            "roundevenf16",
-            "roundf16",
-            "sqrtf16",
-            "truncf16",
-        ],
-        public: true,
-    },
-    NestedOp {
-        // `fn(f32) -> f32`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32],
-            returns: &[Ty::F32],
-        },
-        c_sig: None,
-        fn_list: &[
-            "acosf",
-            "acoshf",
-            "asinf",
-            "asinhf",
-            "atanf",
-            "atanhf",
-            "cbrtf",
-            "ceilf",
-            "cosf",
-            "coshf",
-            "erfcf",
-            "erff",
-            "exp10f",
-            "exp2f",
-            "expf",
-            "expm1f",
-            "fabsf",
-            "floorf",
-            "j0f",
-            "j1f",
-            "lgammaf",
-            "log10f",
-            "log1pf",
-            "log2f",
-            "logf",
-            "rintf",
-            "roundevenf",
-            "roundf",
-            "sinf",
-            "sinhf",
-            "sqrtf",
-            "tanf",
-            "tanhf",
-            "tgammaf",
-            "truncf",
-            "y0f",
-            "y1f",
-        ],
-        public: true,
-    },
-    NestedOp {
-        // `(f64) -> f64`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64],
-            returns: &[Ty::F64],
-        },
-        c_sig: None,
-        fn_list: &[
-            "acos",
-            "acosh",
-            "asin",
-            "asinh",
-            "atan",
-            "atanh",
-            "cbrt",
-            "ceil",
-            "cos",
-            "cosh",
-            "erf",
-            "erfc",
-            "exp",
-            "exp10",
-            "exp2",
-            "expm1",
-            "fabs",
-            "floor",
-            "j0",
-            "j1",
-            "lgamma",
-            "log",
-            "log10",
-            "log1p",
-            "log2",
-            "rint",
-            "round",
-            "roundeven",
-            "sin",
-            "sinh",
-            "sqrt",
-            "tan",
-            "tanh",
-            "tgamma",
-            "trunc",
-            "y0",
-            "y1",
-        ],
-        public: true,
-    },
-    NestedOp {
-        // `fn(f128) -> f128`
-        float_ty: FloatTy::F128,
-        rust_sig: Signature {
-            args: &[Ty::F128],
-            returns: &[Ty::F128],
-        },
-        c_sig: None,
-        fn_list: &[
-            "ceilf128",
-            "fabsf128",
-            "floorf128",
-            "rintf128",
-            "roundevenf128",
-            "roundf128",
-            "sqrtf128",
-            "truncf128",
-        ],
-        public: true,
-    },
-    NestedOp {
-        // `(f16, f16) -> f16`
-        float_ty: FloatTy::F16,
-        rust_sig: Signature {
-            args: &[Ty::F16, Ty::F16],
-            returns: &[Ty::F16],
-        },
-        c_sig: None,
-        fn_list: &[
-            "copysignf16",
-            "fdimf16",
-            "fmaxf16",
-            "fmaximum_numf16",
-            "fmaximumf16",
-            "fminf16",
-            "fminimum_numf16",
-            "fminimumf16",
-            "fmodf16",
-        ],
-        public: true,
-    },
-    NestedOp {
-        // `(f32, f32) -> f32`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32, Ty::F32],
-            returns: &[Ty::F32],
-        },
-        c_sig: None,
-        fn_list: &[
-            "atan2f",
-            "copysignf",
-            "fdimf",
-            "fmaxf",
-            "fmaximum_numf",
-            "fmaximumf",
-            "fminf",
-            "fminimum_numf",
-            "fminimumf",
-            "fmodf",
-            "hypotf",
-            "nextafterf",
-            "powf",
-            "remainderf",
-        ],
-        public: true,
-    },
-    NestedOp {
-        // `(f64, f64) -> f64`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64, Ty::F64],
-            returns: &[Ty::F64],
-        },
-        c_sig: None,
-        fn_list: &[
-            "atan2",
-            "copysign",
-            "fdim",
-            "fmax",
-            "fmaximum",
-            "fmaximum_num",
-            "fmin",
-            "fminimum",
-            "fminimum_num",
-            "fmod",
-            "hypot",
-            "nextafter",
-            "pow",
-            "remainder",
-        ],
-        public: true,
-    },
-    NestedOp {
-        // `(f128, f128) -> f128`
-        float_ty: FloatTy::F128,
-        rust_sig: Signature {
-            args: &[Ty::F128, Ty::F128],
-            returns: &[Ty::F128],
-        },
-        c_sig: None,
-        fn_list: &[
-            "copysignf128",
-            "fdimf128",
-            "fmaxf128",
-            "fmaximum_numf128",
-            "fmaximumf128",
-            "fminf128",
-            "fminimum_numf128",
-            "fminimumf128",
-            "fmodf128",
-        ],
-        public: true,
-    },
-    NestedOp {
-        // `(f32, f32, f32) -> f32`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32, Ty::F32, Ty::F32],
-            returns: &[Ty::F32],
-        },
-        c_sig: None,
-        fn_list: &["fmaf"],
-        public: true,
-    },
-    NestedOp {
-        // `(f64, f64, f64) -> f64`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64, Ty::F64, Ty::F64],
-            returns: &[Ty::F64],
-        },
-        c_sig: None,
-        fn_list: &["fma"],
-        public: true,
-    },
-    NestedOp {
-        // `(f128, f128, f128) -> f128`
-        float_ty: FloatTy::F128,
-        rust_sig: Signature {
-            args: &[Ty::F128, Ty::F128, Ty::F128],
-            returns: &[Ty::F128],
-        },
-        c_sig: None,
-        fn_list: &["fmaf128"],
-        public: true,
-    },
-    NestedOp {
-        // `(f16) -> i32`
-        float_ty: FloatTy::F16,
-        rust_sig: Signature {
-            args: &[Ty::F16],
-            returns: &[Ty::I32],
-        },
-        c_sig: None,
-        fn_list: &["ilogbf16"],
-        public: true,
-    },
-    NestedOp {
-        // `(f32) -> i32`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32],
-            returns: &[Ty::I32],
-        },
-        c_sig: None,
-        fn_list: &["ilogbf"],
-        public: true,
-    },
-    NestedOp {
-        // `(f64) -> i32`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64],
-            returns: &[Ty::I32],
-        },
-        c_sig: None,
-        fn_list: &["ilogb"],
-        public: true,
-    },
-    NestedOp {
-        // `(f128) -> i32`
-        float_ty: FloatTy::F128,
-        rust_sig: Signature {
-            args: &[Ty::F128],
-            returns: &[Ty::I32],
-        },
-        c_sig: None,
-        fn_list: &["ilogbf128"],
-        public: true,
-    },
-    NestedOp {
-        // `(i32, f32) -> f32`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::I32, Ty::F32],
-            returns: &[Ty::F32],
-        },
-        c_sig: None,
-        fn_list: &["jnf", "ynf"],
-        public: true,
-    },
-    NestedOp {
-        // `(i32, f64) -> f64`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::I32, Ty::F64],
-            returns: &[Ty::F64],
-        },
-        c_sig: None,
-        fn_list: &["jn", "yn"],
-        public: true,
-    },
-    NestedOp {
-        // `(f16, i32) -> f16`
-        float_ty: FloatTy::F16,
-        rust_sig: Signature {
-            args: &[Ty::F16, Ty::I32],
-            returns: &[Ty::F16],
-        },
-        c_sig: None,
-        fn_list: &["ldexpf16", "scalbnf16"],
-        public: true,
-    },
-    NestedOp {
-        // `(f32, i32) -> f32`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32, Ty::I32],
-            returns: &[Ty::F32],
-        },
-        c_sig: None,
-        fn_list: &["ldexpf", "scalbnf"],
-        public: true,
-    },
-    NestedOp {
-        // `(f64, i64) -> f64`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64, Ty::I32],
-            returns: &[Ty::F64],
-        },
-        c_sig: None,
-        fn_list: &["ldexp", "scalbn"],
-        public: true,
-    },
-    NestedOp {
-        // `(f128, i32) -> f128`
-        float_ty: FloatTy::F128,
-        rust_sig: Signature {
-            args: &[Ty::F128, Ty::I32],
-            returns: &[Ty::F128],
-        },
-        c_sig: None,
-        fn_list: &["ldexpf128", "scalbnf128"],
-        public: true,
-    },
-    NestedOp {
-        // `(f32, &mut f32) -> f32` as `(f32) -> (f32, f32)`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32],
-            returns: &[Ty::F32, Ty::F32],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F32, Ty::MutF32],
-            returns: &[Ty::F32],
-        }),
-        fn_list: &["modff"],
-        public: true,
-    },
-    NestedOp {
-        // `(f64, &mut f64) -> f64` as  `(f64) -> (f64, f64)`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64],
-            returns: &[Ty::F64, Ty::F64],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F64, Ty::MutF64],
-            returns: &[Ty::F64],
-        }),
-        fn_list: &["modf"],
-        public: true,
-    },
-    NestedOp {
-        // `(f16, &mut c_int) -> f16` as `(f16) -> (f16, i32)`
-        float_ty: FloatTy::F16,
-        rust_sig: Signature {
-            args: &[Ty::F16],
-            returns: &[Ty::F16, Ty::I32],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F16, Ty::MutCInt],
-            returns: &[Ty::F16],
-        }),
-        fn_list: &["frexpf16"],
-        public: true,
-    },
-    NestedOp {
-        // `(f32, &mut c_int) -> f32` as `(f32) -> (f32, i32)`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32],
-            returns: &[Ty::F32, Ty::I32],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F32, Ty::MutCInt],
-            returns: &[Ty::F32],
-        }),
-        fn_list: &["frexpf", "lgammaf_r"],
-        public: true,
-    },
-    NestedOp {
-        // `(f64, &mut c_int) -> f64` as `(f64) -> (f64, i32)`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64],
-            returns: &[Ty::F64, Ty::I32],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F64, Ty::MutCInt],
-            returns: &[Ty::F64],
-        }),
-        fn_list: &["frexp", "lgamma_r"],
-        public: true,
-    },
-    NestedOp {
-        // `(f128, &mut c_int) -> f128` as `(f128) -> (f128, i32)`
-        float_ty: FloatTy::F128,
-        rust_sig: Signature {
-            args: &[Ty::F128],
-            returns: &[Ty::F128, Ty::I32],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F128, Ty::MutCInt],
-            returns: &[Ty::F128],
-        }),
-        fn_list: &["frexpf128"],
-        public: true,
-    },
-    NestedOp {
-        // `(f32, f32, &mut c_int) -> f32` as `(f32, f32) -> (f32, i32)`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32, Ty::F32],
-            returns: &[Ty::F32, Ty::I32],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F32, Ty::F32, Ty::MutCInt],
-            returns: &[Ty::F32],
-        }),
-        fn_list: &["remquof"],
-        public: true,
-    },
-    NestedOp {
-        // `(f64, f64, &mut c_int) -> f64` as `(f64, f64) -> (f64, i32)`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64, Ty::F64],
-            returns: &[Ty::F64, Ty::I32],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F64, Ty::F64, Ty::MutCInt],
-            returns: &[Ty::F64],
-        }),
-        fn_list: &["remquo"],
-        public: true,
-    },
-    NestedOp {
-        // `(f32, &mut f32, &mut f32)` as `(f32) -> (f32, f32)`
-        float_ty: FloatTy::F32,
-        rust_sig: Signature {
-            args: &[Ty::F32],
-            returns: &[Ty::F32, Ty::F32],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F32, Ty::MutF32, Ty::MutF32],
-            returns: &[],
-        }),
-        fn_list: &["sincosf"],
-        public: true,
-    },
-    NestedOp {
-        // `(f64, &mut f64, &mut f64)` as `(f64) -> (f64, f64)`
-        float_ty: FloatTy::F64,
-        rust_sig: Signature {
-            args: &[Ty::F64],
-            returns: &[Ty::F64, Ty::F64],
-        },
-        c_sig: Some(Signature {
-            args: &[Ty::F64, Ty::MutF64, Ty::MutF64],
-            returns: &[],
-        }),
-        fn_list: &["sincos"],
-        public: true,
-    },
-];
-
-/// A type used in a function signature.
-#[allow(dead_code)]
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum Ty {
-    F16,
-    F32,
-    F64,
-    F128,
-    I32,
-    CInt,
-    MutF16,
-    MutF32,
-    MutF64,
-    MutF128,
-    MutI32,
-    MutCInt,
-}
-
-/// A subset of [`Ty`] representing only floats.
-#[allow(dead_code)]
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub enum FloatTy {
-    F16,
-    F32,
-    F64,
-    F128,
-}
-
-impl fmt::Display for Ty {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let s = match self {
-            Ty::F16 => "f16",
-            Ty::F32 => "f32",
-            Ty::F64 => "f64",
-            Ty::F128 => "f128",
-            Ty::I32 => "i32",
-            Ty::CInt => "::core::ffi::c_int",
-            Ty::MutF16 => "&mut f16",
-            Ty::MutF32 => "&mut f32",
-            Ty::MutF64 => "&mut f64",
-            Ty::MutF128 => "&mut f128",
-            Ty::MutI32 => "&mut i32",
-            Ty::MutCInt => "&mut ::core::ffi::c_int",
-        };
-        f.write_str(s)
-    }
-}
-
-impl fmt::Display for FloatTy {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let s = match self {
-            FloatTy::F16 => "f16",
-            FloatTy::F32 => "f32",
-            FloatTy::F64 => "f64",
-            FloatTy::F128 => "f128",
-        };
-        f.write_str(s)
-    }
-}
-
-/// Representation of e.g. `(f32, f32) -> f32`
-#[derive(Debug, Clone)]
-pub struct Signature {
-    pub args: &'static [Ty],
-    pub returns: &'static [Ty],
-}
-
-/// Combined information about a function implementation.
-#[derive(Debug, Clone)]
-pub struct MathOpInfo {
-    pub name: &'static str,
-    pub float_ty: FloatTy,
-    /// Function signature for C implementations
-    pub c_sig: Signature,
-    /// Function signature for Rust implementations
-    pub rust_sig: Signature,
-    /// True if part of libm's public API
-    pub public: bool,
-}
-
-/// A flat representation of `ALL_FUNCTIONS`.
-pub static ALL_OPERATIONS: LazyLock<Vec<MathOpInfo>> = LazyLock::new(|| {
-    let mut ret = Vec::new();
-
-    for op in ALL_OPERATIONS_NESTED {
-        let fn_names = op.fn_list;
-        for name in fn_names {
-            let api = MathOpInfo {
-                name,
-                float_ty: op.float_ty,
-                rust_sig: op.rust_sig.clone(),
-                c_sig: op.c_sig.clone().unwrap_or_else(|| op.rust_sig.clone()),
-                public: op.public,
-            };
-            ret.push(api);
-        }
-
-        if !fn_names.is_sorted() {
-            let mut sorted = (*fn_names).to_owned();
-            sorted.sort_unstable();
-            panic!("names list is not sorted: {fn_names:?}\nExpected: {sorted:?}");
-        }
-    }
-
-    ret.sort_by_key(|item| item.name);
-    ret
-});
diff --git a/library/compiler-builtins/crates/libm-macros/tests/basic.rs b/library/compiler-builtins/crates/libm-macros/tests/basic.rs
index 1876db8f5869d..668127c298e47 100644
--- a/library/compiler-builtins/crates/libm-macros/tests/basic.rs
+++ b/library/compiler-builtins/crates/libm-macros/tests/basic.rs
@@ -7,14 +7,13 @@
 macro_rules! basic {
     (
         fn_name: $fn_name:ident,
-        FTy: $FTy:ty,
         CFn: $CFn:ty,
         CArgs: $CArgs:ty,
         CRet: $CRet:ty,
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
         RustRet: $RustRet:ty,
-        public: $public:expr,
+        path: $path:path,
         attrs: [$($attr:meta),*],
         extra: [$($extra_tt:tt)*],
         fn_extra: $fn_extra:expr,
@@ -22,12 +21,11 @@ macro_rules! basic {
         $(#[$attr])*
         #[allow(dead_code)]
         pub mod $fn_name {
-            type FTy= $FTy;
             type CFnTy<'a> = $CFn;
             type RustFnTy = $RustFn;
             type RustArgsTy = $RustArgs;
             type RustRetTy = $RustRet;
-            const PUBLIC: bool = $public;
+            const PATH: &str = stringify!($path);
             const A: &[&str] = &[$($extra_tt)*];
             fn foo(a: f32) -> f32 {
                 $fn_extra(a)
@@ -147,6 +145,7 @@ fn test_fn_extra_expansion() {
     let mut vf32 = Vec::new();
     let mut vf64 = Vec::new();
     let mut vf128 = Vec::new();
+    let mut vbuiltins = Vec::new();
 
     // Test with no extra, no skip, and no attributes
     libm_macros::for_each_function! {
@@ -156,6 +155,7 @@ fn test_fn_extra_expansion() {
             ALL_F32 => vf32,
             ALL_F64 => vf64,
             ALL_F128 => vf128,
+            ALL_BUILTINS => vbuiltins,
         }
     }
 
@@ -171,8 +171,11 @@ fn test_fn_extra_expansion() {
     for name in vf32 {
         assert!(name.ends_with("f"), "{name}");
     }
-    let _ = vf64;
     for name in vf128 {
         assert!(name.ends_with("f128"), "{name}");
     }
+
+    // Nothing to assert here
+    let _ = vf64;
+    let _ = vbuiltins;
 }
diff --git a/library/compiler-builtins/crates/libm-macros/tests/enum.rs b/library/compiler-builtins/crates/libm-macros/tests/enum.rs
index 93e209a0dcc90..f012e0a9ef0ad 100644
--- a/library/compiler-builtins/crates/libm-macros/tests/enum.rs
+++ b/library/compiler-builtins/crates/libm-macros/tests/enum.rs
@@ -23,16 +23,3 @@ fn basename() {
     assert_eq!(Identifier::Sin.base_name(), BaseName::Sin);
     assert_eq!(Identifier::Sinf.base_name(), BaseName::Sin);
 }
-
-#[test]
-fn math_op() {
-    assert_eq!(Identifier::Sin.math_op().float_ty, FloatTy::F64);
-    assert_eq!(Identifier::Sinf.math_op().float_ty, FloatTy::F32);
-}
-
-// Replicate the structure that we have in `libm-test`
-mod op {
-    include!("../../libm-macros/src/shared.rs");
-}
-
-use op::FloatTy;
diff --git a/library/compiler-builtins/crates/update-api-list/Cargo.toml b/library/compiler-builtins/crates/update-api-list/Cargo.toml
new file mode 100644
index 0000000000000..7a14a0e9770f5
--- /dev/null
+++ b/library/compiler-builtins/crates/update-api-list/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+name = "update-api-list"
+version = "0.1.0"
+edition = "2024"
+publish = false
+license = "MIT OR Apache-2.0"
+
+[dependencies]
+api-list-common.workspace = true
+getopts.workspace = true
+glob.workspace = true
+pretty_assertions.workspace = true
+regex.workspace = true
diff --git a/library/compiler-builtins/crates/update-api-list/src/lib.rs b/library/compiler-builtins/crates/update-api-list/src/lib.rs
new file mode 100644
index 0000000000000..a07ae3411ee37
--- /dev/null
+++ b/library/compiler-builtins/crates/update-api-list/src/lib.rs
@@ -0,0 +1,11 @@
+use std::path::{Path, PathBuf};
+use std::sync::LazyLock;
+
+pub static WORKSPACE_ROOT: LazyLock<PathBuf> = LazyLock::new(|| {
+    Path::new(env!("CARGO_MANIFEST_DIR"))
+        .parent()
+        .unwrap()
+        .parent()
+        .unwrap()
+        .to_owned()
+});
diff --git a/library/compiler-builtins/crates/update-api-list/tests/all.rs b/library/compiler-builtins/crates/update-api-list/tests/all.rs
new file mode 100644
index 0000000000000..814c7cc3fa0ca
--- /dev/null
+++ b/library/compiler-builtins/crates/update-api-list/tests/all.rs
@@ -0,0 +1,128 @@
+use std::fs;
+use std::path::Path;
+use std::process::Command;
+use std::sync::LazyLock;
+
+use pretty_assertions::assert_str_eq;
+use regex::Regex;
+use update_api_list::WORKSPACE_ROOT;
+
+static PUBLIC_FUNCTIONS: LazyLock<Vec<String>> = LazyLock::new(|| {
+    fs::read_to_string(WORKSPACE_ROOT.join("etc/function-list.txt"))
+        .unwrap()
+        .lines()
+        .map(|line| line.trim())
+        .filter(|line| !(line.starts_with("#") || line.is_empty()))
+        .map(|line| line.to_owned())
+        .collect()
+});
+
+/// In each file, check annotations indicating that blocks of code should be sorted or should
+/// include an exhaustive list of all public API.
+#[test]
+fn tidy_lists() {
+    let out = Command::new("git")
+        .arg("ls-files")
+        .current_dir(&*WORKSPACE_ROOT)
+        .output()
+        .unwrap();
+    assert!(out.status.success());
+
+    let file_list = str::from_utf8(&out.stdout).unwrap();
+
+    for path in file_list.lines() {
+        let relpath = Path::new(path);
+        let abspath = WORKSPACE_ROOT.join(relpath);
+        if abspath.is_dir() || relpath == file!() {
+            continue;
+        }
+
+        let src = fs::read_to_string(&abspath).unwrap();
+        let lines: Vec<_> = src.lines().collect();
+
+        validate_delimited_block(
+            relpath,
+            &lines,
+            "verify-sorted-start",
+            "verify-sorted-end",
+            ensure_sorted,
+        );
+
+        validate_delimited_block(
+            relpath,
+            &lines,
+            "verify-apilist-start",
+            "verify-apilist-end",
+            ensure_contains_api,
+        );
+    }
+}
+
+/// Identify blocks of code wrapped within `start` and `end`, collect their contents to a list of
+/// strings, and call `validate` for each of those lists.
+fn validate_delimited_block(
+    relpath: &Path,
+    lines: &[&str],
+    start: &str,
+    end: &str,
+    validate: impl Fn(&Path, usize, &[&str]),
+) {
+    let mut block_lines = Vec::new();
+    let mut block_start_line = None;
+    for (mut line_num, line) in lines.iter().enumerate() {
+        line_num += 1;
+
+        if line.contains(start) {
+            block_start_line = Some(line_num);
+            continue;
+        }
+
+        // End of a block, validate its contents
+        if line.contains(end) {
+            let Some(start_line) = block_start_line else {
+                panic!("`{end}` without `{start}` at {relpath:?}:{line_num}");
+            };
+
+            validate(relpath, start_line, &block_lines);
+            block_lines.clear();
+            block_start_line = None;
+            continue;
+        }
+
+        if block_start_line.is_some() {
+            block_lines.push(*line);
+        }
+    }
+
+    if let Some(start_line) = block_start_line {
+        panic!("`{start}` without `{end}` at {relpath:?}:{start_line}");
+    }
+}
+
+/// Given a list of strings, ensure that each public function we have is named somewhere.
+fn ensure_contains_api(relpath: &Path, block_start_line: usize, lines: &[&str]) {
+    let mut not_found = Vec::new();
+
+    for func in &*PUBLIC_FUNCTIONS {
+        // The function name may be on its own or somewhere in a snake case string.
+        let re = Regex::new(&format!(r"(\b|_){func}(\b|_)")).unwrap();
+        if !lines.iter().any(|line| re.is_match(line)) {
+            not_found.push(func);
+        }
+    }
+
+    if not_found.is_empty() {
+        return;
+    }
+
+    panic!("functions not found at {relpath:?}:{block_start_line}: {not_found:?}");
+}
+
+fn ensure_sorted(relpath: &Path, block_start_line: usize, lines: &[&str]) {
+    let mut sorted = lines.to_owned();
+    sorted.sort_unstable();
+    let a = lines.join("\n");
+    let b = sorted.join("\n");
+
+    assert_str_eq!(a, b, "sorted block at {relpath:?}:{block_start_line}");
+}
diff --git a/library/compiler-builtins/crates/util/build.rs b/library/compiler-builtins/crates/util/build.rs
index a1be4127527ae..f7ed72addee10 100644
--- a/library/compiler-builtins/crates/util/build.rs
+++ b/library/compiler-builtins/crates/util/build.rs
@@ -1,10 +1,10 @@
-#![allow(unexpected_cfgs)]
-
 #[path = "../../libm/configure.rs"]
 mod configure;
 
+use configure::{Config, Library};
+
 fn main() {
     println!("cargo:rerun-if-changed=../../libm/configure.rs");
-    let cfg = configure::Config::from_env();
-    configure::emit_libm_config(&cfg);
+    let cfg = Config::from_env(Library::Util);
+    configure::emit(&cfg);
 }
diff --git a/library/compiler-builtins/crates/util/src/main.rs b/library/compiler-builtins/crates/util/src/main.rs
index 70aa613f18d06..ac2c126d8c0d1 100644
--- a/library/compiler-builtins/crates/util/src/main.rs
+++ b/library/compiler-builtins/crates/util/src/main.rs
@@ -9,10 +9,10 @@ use std::num::ParseIntError;
 use std::str::FromStr;
 
 use cfg_if::cfg_if;
-use libm::support::{Float, Hexf, hf32, hf64};
+use libm::support::{Float, Hex, hf32, hf64};
 #[cfg(feature = "build-mpfr")]
 use libm_test::mpfloat::MpOp;
-use libm_test::{Hex, MathOp, TupleCall};
+use libm_test::{MathOp, TupleCall, builtins_wrapper};
 #[cfg(feature = "build-mpfr")]
 use rug::az::{self, Az};
 
@@ -37,7 +37,14 @@ SUBCOMMAND:
 
 fn main() {
     let args = env::args().collect::<Vec<_>>();
-    let str_args = args.iter().map(|s| s.as_str()).collect::<Vec<_>>();
+    let str_args = args
+        .iter()
+        .map(|s| {
+            // Allow pasting from comma-separated arguments
+            let s = s.as_str();
+            s.strip_suffix(",").unwrap_or(s)
+        })
+        .collect::<Vec<_>>();
 
     match &str_args.as_slice()[1..] {
         ["eval" | "x", basis, op, inputs @ ..] => do_eval(basis, op, inputs),
@@ -55,6 +62,7 @@ macro_rules! handle_call {
         CFn: $CFn:ty,
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
+        path: $path:path,
         attrs: [$($attr:meta),*],
         extra: ($basis:ident, $op:ident, $inputs:ident),
         fn_extra: $musl_fn:expr,
@@ -64,7 +72,7 @@ macro_rules! handle_call {
             type Op = libm_test::op::$fn_name::Routine;
 
             let input = <$RustArgs>::parse($inputs);
-            let libm_fn: <Op as MathOp>::RustFn = libm::$fn_name;
+            let libm_fn: <Op as MathOp>::RustFn = $path;
 
             let output = match $basis {
                 "libm" => input.call_intercept_panics(libm_fn),
@@ -81,7 +89,7 @@ macro_rules! handle_call {
                 }
                 _ => panic!("unrecognized or disabled basis '{}'", $basis),
             };
-            println!("{output:?} {:x}", Hexf(output));
+            println!("{output:?} {:x}", Hex(output));
             return;
         }
     };
@@ -91,7 +99,7 @@ macro_rules! handle_call {
 fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
     libm_macros::for_each_function! {
         callback: handle_call,
-        emit_types: [CFn, RustFn, RustArgs],
+        emit_types: [CFn, RustFn, RustArgs, path],
         extra: (basis, op, inputs),
         fn_extra: match MACRO_FN_NAME {
             // Not provided by musl
@@ -106,7 +114,8 @@ fn do_eval(basis: &str, op: &str, inputs: &[&str]) {
             | roundeven
             | roundevenf
             | ALL_F16
-            | ALL_F128 => None,
+            | ALL_F128
+            | ALL_BUILTINS => None,
             _ => Some(musl_math_sys::MACRO_FN_NAME)
         }
     }
@@ -160,18 +169,17 @@ fn do_classify(inputs: &[&str]) {
 fn classify_print<F>(x: F)
 where
     F: Float,
-    F::Int: Hex,
 {
     println!("{x:?}");
-    println!("    hex:  {}", Hexf(x));
-    println!("    bits: {}", x.to_bits().hex());
+    println!("    hex:  {}", Hex(x));
+    println!("    bits: {}", Hex(x.to_bits()));
     println!("    nan:  {}", x.is_nan());
     println!("    inf:  {}", x.is_infinite());
     println!("    normal: {}", !x.is_subnormal());
     println!("    pos:  {}", x.is_sign_positive());
-    println!("    exp:  {} {}", x.ex(), x.ex().hex());
+    println!("    exp:  {} {}", x.ex(), Hex(x.ex()));
     println!("    exp unbiased: {}", x.exp_unbiased());
-    println!("    frac: {} {}", x.frac(), x.frac().hex());
+    println!("    frac: {} {}", x.frac(), Hex(x.frac()));
 }
 
 /// Parse a tuple from a space-delimited string.
@@ -308,6 +316,48 @@ impl_parse_tuple!(f64);
 #[cfg(f128_enabled)]
 impl_parse_tuple_via_rug!(f128);
 
+macro_rules! impl_parse_tuple_int {
+    (@skip_u32 $ty:ty) => {
+        impl ParseTuple for ($ty,) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 1, "expected a single argument, got {input:?}");
+                (parse(input, 0),)
+            }
+        }
+
+        impl ParseTuple for ($ty, $ty) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                (parse(input, 0), parse(input, 1))
+            }
+        }
+
+        impl FromStrRadix for $ty {
+            fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
+                let s = strip_radix_prefix(s, radix);
+                <$ty>::from_str_radix(s, radix)
+            }
+        }
+    };
+    ($ty:ty) => {
+        impl_parse_tuple_int!(@skip_u32 $ty);
+
+        impl ParseTuple for ($ty, u32) {
+            fn parse(input: &[&str]) -> Self {
+                assert_eq!(input.len(), 2, "expected two arguments, got {input:?}");
+                (parse(input, 0), parse(input, 1))
+            }
+        }
+    };
+}
+
+impl_parse_tuple_int!(i32);
+impl_parse_tuple_int!(i64);
+impl_parse_tuple_int!(i128);
+impl_parse_tuple_int!(@skip_u32 u32);
+impl_parse_tuple_int!(u64);
+impl_parse_tuple_int!(u128);
+
 /// Try to parse the number, printing a nice message on failure.
 fn parse<T: FromStr + FromStrRadix>(input: &[&str], idx: usize) -> T {
     let s = input[idx];
@@ -354,13 +404,6 @@ trait FromStrRadix: Sized {
     fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError>;
 }
 
-impl FromStrRadix for i32 {
-    fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
-        let s = strip_radix_prefix(s, radix);
-        i32::from_str_radix(s, radix)
-    }
-}
-
 #[cfg(f16_enabled)]
 impl FromStrRadix for f16 {
     fn from_str_radix(s: &str, radix: u32) -> Result<Self, ParseIntError> {
diff --git a/library/compiler-builtins/etc/function-definitions.json b/library/compiler-builtins/etc/function-definitions.json
index 6bd395a84b66f..38d609da3fcfa 100644
--- a/library/compiler-builtins/etc/function-definitions.json
+++ b/library/compiler-builtins/etc/function-definitions.json
@@ -98,8 +98,8 @@
     },
     "ceil": {
         "sources": [
-            "libm/src/math/arch/i586.rs",
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/i586/rounding.rs",
+            "libm/src/math/arch/wasm32/rounding.rs",
             "libm/src/math/ceil.rs",
             "libm/src/math/generic/ceil.rs"
         ],
@@ -107,7 +107,7 @@
     },
     "ceilf": {
         "sources": [
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/wasm32/rounding.rs",
             "libm/src/math/ceil.rs",
             "libm/src/math/generic/ceil.rs"
         ],
@@ -253,7 +253,7 @@
     },
     "fabs": {
         "sources": [
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/wasm32/fabs.rs",
             "libm/src/math/fabs.rs",
             "libm/src/math/generic/fabs.rs"
         ],
@@ -261,7 +261,7 @@
     },
     "fabsf": {
         "sources": [
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/wasm32/fabs.rs",
             "libm/src/math/fabs.rs",
             "libm/src/math/generic/fabs.rs"
         ],
@@ -311,8 +311,8 @@
     },
     "floor": {
         "sources": [
-            "libm/src/math/arch/i586.rs",
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/i586/rounding.rs",
+            "libm/src/math/arch/wasm32/rounding.rs",
             "libm/src/math/floor.rs",
             "libm/src/math/generic/floor.rs"
         ],
@@ -320,7 +320,7 @@
     },
     "floorf": {
         "sources": [
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/wasm32/rounding.rs",
             "libm/src/math/floor.rs",
             "libm/src/math/generic/floor.rs"
         ],
@@ -342,23 +342,26 @@
     },
     "fma": {
         "sources": [
-            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/aarch64/fma.rs",
             "libm/src/math/arch/x86/fma.rs",
-            "libm/src/math/fma.rs"
+            "libm/src/math/fma.rs",
+            "libm/src/math/generic/fma.rs"
         ],
         "type": "f64"
     },
     "fmaf": {
         "sources": [
-            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/aarch64/fma.rs",
             "libm/src/math/arch/x86/fma.rs",
-            "libm/src/math/fma.rs"
+            "libm/src/math/fma.rs",
+            "libm/src/math/generic/fma.rs"
         ],
         "type": "f32"
     },
     "fmaf128": {
         "sources": [
-            "libm/src/math/fma.rs"
+            "libm/src/math/fma.rs",
+            "libm/src/math/generic/fma.rs"
         ],
         "type": "f128"
     },
@@ -820,29 +823,34 @@
     },
     "rint": {
         "sources": [
-            "libm/src/math/arch/aarch64.rs",
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/aarch64/rounding.rs",
+            "libm/src/math/arch/i586/rounding.rs",
+            "libm/src/math/arch/wasm32/rounding.rs",
+            "libm/src/math/generic/rint.rs",
             "libm/src/math/rint.rs"
         ],
         "type": "f64"
     },
     "rintf": {
         "sources": [
-            "libm/src/math/arch/aarch64.rs",
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/aarch64/rounding.rs",
+            "libm/src/math/arch/wasm32/rounding.rs",
+            "libm/src/math/generic/rint.rs",
             "libm/src/math/rint.rs"
         ],
         "type": "f32"
     },
     "rintf128": {
         "sources": [
+            "libm/src/math/generic/rint.rs",
             "libm/src/math/rint.rs"
         ],
         "type": "f128"
     },
     "rintf16": {
         "sources": [
-            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/aarch64/rounding.rs",
+            "libm/src/math/generic/rint.rs",
             "libm/src/math/rint.rs"
         ],
         "type": "f16"
@@ -965,9 +973,9 @@
     },
     "sqrt": {
         "sources": [
-            "libm/src/math/arch/aarch64.rs",
-            "libm/src/math/arch/wasm32.rs",
-            "libm/src/math/arch/x86.rs",
+            "libm/src/math/arch/aarch64/sqrt.rs",
+            "libm/src/math/arch/wasm32/sqrt.rs",
+            "libm/src/math/arch/x86/sqrt.rs",
             "libm/src/math/generic/sqrt.rs",
             "libm/src/math/sqrt.rs"
         ],
@@ -975,9 +983,9 @@
     },
     "sqrtf": {
         "sources": [
-            "libm/src/math/arch/aarch64.rs",
-            "libm/src/math/arch/wasm32.rs",
-            "libm/src/math/arch/x86.rs",
+            "libm/src/math/arch/aarch64/sqrt.rs",
+            "libm/src/math/arch/wasm32/sqrt.rs",
+            "libm/src/math/arch/x86/sqrt.rs",
             "libm/src/math/generic/sqrt.rs",
             "libm/src/math/sqrt.rs"
         ],
@@ -992,7 +1000,7 @@
     },
     "sqrtf16": {
         "sources": [
-            "libm/src/math/arch/aarch64.rs",
+            "libm/src/math/arch/aarch64/sqrt.rs",
             "libm/src/math/generic/sqrt.rs",
             "libm/src/math/sqrt.rs"
         ],
@@ -1036,7 +1044,7 @@
     },
     "trunc": {
         "sources": [
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/wasm32/rounding.rs",
             "libm/src/math/generic/trunc.rs",
             "libm/src/math/trunc.rs"
         ],
@@ -1044,7 +1052,7 @@
     },
     "truncf": {
         "sources": [
-            "libm/src/math/arch/wasm32.rs",
+            "libm/src/math/arch/wasm32/rounding.rs",
             "libm/src/math/generic/trunc.rs",
             "libm/src/math/trunc.rs"
         ],
diff --git a/library/compiler-builtins/etc/update-api-list.py b/library/compiler-builtins/etc/update-api-list.py
index 76c75cbf4dccb..2d8ad2903646d 100755
--- a/library/compiler-builtins/etc/update-api-list.py
+++ b/library/compiler-builtins/etc/update-api-list.py
@@ -6,6 +6,8 @@
 needed, or that lists are sorted.
 """
 
+# FIXME: this needs to be updated to work with compiler-builtins sources
+
 import difflib
 import json
 import re
@@ -14,7 +16,7 @@
 from dataclasses import dataclass
 from glob import glob
 from pathlib import Path
-from typing import Any, Callable, TypeAlias
+from typing import Any, TypeAlias
 
 SELF_PATH = Path(__file__)
 ETC_DIR = SELF_PATH.parent
@@ -116,10 +118,20 @@ def _init_function_list(self, index: IndexTy) -> None:
 
     def _init_defs(self, index: IndexTy) -> None:
         defs = {name: set() for name in self.public_functions}
-        funcs = (i for i in index.values() if "function" in i["inner"])
-        funcs = (f for f in funcs if f["name"] in self.public_functions)
-        for func in funcs:
-            defs[func["name"]].add(func["span"]["filename"])
+        all_funcs = (i for i in index.values() if "function" in i["inner"])
+
+        for func_def in all_funcs:
+            func_def_name = func_def["name"]
+            for pub_func_name in self.public_functions:
+                needles = [
+                    pub_func_name,
+                    f"{pub_func_name}_round",
+                    f"{pub_func_name}_status",
+                ]
+                if not any(needle == func_def_name for needle in needles):
+                    continue
+
+                defs[pub_func_name].add(func_def["span"]["filename"])
 
         # A lot of the `arch` module is often configured out so doesn't show up in docs. Use
         # string matching as a fallback.
@@ -134,6 +146,8 @@ def _init_defs(self, index: IndexTy) -> None:
 
         for name, sources in defs.items():
             base_sources = defs[base_name(name)[0]]
+
+            # Also add any functions in `generic` that use this function's base name
             for src in (s for s in base_sources if "generic" in s):
                 sources.add(src)
 
@@ -187,36 +201,6 @@ def write_function_defs(self, check: bool) -> None:
             with open(out_file, "w") as f:
                 f.write(output)
 
-    def tidy_lists(self) -> None:
-        """In each file, check annotations indicating blocks of code should be sorted or should
-        include all public API.
-        """
-
-        flist = sp.check_output(["git", "ls-files"], cwd=ROOT_DIR, text=True)
-
-        for path in flist.splitlines():
-            fpath = ROOT_DIR.joinpath(path)
-            if fpath.is_dir() or fpath == SELF_PATH:
-                continue
-
-            lines = fpath.read_text().splitlines()
-
-            validate_delimited_block(
-                fpath,
-                lines,
-                "verify-sorted-start",
-                "verify-sorted-end",
-                ensure_sorted,
-            )
-
-            validate_delimited_block(
-                fpath,
-                lines,
-                "verify-apilist-start",
-                "verify-apilist-end",
-                lambda p, n, lines: self.ensure_contains_api(p, n, lines),
-            )
-
     def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]):
         """Given a list of strings, ensure that each public function we have is named
         somewhere.
@@ -238,54 +222,6 @@ def ensure_contains_api(self, fpath: Path, line_num: int, lines: list[str]):
         exit(1)
 
 
-def validate_delimited_block(
-    fpath: Path,
-    lines: list[str],
-    start: str,
-    end: str,
-    validate: Callable[[Path, int, list[str]], None],
-) -> None:
-    """Identify blocks of code wrapped within `start` and `end`, collect their contents
-    to a list of strings, and call `validate` for each of those lists.
-    """
-    relpath = fpath.relative_to(ROOT_DIR)
-    block_lines = []
-    block_start_line: None | int = None
-    for line_num, line in enumerate(lines):
-        line_num += 1
-
-        if start in line:
-            block_start_line = line_num
-            continue
-
-        if end in line:
-            if block_start_line is None:
-                eprint(f"`{end}` without `{start}` at {relpath}:{line_num}")
-                exit(1)
-
-            validate(fpath, block_start_line, block_lines)
-            block_lines = []
-            block_start_line = None
-            continue
-
-        if block_start_line is not None:
-            block_lines.append(line)
-
-    if block_start_line is not None:
-        eprint(f"`{start}` without `{end}` at {relpath}:{block_start_line}")
-        exit(1)
-
-
-def ensure_sorted(fpath: Path, block_start_line: int, lines: list[str]) -> None:
-    """Ensure that a list of lines is sorted, otherwise print a diff and exit."""
-    relpath = fpath.relative_to(ROOT_DIR)
-    diff_and_exit(
-        "\n".join(lines),
-        "\n".join(sorted(lines)),
-        f"sorted block at {relpath}:{block_start_line}",
-    )
-
-
 def diff_and_exit(actual: str, expected: str, name: str):
     """If the two strings are different, print a diff between them and then exit
     with an error.
@@ -322,11 +258,13 @@ def base_name(name: str) -> tuple[str, str]:
 
     if name.endswith("f"):
         return (name.rstrip("f"), "f32")
-
-    if name.endswith("f16"):
+    elif name.endswith("f16"):
         return (name.rstrip("f16"), "f16")
-
-    if name.endswith("f128"):
+    elif name.endswith("f32"):
+        return (name.rstrip("f32"), "f32")
+    elif name.endswith("f64"):
+        return (name.rstrip("f64"), "f64")
+    elif name.endswith("f128"):
         return (name.rstrip("f128"), "f128")
 
     return (name, "f64")
@@ -340,8 +278,6 @@ def ensure_updated_list(check: bool) -> None:
     crate.write_function_list(check)
     crate.write_function_defs(check)
 
-    crate.tidy_lists()
-
 
 def main():
     """By default overwrite the file. If `--check` is passed, print a diff instead and
diff --git a/library/compiler-builtins/libm-test/Cargo.toml b/library/compiler-builtins/libm-test/Cargo.toml
index 8a8c2b0a2ce01..5014fb0008611 100644
--- a/library/compiler-builtins/libm-test/Cargo.toml
+++ b/library/compiler-builtins/libm-test/Cargo.toml
@@ -7,10 +7,12 @@ license = "MIT OR Apache-2.0"
 
 [dependencies]
 anyhow.workspace = true
+api-list-common.workspace = true
+compiler_builtins = { workspace = true, default-features = false, features = ["unstable-public-internals"] }
 # This is not directly used but is required so we can enable `gmp-mpfr-sys/force-cross`.
 gmp-mpfr-sys = { workspace = true, optional = true }
 indicatif.workspace = true
-libm = { workspace = true, default-features = true, features = ["unstable-public-internals"] }
+libm = { workspace = true, default-features = false, features = ["unstable-public-internals"] }
 libm-macros.workspace = true
 musl-math-sys = { workspace = true, optional = true }
 paste.workspace = true
@@ -34,7 +36,9 @@ rand = { workspace = true, optional = true }
 libtest-mimic.workspace = true
 
 [features]
-default = ["build-mpfr", "unstable-float"]
+# Defaults should match the defaults in compiler-builtins since we have that
+# dependency with `default-features=false`.
+default = ["build-mpfr", "unstable-float", "compiler_builtins/arch"]
 
 # Propagated from libm because this affects which functions we test.
 unstable-float = ["libm/unstable-float", "rug?/nightly-float"]
@@ -72,9 +76,3 @@ required-features = ["walltime"]
 # `z` so these tests get run last.
 name = "z_extensive"
 harness = false
-
-[lints.rust]
-# Values from the chared config.rs used by `libm` but not the test crate
-unexpected_cfgs = { level = "warn", check-cfg = [
-  'cfg(feature, values("arch", "force-soft-floats", "unstable-intrinsics"))',
-] }
diff --git a/library/compiler-builtins/libm-test/benches/icount.rs b/library/compiler-builtins/libm-test/benches/icount.rs
index 617e9fb7ad21f..f67f7b049d30e 100644
--- a/library/compiler-builtins/libm-test/benches/icount.rs
+++ b/library/compiler-builtins/libm-test/benches/icount.rs
@@ -5,7 +5,7 @@
 use std::hint::black_box;
 
 use gungraun::{library_benchmark, library_benchmark_group, main};
-use libm::support::{HInt, Hexf, hf16, hf32, hf64, hf128, u256};
+use libm::support::{HInt, Hex, hf16, hf32, hf64, hf128, i256, u256};
 use libm_test::generate::spaced;
 use libm_test::{CheckBasis, CheckCtx, GeneratorKind, MathOp, OpRustArgs, TupleCall, op};
 
@@ -41,11 +41,6 @@ macro_rules! icount_benches {
                     input.call(f);
                 }
             }
-
-            library_benchmark_group!(
-                name = [< icount_bench_ $fn_name _group  >];
-                benchmarks = [< icount_bench_ $fn_name >]
-            );
         }
     };
 }
@@ -54,6 +49,324 @@ libm_macros::for_each_function! {
     callback: icount_benches,
 }
 
+library_benchmark_group!(
+    name = icount_bench_math_group,
+    benchmarks = [
+        // verify-apilist-start
+        // verify-sorted-start
+        icount_bench_acos,
+        icount_bench_acosf,
+        icount_bench_acosh,
+        icount_bench_acoshf,
+        icount_bench_addf128,
+        icount_bench_addf16,
+        icount_bench_addf32,
+        icount_bench_addf64,
+        icount_bench_ashl_u128,
+        icount_bench_ashl_u32,
+        icount_bench_ashl_u64,
+        icount_bench_ashr_i128,
+        icount_bench_ashr_i32,
+        icount_bench_ashr_i64,
+        icount_bench_asin,
+        icount_bench_asinf,
+        icount_bench_asinh,
+        icount_bench_asinhf,
+        icount_bench_atan,
+        icount_bench_atan2,
+        icount_bench_atan2f,
+        icount_bench_atanf,
+        icount_bench_atanh,
+        icount_bench_atanhf,
+        icount_bench_cbrt,
+        icount_bench_cbrtf,
+        icount_bench_ceil,
+        icount_bench_ceilf,
+        icount_bench_ceilf128,
+        icount_bench_ceilf16,
+        icount_bench_copysign,
+        icount_bench_copysignf,
+        icount_bench_copysignf128,
+        icount_bench_copysignf16,
+        icount_bench_cos,
+        icount_bench_cosf,
+        icount_bench_cosh,
+        icount_bench_coshf,
+        icount_bench_divf128,
+        icount_bench_divf32,
+        icount_bench_divf64,
+        icount_bench_eqf128,
+        icount_bench_eqf16,
+        icount_bench_eqf32,
+        icount_bench_eqf64,
+        icount_bench_erf,
+        icount_bench_erfc,
+        icount_bench_erfcf,
+        icount_bench_erff,
+        icount_bench_exp,
+        icount_bench_exp10,
+        icount_bench_exp10f,
+        icount_bench_exp2,
+        icount_bench_exp2f,
+        icount_bench_expf,
+        icount_bench_expm1,
+        icount_bench_expm1f,
+        icount_bench_extend_f16_f128,
+        icount_bench_extend_f16_f32,
+        icount_bench_extend_f16_f64,
+        icount_bench_extend_f32_f128,
+        icount_bench_extend_f32_f64,
+        icount_bench_extend_f64_f128,
+        icount_bench_fabs,
+        icount_bench_fabsf,
+        icount_bench_fabsf128,
+        icount_bench_fabsf16,
+        icount_bench_fdim,
+        icount_bench_fdimf,
+        icount_bench_fdimf128,
+        icount_bench_fdimf16,
+        icount_bench_floor,
+        icount_bench_floorf,
+        icount_bench_floorf128,
+        icount_bench_floorf16,
+        icount_bench_fma,
+        icount_bench_fmaf,
+        icount_bench_fmaf128,
+        icount_bench_fmax,
+        icount_bench_fmaxf,
+        icount_bench_fmaxf128,
+        icount_bench_fmaxf16,
+        icount_bench_fmaximum,
+        icount_bench_fmaximum_num,
+        icount_bench_fmaximum_numf,
+        icount_bench_fmaximum_numf128,
+        icount_bench_fmaximum_numf16,
+        icount_bench_fmaximumf,
+        icount_bench_fmaximumf128,
+        icount_bench_fmaximumf16,
+        icount_bench_fmin,
+        icount_bench_fminf,
+        icount_bench_fminf128,
+        icount_bench_fminf16,
+        icount_bench_fminimum,
+        icount_bench_fminimum_num,
+        icount_bench_fminimum_numf,
+        icount_bench_fminimum_numf128,
+        icount_bench_fminimum_numf16,
+        icount_bench_fminimumf,
+        icount_bench_fminimumf128,
+        icount_bench_fminimumf16,
+        icount_bench_fmod,
+        icount_bench_fmodf,
+        icount_bench_fmodf128,
+        icount_bench_fmodf16,
+        icount_bench_frexp,
+        icount_bench_frexpf,
+        icount_bench_frexpf128,
+        icount_bench_frexpf16,
+        icount_bench_ftoi_f128_i128,
+        icount_bench_ftoi_f128_i32,
+        icount_bench_ftoi_f128_i64,
+        icount_bench_ftoi_f128_u128,
+        icount_bench_ftoi_f128_u32,
+        icount_bench_ftoi_f128_u64,
+        icount_bench_ftoi_f32_i128,
+        icount_bench_ftoi_f32_i32,
+        icount_bench_ftoi_f32_i64,
+        icount_bench_ftoi_f32_u128,
+        icount_bench_ftoi_f32_u32,
+        icount_bench_ftoi_f32_u64,
+        icount_bench_ftoi_f64_i128,
+        icount_bench_ftoi_f64_i32,
+        icount_bench_ftoi_f64_i64,
+        icount_bench_ftoi_f64_u128,
+        icount_bench_ftoi_f64_u32,
+        icount_bench_ftoi_f64_u64,
+        icount_bench_gef128,
+        icount_bench_gef16,
+        icount_bench_gef32,
+        icount_bench_gef64,
+        icount_bench_gtf128,
+        icount_bench_gtf16,
+        icount_bench_gtf32,
+        icount_bench_gtf64,
+        icount_bench_hypot,
+        icount_bench_hypotf,
+        icount_bench_iadd_i128,
+        icount_bench_iadd_u128,
+        icount_bench_iaddo_i128,
+        icount_bench_iaddo_u128,
+        icount_bench_idiv_i128,
+        icount_bench_idiv_i32,
+        icount_bench_idiv_i64,
+        icount_bench_idiv_u128,
+        icount_bench_idiv_u32,
+        icount_bench_idiv_u64,
+        icount_bench_idivmod_i128,
+        icount_bench_idivmod_i32,
+        icount_bench_idivmod_i64,
+        icount_bench_idivmod_u128,
+        icount_bench_idivmod_u32,
+        icount_bench_idivmod_u64,
+        icount_bench_ilogb,
+        icount_bench_ilogbf,
+        icount_bench_ilogbf128,
+        icount_bench_ilogbf16,
+        icount_bench_imod_i128,
+        icount_bench_imod_i32,
+        icount_bench_imod_i64,
+        icount_bench_imod_u128,
+        icount_bench_imod_u32,
+        icount_bench_imod_u64,
+        icount_bench_imul_i128,
+        icount_bench_imul_u64,
+        icount_bench_imulo_i128,
+        icount_bench_imulo_i32,
+        icount_bench_imulo_i64,
+        icount_bench_imulo_u128,
+        icount_bench_isub_i128,
+        icount_bench_isub_u128,
+        icount_bench_isubo_i128,
+        icount_bench_isubo_u128,
+        icount_bench_itof_i128_f128,
+        icount_bench_itof_i128_f32,
+        icount_bench_itof_i128_f64,
+        icount_bench_itof_i32_f128,
+        icount_bench_itof_i32_f32,
+        icount_bench_itof_i32_f64,
+        icount_bench_itof_i64_f128,
+        icount_bench_itof_i64_f32,
+        icount_bench_itof_i64_f64,
+        icount_bench_itof_u128_f128,
+        icount_bench_itof_u128_f32,
+        icount_bench_itof_u128_f64,
+        icount_bench_itof_u32_f128,
+        icount_bench_itof_u32_f32,
+        icount_bench_itof_u32_f64,
+        icount_bench_itof_u64_f128,
+        icount_bench_itof_u64_f32,
+        icount_bench_itof_u64_f64,
+        icount_bench_j0,
+        icount_bench_j0f,
+        icount_bench_j1,
+        icount_bench_j1f,
+        icount_bench_jn,
+        icount_bench_jnf,
+        icount_bench_ldexp,
+        icount_bench_ldexpf,
+        icount_bench_ldexpf128,
+        icount_bench_ldexpf16,
+        icount_bench_leading_zeros_u128,
+        icount_bench_leading_zeros_u32,
+        icount_bench_leading_zeros_u64,
+        icount_bench_lef128,
+        icount_bench_lef16,
+        icount_bench_lef32,
+        icount_bench_lef64,
+        icount_bench_lgamma,
+        icount_bench_lgamma_r,
+        icount_bench_lgammaf,
+        icount_bench_lgammaf_r,
+        icount_bench_log,
+        icount_bench_log10,
+        icount_bench_log10f,
+        icount_bench_log1p,
+        icount_bench_log1pf,
+        icount_bench_log2,
+        icount_bench_log2f,
+        icount_bench_logf,
+        icount_bench_lshr_u128,
+        icount_bench_lshr_u32,
+        icount_bench_lshr_u64,
+        icount_bench_ltf128,
+        icount_bench_ltf16,
+        icount_bench_ltf32,
+        icount_bench_ltf64,
+        icount_bench_modf,
+        icount_bench_modff,
+        icount_bench_mulf128,
+        icount_bench_mulf16,
+        icount_bench_mulf32,
+        icount_bench_mulf64,
+        icount_bench_narrow_f128_f16,
+        icount_bench_narrow_f128_f32,
+        icount_bench_narrow_f128_f64,
+        icount_bench_narrow_f32_f16,
+        icount_bench_narrow_f64_f16,
+        icount_bench_narrow_f64_f32,
+        icount_bench_nef128,
+        icount_bench_nef16,
+        icount_bench_nef32,
+        icount_bench_nef64,
+        icount_bench_nextafter,
+        icount_bench_nextafterf,
+        icount_bench_pow,
+        icount_bench_powf,
+        icount_bench_powif128,
+        icount_bench_powif32,
+        icount_bench_powif64,
+        icount_bench_remainder,
+        icount_bench_remainderf,
+        icount_bench_remquo,
+        icount_bench_remquof,
+        icount_bench_rint,
+        icount_bench_rintf,
+        icount_bench_rintf128,
+        icount_bench_rintf16,
+        icount_bench_round,
+        icount_bench_roundeven,
+        icount_bench_roundevenf,
+        icount_bench_roundevenf128,
+        icount_bench_roundevenf16,
+        icount_bench_roundf,
+        icount_bench_roundf128,
+        icount_bench_roundf16,
+        icount_bench_scalbn,
+        icount_bench_scalbnf,
+        icount_bench_scalbnf128,
+        icount_bench_scalbnf16,
+        icount_bench_sin,
+        icount_bench_sincos,
+        icount_bench_sincosf,
+        icount_bench_sinf,
+        icount_bench_sinh,
+        icount_bench_sinhf,
+        icount_bench_sqrt,
+        icount_bench_sqrtf,
+        icount_bench_sqrtf128,
+        icount_bench_sqrtf16,
+        icount_bench_subf128,
+        icount_bench_subf16,
+        icount_bench_subf32,
+        icount_bench_subf64,
+        icount_bench_tan,
+        icount_bench_tanf,
+        icount_bench_tanh,
+        icount_bench_tanhf,
+        icount_bench_tgamma,
+        icount_bench_tgammaf,
+        icount_bench_trailing_zeros_u128,
+        icount_bench_trailing_zeros_u32,
+        icount_bench_trailing_zeros_u64,
+        icount_bench_trunc,
+        icount_bench_truncf,
+        icount_bench_truncf128,
+        icount_bench_truncf16,
+        icount_bench_unordf128,
+        icount_bench_unordf16,
+        icount_bench_unordf32,
+        icount_bench_unordf64,
+        icount_bench_y0,
+        icount_bench_y0f,
+        icount_bench_y1,
+        icount_bench_y1f,
+        icount_bench_yn,
+        icount_bench_ynf,
+        // verify-sorted-end
+        // verify-apilist-end
+    ]
+);
+
 fn setup_u128_mul() -> Vec<(u128, u128)> {
     let step = u128::MAX / 300;
     let mut x = 0u128;
@@ -103,6 +416,13 @@ fn setup_u256_shift() -> Vec<(u256, u32)> {
     v
 }
 
+fn setup_i256_shift() -> Vec<(i256, u32)> {
+    setup_u256_shift()
+        .into_iter()
+        .map(|(x, i)| (x.signed(), i))
+        .collect()
+}
+
 #[library_benchmark]
 #[bench::linspace(setup_u128_mul())]
 fn icount_bench_u128_widen_mul(cases: Vec<(u128, u128)>) {
@@ -154,15 +474,25 @@ fn icount_bench_u256_shr(cases: Vec<(u256, u32)>) {
     }
 }
 
+#[library_benchmark]
+#[bench::linspace(setup_i256_shift())]
+fn icount_bench_i256_shr(cases: Vec<(i256, u32)>) {
+    for (x, y) in cases.iter().copied() {
+        black_box(black_box(x) >> black_box(y));
+    }
+}
+
 library_benchmark_group!(
-    name = icount_bench_u128_group;
-    benchmarks =
-    icount_bench_u128_widen_mul,
-    icount_bench_u256_narrowing_div,
-    icount_bench_u256_add,
-    icount_bench_u256_sub,
-    icount_bench_u256_shl,
-    icount_bench_u256_shr
+    name = icount_bench_u128_group,
+    benchmarks = [
+        icount_bench_u128_widen_mul,
+        icount_bench_u256_narrowing_div,
+        icount_bench_u256_add,
+        icount_bench_u256_sub,
+        icount_bench_u256_shl,
+        icount_bench_u256_shr,
+        icount_bench_i256_shr,
+    ]
 );
 
 #[library_benchmark]
@@ -194,226 +524,59 @@ fn icount_bench_hf128(s: &str) -> f128 {
 }
 
 library_benchmark_group!(
-    name = icount_bench_hf_parse_group;
-    benchmarks =
-    icount_bench_hf16,
-    icount_bench_hf32,
-    icount_bench_hf64,
-    icount_bench_hf128
+    name = icount_bench_hf_parse_group,
+    benchmarks = [
+        icount_bench_hf16,
+        icount_bench_hf32,
+        icount_bench_hf64,
+        icount_bench_hf128,
+    ]
 );
 
 #[library_benchmark]
 #[bench::short(1.015625)]
 #[bench::max(f16::MAX)]
 fn icount_bench_print_hf16(x: f16) -> String {
-    black_box(Hexf(x).to_string())
+    black_box(Hex(x).to_string())
 }
 
 #[library_benchmark]
 #[bench::short(1.015625)]
 #[bench::max(f32::MAX)]
 fn icount_bench_print_hf32(x: f32) -> String {
-    black_box(Hexf(x).to_string())
+    black_box(Hex(x).to_string())
 }
 
 #[library_benchmark]
 #[bench::short(1.015625)]
 #[bench::max(f64::MAX)]
 fn icount_bench_print_hf64(x: f64) -> String {
-    black_box(Hexf(x).to_string())
+    black_box(Hex(x).to_string())
 }
 
 #[library_benchmark]
 #[bench::short(1.015625)]
 #[bench::max(f128::MAX)]
 fn icount_bench_print_hf128(x: f128) -> String {
-    black_box(Hexf(x).to_string())
+    black_box(Hex(x).to_string())
 }
 
 library_benchmark_group!(
-    name = icount_bench_hf_print_group;
-    benchmarks =
-    icount_bench_print_hf16,
-    icount_bench_print_hf32,
-    icount_bench_print_hf64,
-    icount_bench_print_hf128
+    name = icount_bench_hf_print_group,
+    benchmarks = [
+        icount_bench_print_hf16,
+        icount_bench_print_hf32,
+        icount_bench_print_hf64,
+        icount_bench_print_hf128,
+    ]
 );
 
 main!(
-    library_benchmark_groups =
-    // Benchmarks not related to public libm math
-    icount_bench_u128_group,
-    icount_bench_hf_parse_group,
-    icount_bench_hf_print_group,
-    // verify-apilist-start
-    // verify-sorted-start
-    icount_bench_acos_group,
-    icount_bench_acosf_group,
-    icount_bench_acosh_group,
-    icount_bench_acoshf_group,
-    icount_bench_asin_group,
-    icount_bench_asinf_group,
-    icount_bench_asinh_group,
-    icount_bench_asinhf_group,
-    icount_bench_atan2_group,
-    icount_bench_atan2f_group,
-    icount_bench_atan_group,
-    icount_bench_atanf_group,
-    icount_bench_atanh_group,
-    icount_bench_atanhf_group,
-    icount_bench_cbrt_group,
-    icount_bench_cbrtf_group,
-    icount_bench_ceil_group,
-    icount_bench_ceilf128_group,
-    icount_bench_ceilf16_group,
-    icount_bench_ceilf_group,
-    icount_bench_copysign_group,
-    icount_bench_copysignf128_group,
-    icount_bench_copysignf16_group,
-    icount_bench_copysignf_group,
-    icount_bench_cos_group,
-    icount_bench_cosf_group,
-    icount_bench_cosh_group,
-    icount_bench_coshf_group,
-    icount_bench_erf_group,
-    icount_bench_erfc_group,
-    icount_bench_erfcf_group,
-    icount_bench_erff_group,
-    icount_bench_exp10_group,
-    icount_bench_exp10f_group,
-    icount_bench_exp2_group,
-    icount_bench_exp2f_group,
-    icount_bench_exp_group,
-    icount_bench_expf_group,
-    icount_bench_expm1_group,
-    icount_bench_expm1f_group,
-    icount_bench_fabs_group,
-    icount_bench_fabsf128_group,
-    icount_bench_fabsf16_group,
-    icount_bench_fabsf_group,
-    icount_bench_fdim_group,
-    icount_bench_fdimf128_group,
-    icount_bench_fdimf16_group,
-    icount_bench_fdimf_group,
-    icount_bench_floor_group,
-    icount_bench_floorf128_group,
-    icount_bench_floorf16_group,
-    icount_bench_floorf_group,
-    icount_bench_fma_group,
-    icount_bench_fmaf128_group,
-    icount_bench_fmaf_group,
-    icount_bench_fmax_group,
-    icount_bench_fmaxf128_group,
-    icount_bench_fmaxf16_group,
-    icount_bench_fmaxf_group,
-    icount_bench_fmaximum_group,
-    icount_bench_fmaximum_num_group,
-    icount_bench_fmaximum_numf128_group,
-    icount_bench_fmaximum_numf16_group,
-    icount_bench_fmaximum_numf_group,
-    icount_bench_fmaximumf128_group,
-    icount_bench_fmaximumf16_group,
-    icount_bench_fmaximumf_group,
-    icount_bench_fmin_group,
-    icount_bench_fminf128_group,
-    icount_bench_fminf16_group,
-    icount_bench_fminf_group,
-    icount_bench_fminimum_group,
-    icount_bench_fminimum_num_group,
-    icount_bench_fminimum_numf128_group,
-    icount_bench_fminimum_numf16_group,
-    icount_bench_fminimum_numf_group,
-    icount_bench_fminimumf128_group,
-    icount_bench_fminimumf16_group,
-    icount_bench_fminimumf_group,
-    icount_bench_fmod_group,
-    icount_bench_fmodf128_group,
-    icount_bench_fmodf16_group,
-    icount_bench_fmodf_group,
-    icount_bench_frexp_group,
-    icount_bench_frexpf128_group,
-    icount_bench_frexpf16_group,
-    icount_bench_frexpf_group,
-    icount_bench_hypot_group,
-    icount_bench_hypotf_group,
-    icount_bench_ilogb_group,
-    icount_bench_ilogbf128_group,
-    icount_bench_ilogbf16_group,
-    icount_bench_ilogbf_group,
-    icount_bench_j0_group,
-    icount_bench_j0f_group,
-    icount_bench_j1_group,
-    icount_bench_j1f_group,
-    icount_bench_jn_group,
-    icount_bench_jnf_group,
-    icount_bench_ldexp_group,
-    icount_bench_ldexpf128_group,
-    icount_bench_ldexpf16_group,
-    icount_bench_ldexpf_group,
-    icount_bench_lgamma_group,
-    icount_bench_lgamma_r_group,
-    icount_bench_lgammaf_group,
-    icount_bench_lgammaf_r_group,
-    icount_bench_log10_group,
-    icount_bench_log10f_group,
-    icount_bench_log1p_group,
-    icount_bench_log1pf_group,
-    icount_bench_log2_group,
-    icount_bench_log2f_group,
-    icount_bench_log_group,
-    icount_bench_logf_group,
-    icount_bench_modf_group,
-    icount_bench_modff_group,
-    icount_bench_nextafter_group,
-    icount_bench_nextafterf_group,
-    icount_bench_pow_group,
-    icount_bench_powf_group,
-    icount_bench_remainder_group,
-    icount_bench_remainderf_group,
-    icount_bench_remquo_group,
-    icount_bench_remquof_group,
-    icount_bench_rint_group,
-    icount_bench_rintf128_group,
-    icount_bench_rintf16_group,
-    icount_bench_rintf_group,
-    icount_bench_round_group,
-    icount_bench_roundeven_group,
-    icount_bench_roundevenf128_group,
-    icount_bench_roundevenf16_group,
-    icount_bench_roundevenf_group,
-    icount_bench_roundf128_group,
-    icount_bench_roundf16_group,
-    icount_bench_roundf_group,
-    icount_bench_scalbn_group,
-    icount_bench_scalbnf128_group,
-    icount_bench_scalbnf16_group,
-    icount_bench_scalbnf_group,
-    icount_bench_sin_group,
-    icount_bench_sincos_group,
-    icount_bench_sincosf_group,
-    icount_bench_sinf_group,
-    icount_bench_sinh_group,
-    icount_bench_sinhf_group,
-    icount_bench_sqrt_group,
-    icount_bench_sqrtf128_group,
-    icount_bench_sqrtf16_group,
-    icount_bench_sqrtf_group,
-    icount_bench_tan_group,
-    icount_bench_tanf_group,
-    icount_bench_tanh_group,
-    icount_bench_tanhf_group,
-    icount_bench_tgamma_group,
-    icount_bench_tgammaf_group,
-    icount_bench_trunc_group,
-    icount_bench_truncf128_group,
-    icount_bench_truncf16_group,
-    icount_bench_truncf_group,
-    icount_bench_y0_group,
-    icount_bench_y0f_group,
-    icount_bench_y1_group,
-    icount_bench_y1f_group,
-    icount_bench_yn_group,
-    icount_bench_ynf_group,
-    // verify-sorted-end
-    // verify-apilist-end
+    library_benchmark_groups = [
+        // Benchmarks not related to public libm math
+        icount_bench_u128_group,
+        icount_bench_hf_parse_group,
+        icount_bench_hf_print_group,
+        icount_bench_math_group,
+    ]
 );
diff --git a/library/compiler-builtins/libm-test/benches/random.rs b/library/compiler-builtins/libm-test/benches/random.rs
index 1b17f049ecac2..ae4c2f51bdec5 100644
--- a/library/compiler-builtins/libm-test/benches/random.rs
+++ b/library/compiler-builtins/libm-test/benches/random.rs
@@ -69,7 +69,7 @@ where
         use anyhow::Context;
         use libm_test::CheckOutput;
 
-        if cfg!(x86_no_sse) && musl_extra.skip_on_i586 {
+        if cfg!(x86_no_sse2) && musl_extra.skip_on_i586 {
             break;
         }
 
@@ -137,7 +137,8 @@ libm_macros::for_each_function! {
         | roundeven
         | roundevenf
         | ALL_F16
-        | ALL_F128 => (false, None),
+        | ALL_F128
+        | ALL_BUILTINS => (false, None),
 
         // By default we never skip (false) and always have a musl function available
         _ => (false, Some(musl_math_sys::MACRO_FN_NAME))
diff --git a/library/compiler-builtins/libm-test/build.rs b/library/compiler-builtins/libm-test/build.rs
index 510ba842f10ab..fa7db8ed63e3e 100644
--- a/library/compiler-builtins/libm-test/build.rs
+++ b/library/compiler-builtins/libm-test/build.rs
@@ -1,9 +1,10 @@
 #[path = "../libm/configure.rs"]
 mod configure;
-use configure::Config;
+
+use configure::{Config, Library};
 
 fn main() {
     println!("cargo:rerun-if-changed=../libm/configure.rs");
-    let cfg = Config::from_env();
-    configure::emit_test_config(&cfg);
+    let cfg = Config::from_env(Library::LibmTest);
+    configure::emit(&cfg);
 }
diff --git a/library/compiler-builtins/libm-test/examples/plot_domains.rs b/library/compiler-builtins/libm-test/examples/plot_domains.rs
index 7331d454f2111..b2fd1979ad550 100644
--- a/library/compiler-builtins/libm-test/examples/plot_domains.rs
+++ b/library/compiler-builtins/libm-test/examples/plot_domains.rs
@@ -52,7 +52,7 @@ fn main() {
 /// Run multiple generators for a single operator.
 fn plot_one_operator<Op>(out_dir: &Path, config: &mut String)
 where
-    Op: MathOp<FTy = f32, RustArgs = (f32,)>,
+    Op: MathOp<RustArgs = (f32,)>,
     Op::RustArgs: SpacedInput<Op>,
 {
     let mut ctx = CheckCtx::new(Op::IDENTIFIER, CheckBasis::Mpfr, GeneratorKind::Spaced);
diff --git a/library/compiler-builtins/libm-test/src/builtins_wrapper.rs b/library/compiler-builtins/libm-test/src/builtins_wrapper.rs
new file mode 100644
index 0000000000000..ce517148b9ca3
--- /dev/null
+++ b/library/compiler-builtins/libm-test/src/builtins_wrapper.rs
@@ -0,0 +1,310 @@
+//! Wrappers around compiler-builtins functions.
+//!
+//! Functions from compiler-builtins have a different naming scheme from libm and often a different
+//! ABI (doesn't work with libm-test traits because that changes the type signature). Wrap these
+//! to make them a bit more similar to the rest of the libm functions.
+
+macro_rules! cb_op {
+    // Fully generic version
+    ($mod:ident, $cb_name:ident, $new_name:ident, ($($arg:ident: $ArgTy:ty),*) -> $RetTy:ty) => {
+        pub fn $new_name($($arg: $ArgTy),*) -> $RetTy {
+            compiler_builtins::float::$mod::$cb_name($($arg),*)
+        }
+    };
+    (@int $mod:ident, $cb_name:ident, $new_name:ident, ($($arg:ident: $ArgTy:ty),*) -> $RetTy:ty) => {
+        pub fn $new_name($($arg: $ArgTy),*) -> $RetTy {
+            compiler_builtins::int::$mod::$cb_name($($arg),*)
+        }
+    };
+
+    // Common signatures
+    (@binop $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        cb_op!($mod, $cb_name, $new_name, (a: $ty, b: $ty) -> $ty);
+    };
+
+    // Cmp signatures. See the documentation in cmp.rs regarding the result.
+    (@cmp_eq $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> bool {
+            compiler_builtins::float::$mod::$cb_name(a, b) == 0
+        }
+    };
+    (@cmp_ne $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> bool {
+            compiler_builtins::float::$mod::$cb_name(a, b) != 0
+        }
+    };
+    (@cmp_unord $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> bool {
+            compiler_builtins::float::$mod::$cb_name(a, b) != 0
+        }
+    };
+    (@cmp_lt $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> bool {
+            compiler_builtins::float::$mod::$cb_name(a, b) < 0
+        }
+    };
+    (@cmp_le $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> bool {
+            compiler_builtins::float::$mod::$cb_name(a, b) <= 0
+        }
+    };
+    (@cmp_gt $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> bool {
+            compiler_builtins::float::$mod::$cb_name(a, b) > 0
+        }
+    };
+    (@cmp_ge $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> bool {
+            compiler_builtins::float::$mod::$cb_name(a, b) >= 0
+        }
+    };
+    (@int_binop_oflow $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> ($ty, bool) {
+            let mut oflow = 0;
+            let res = compiler_builtins::int::$mod::$cb_name(a, b, &mut oflow);
+            (res, oflow != 0)
+        }
+    };
+
+    // Make division by 0 well-defined so testing is easier.
+    (@int_div $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> $ty {
+            if b == 0 {
+                return <$ty>::MIN;
+            }
+            compiler_builtins::int::$mod::$cb_name(a, b)
+        }
+    };
+    (@int_divmod $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> ($ty, $ty) {
+            if b == 0 {
+                return (<$ty>::MIN, <$ty>::MIN);
+            }
+            let mut rem = 0;
+            let div = compiler_builtins::int::$mod::$cb_name(a, b, &mut rem);
+            (div, rem)
+        }
+    };
+    (@int_udivmod $ty:ty, $mod:ident, $cb_name:ident, $new_name:ident) => {
+        pub fn $new_name(a: $ty, b: $ty) -> ($ty, $ty) {
+            if b == 0 {
+                return (<$ty>::MIN, <$ty>::MIN);
+            }
+            let mut rem = 0;
+            let div = compiler_builtins::int::$mod::$cb_name(a, b, Some(&mut rem));
+            (div, rem)
+        }
+    };
+}
+
+#[cfg(f16_enabled)]
+cb_op!(@binop f16, add, __addhf3, addf16);
+cb_op!(@binop f32, add, __addsf3, addf32);
+cb_op!(@binop f64, add, __adddf3, addf64);
+#[cfg(f128_enabled)]
+cb_op!(@binop f128, add, __addtf3, addf128);
+
+#[cfg(f16_enabled)]
+cb_op!(@binop f16, sub, __subhf3, subf16);
+cb_op!(@binop f32, sub, __subsf3, subf32);
+cb_op!(@binop f64, sub, __subdf3, subf64);
+#[cfg(f128_enabled)]
+cb_op!(@binop f128, sub, __subtf3, subf128);
+
+#[cfg(f16_enabled)]
+cb_op!(@binop f16, mul, __mulhf3, mulf16);
+cb_op!(@binop f32, mul, __mulsf3, mulf32);
+cb_op!(@binop f64, mul, __muldf3, mulf64);
+#[cfg(f128_enabled)]
+cb_op!(@binop f128, mul, __multf3, mulf128);
+
+cb_op!(@binop f32, div, __divsf3, divf32);
+cb_op!(@binop f64, div, __divdf3, divf64);
+#[cfg(f128_enabled)]
+cb_op!(@binop f128, div, __divtf3, divf128);
+
+cb_op!(pow, __powisf2, powif32, (a: f32, b: i32) -> f32);
+cb_op!(pow, __powidf2, powif64, (a: f64, b: i32) -> f64);
+#[cfg(f128_enabled)]
+cb_op!(pow, __powitf2, powif128, (a: f128, b: i32) -> f128);
+
+#[cfg(f16_enabled)]
+cb_op!(@cmp_eq f16, cmp, __eqhf2, eqf16);
+cb_op!(@cmp_eq f32, cmp, __eqsf2, eqf32);
+cb_op!(@cmp_eq f64, cmp, __eqdf2, eqf64);
+#[cfg(f128_enabled)]
+cb_op!(@cmp_eq f128, cmp, __eqtf2, eqf128);
+
+#[cfg(f16_enabled)]
+cb_op!(@cmp_gt f16, cmp, __gthf2, gtf16);
+cb_op!(@cmp_gt f32, cmp, __gtsf2, gtf32);
+cb_op!(@cmp_gt f64, cmp, __gtdf2, gtf64);
+#[cfg(f128_enabled)]
+cb_op!(@cmp_gt f128, cmp, __gttf2, gtf128);
+
+#[cfg(f16_enabled)]
+cb_op!(@cmp_ge f16, cmp, __gehf2, gef16);
+cb_op!(@cmp_ge f32, cmp, __gesf2, gef32);
+cb_op!(@cmp_ge f64, cmp, __gedf2, gef64);
+#[cfg(f128_enabled)]
+cb_op!(@cmp_ge f128, cmp, __getf2, gef128);
+
+#[cfg(f16_enabled)]
+cb_op!(@cmp_lt f16, cmp, __lthf2, ltf16);
+cb_op!(@cmp_lt f32, cmp, __ltsf2, ltf32);
+cb_op!(@cmp_lt f64, cmp, __ltdf2, ltf64);
+#[cfg(f128_enabled)]
+cb_op!(@cmp_lt f128, cmp, __lttf2, ltf128);
+
+#[cfg(f16_enabled)]
+cb_op!(@cmp_le f16, cmp, __lehf2, lef16);
+cb_op!(@cmp_le f32, cmp, __lesf2, lef32);
+cb_op!(@cmp_le f64, cmp, __ledf2, lef64);
+#[cfg(f128_enabled)]
+cb_op!(@cmp_le f128, cmp, __letf2, lef128);
+
+#[cfg(f16_enabled)]
+cb_op!(@cmp_ne f16, cmp, __nehf2, nef16);
+cb_op!(@cmp_ne f32, cmp, __nesf2, nef32);
+cb_op!(@cmp_ne f64, cmp, __nedf2, nef64);
+#[cfg(f128_enabled)]
+cb_op!(@cmp_ne f128, cmp, __netf2, nef128);
+
+#[cfg(f16_enabled)]
+cb_op!(@cmp_unord f16, cmp, __unordhf2, unordf16);
+cb_op!(@cmp_unord f32, cmp, __unordsf2, unordf32);
+cb_op!(@cmp_unord f64, cmp, __unorddf2, unordf64);
+#[cfg(f128_enabled)]
+cb_op!(@cmp_unord f128, cmp, __unordtf2, unordf128);
+
+#[cfg(f16_enabled)]
+cb_op!(extend, __extendhfsf2, extend_f16_f32, (a: f16) -> f32);
+#[cfg(f16_enabled)]
+cb_op!(extend, __extendhfdf2, extend_f16_f64, (a: f16) -> f64);
+#[cfg(f16_enabled)]
+#[cfg(f128_enabled)]
+cb_op!(extend, __extendhftf2, extend_f16_f128, (a: f16) -> f128);
+cb_op!(extend, __extendsfdf2, extend_f32_f64, (a: f32) -> f64);
+#[cfg(f128_enabled)]
+cb_op!(extend, __extendsftf2, extend_f32_f128, (a: f32) -> f128);
+#[cfg(f128_enabled)]
+cb_op!(extend, __extenddftf2, extend_f64_f128, (a: f64) -> f128);
+
+// Note that these are renamed from trunc to narrow to avoid collision with libm `trunc`.
+#[cfg(f16_enabled)]
+cb_op!(trunc, __truncsfhf2, narrow_f32_f16, (a: f32) -> f16);
+#[cfg(f16_enabled)]
+cb_op!(trunc, __truncdfhf2, narrow_f64_f16, (a: f64) -> f16);
+cb_op!(trunc, __truncdfsf2, narrow_f64_f32, (a: f64) -> f32);
+#[cfg(f16_enabled)]
+#[cfg(f128_enabled)]
+cb_op!(trunc, __trunctfhf2, narrow_f128_f16, (a: f128) -> f16);
+#[cfg(f128_enabled)]
+cb_op!(trunc, __trunctfsf2, narrow_f128_f32, (a: f128) -> f32);
+#[cfg(f128_enabled)]
+cb_op!(trunc, __trunctfdf2, narrow_f128_f64, (a: f128) -> f64);
+
+cb_op!(conv, __fixsfsi, ftoi_f32_i32, (a: f32) -> i32);
+cb_op!(conv, __fixsfdi, ftoi_f32_i64, (a: f32) -> i64);
+cb_op!(conv, __fixsfti, ftoi_f32_i128, (a: f32) -> i128);
+cb_op!(conv, __fixdfsi, ftoi_f64_i32, (a: f64) -> i32);
+cb_op!(conv, __fixdfdi, ftoi_f64_i64, (a: f64) -> i64);
+cb_op!(conv, __fixdfti, ftoi_f64_i128, (a: f64) -> i128);
+#[cfg(f128_enabled)]
+cb_op!(conv, __fixtfsi, ftoi_f128_i32, (a: f128) -> i32);
+#[cfg(f128_enabled)]
+cb_op!(conv, __fixtfdi, ftoi_f128_i64, (a: f128) -> i64);
+#[cfg(f128_enabled)]
+cb_op!(conv, __fixtfti, ftoi_f128_i128, (a: f128) -> i128);
+cb_op!(conv, __fixunssfsi, ftoi_f32_u32, (a: f32) -> u32);
+cb_op!(conv, __fixunssfdi, ftoi_f32_u64, (a: f32) -> u64);
+cb_op!(conv, __fixunssfti, ftoi_f32_u128, (a: f32) -> u128);
+cb_op!(conv, __fixunsdfsi, ftoi_f64_u32, (a: f64) -> u32);
+cb_op!(conv, __fixunsdfdi, ftoi_f64_u64, (a: f64) -> u64);
+cb_op!(conv, __fixunsdfti, ftoi_f64_u128, (a: f64) -> u128);
+#[cfg(f128_enabled)]
+cb_op!(conv, __fixunstfsi, ftoi_f128_u32, (a: f128) -> u32);
+#[cfg(f128_enabled)]
+cb_op!(conv, __fixunstfdi, ftoi_f128_u64, (a: f128) -> u64);
+#[cfg(f128_enabled)]
+cb_op!(conv, __fixunstfti, ftoi_f128_u128, (a: f128) -> u128);
+
+cb_op!(conv, __floatsisf, itof_i32_f32, (a: i32) -> f32);
+cb_op!(conv, __floatdisf, itof_i64_f32, (a: i64) -> f32);
+cb_op!(conv, __floattisf, itof_i128_f32, (a: i128) -> f32);
+cb_op!(conv, __floatsidf, itof_i32_f64, (a: i32) -> f64);
+cb_op!(conv, __floatdidf, itof_i64_f64, (a: i64) -> f64);
+cb_op!(conv, __floattidf, itof_i128_f64, (a: i128) -> f64);
+#[cfg(f128_enabled)]
+cb_op!(conv, __floatsitf, itof_i32_f128, (a: i32) -> f128);
+#[cfg(f128_enabled)]
+cb_op!(conv, __floatditf, itof_i64_f128, (a: i64) -> f128);
+#[cfg(f128_enabled)]
+cb_op!(conv, __floattitf, itof_i128_f128, (a: i128) -> f128);
+cb_op!(conv, __floatunsisf, itof_u32_f32, (a: u32) -> f32);
+cb_op!(conv, __floatundisf, itof_u64_f32, (a: u64) -> f32);
+cb_op!(conv, __floatuntisf, itof_u128_f32, (a: u128) -> f32);
+cb_op!(conv, __floatunsidf, itof_u32_f64, (a: u32) -> f64);
+cb_op!(conv, __floatundidf, itof_u64_f64, (a: u64) -> f64);
+cb_op!(conv, __floatuntidf, itof_u128_f64, (a: u128) -> f64);
+#[cfg(f128_enabled)]
+cb_op!(conv, __floatunsitf, itof_u32_f128, (a: u32) -> f128);
+#[cfg(f128_enabled)]
+cb_op!(conv, __floatunditf, itof_u64_f128, (a: u64) -> f128);
+#[cfg(f128_enabled)]
+cb_op!(conv, __floatuntitf, itof_u128_f128, (a: u128) -> f128);
+
+/* int ops */
+
+cb_op!(@int addsub, __rust_i128_add, iadd_i128, (a: i128, b: i128) -> i128);
+cb_op!(@int addsub, __rust_i128_sub, isub_i128, (a: i128, b: i128) -> i128);
+cb_op!(@int addsub, __rust_u128_add, iadd_u128, (a: u128, b: u128) -> u128);
+cb_op!(@int addsub, __rust_u128_sub, isub_u128, (a: u128, b: u128) -> u128);
+cb_op!(@int_binop_oflow i128, addsub, __rust_i128_addo, iaddo_i128);
+cb_op!(@int_binop_oflow i128, addsub, __rust_i128_subo, isubo_i128);
+cb_op!(@int_binop_oflow u128, addsub, __rust_u128_addo, iaddo_u128);
+cb_op!(@int_binop_oflow u128, addsub, __rust_u128_subo, isubo_u128);
+
+cb_op!(@int mul, __muldi3, imul_u64, (a: u64, b: u64) -> u64);
+cb_op!(@int mul, __multi3, imul_i128, (a: i128, b: i128) -> i128);
+cb_op!(@int_binop_oflow i32, mul, __mulosi4, imulo_i32);
+cb_op!(@int_binop_oflow i64, mul, __mulodi4, imulo_i64);
+cb_op!(@int_binop_oflow i128, mul, __muloti4, imulo_i128);
+cb_op!(@int_binop_oflow u128, mul, __rust_u128_mulo, imulo_u128);
+
+cb_op!(@int_div i32, sdiv, __divsi3, idiv_i32);
+cb_op!(@int_div i64, sdiv, __divdi3, idiv_i64);
+cb_op!(@int_div i128, sdiv, __divti3, idiv_i128);
+cb_op!(@int_div i32, sdiv, __modsi3, imod_i32);
+cb_op!(@int_div i64, sdiv, __moddi3, imod_i64);
+cb_op!(@int_div i128, sdiv, __modti3, imod_i128);
+cb_op!(@int_divmod i32, sdiv, __divmodsi4, idivmod_i32);
+cb_op!(@int_divmod i64, sdiv, __divmoddi4, idivmod_i64);
+cb_op!(@int_divmod i128, sdiv, __divmodti4, idivmod_i128);
+
+cb_op!(@int_div u32, udiv, __udivsi3, idiv_u32);
+cb_op!(@int_div u64, udiv, __udivdi3, idiv_u64);
+cb_op!(@int_div u128, udiv, __udivti3, idiv_u128);
+cb_op!(@int_div u32, udiv, __umodsi3, imod_u32);
+cb_op!(@int_div u64, udiv, __umoddi3, imod_u64);
+cb_op!(@int_div u128, udiv, __umodti3, imod_u128);
+cb_op!(@int_udivmod u32, udiv, __udivmodsi4, idivmod_u32);
+cb_op!(@int_udivmod u64, udiv, __udivmoddi4, idivmod_u64);
+cb_op!(@int_udivmod u128, udiv, __udivmodti4, idivmod_u128);
+
+cb_op!(@int shift, __ashlsi3, ashl_u32, (a: u32, b: u32) -> u32);
+cb_op!(@int shift, __ashldi3, ashl_u64, (a: u64, b: u32) -> u64);
+cb_op!(@int shift, __ashlti3, ashl_u128, (a: u128, b: u32) -> u128);
+cb_op!(@int shift, __ashrsi3, ashr_i32, (a: i32, b: u32) -> i32);
+cb_op!(@int shift, __ashrdi3, ashr_i64, (a: i64, b: u32) -> i64);
+cb_op!(@int shift, __ashrti3, ashr_i128, (a: i128, b: u32) -> i128);
+cb_op!(@int shift, __lshrsi3, lshr_u32, (a: u32, b: u32) -> u32);
+cb_op!(@int shift, __lshrdi3, lshr_u64, (a: u64, b: u32) -> u64);
+cb_op!(@int shift, __lshrti3, lshr_u128, (a: u128, b: u32) -> u128);
+
+cb_op!(@int leading_zeros, __clzsi2, leading_zeros_u32, (a: u32) -> usize);
+cb_op!(@int leading_zeros, __clzdi2, leading_zeros_u64, (a: u64) -> usize);
+cb_op!(@int leading_zeros, __clzti2, leading_zeros_u128, (a: u128) -> usize);
+cb_op!(@int trailing_zeros, __ctzsi2, trailing_zeros_u32, (a: u32) -> usize);
+cb_op!(@int trailing_zeros, __ctzdi2, trailing_zeros_u64, (a: u64) -> usize);
+cb_op!(@int trailing_zeros, __ctzti2, trailing_zeros_u128, (a: u128) -> usize);
diff --git a/library/compiler-builtins/libm-test/src/domain.rs b/library/compiler-builtins/libm-test/src/domain.rs
index eb009bfa093f4..0ae105f4a02d2 100644
--- a/library/compiler-builtins/libm-test/src/domain.rs
+++ b/library/compiler-builtins/libm-test/src/domain.rs
@@ -224,6 +224,47 @@ pub fn get_domain<F: Float, I: Int>(
     argnum: usize,
 ) -> EitherPrim<Domain<F>, Domain<I>> {
     let x = match id.base_name() {
+        // Basic arithmetic
+        BaseName::Add => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Sub => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Mul => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Div => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Powi => &EitherPrim::UNBOUNDED2[..],
+
+        // Comparison
+        BaseName::Eq => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Ne => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Gt => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Ge => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Lt => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Le => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Unord => &EitherPrim::UNBOUNDED2[..],
+
+        // Conversions
+        BaseName::Extend => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Narrow => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Ftoi => &EitherPrim::UNBOUNDED1[..],
+        BaseName::Itof => &EitherPrim::UNBOUNDED1[..],
+
+        // Integer ops
+        BaseName::Iadd => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Iaddo => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Isub => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Isubo => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Imul => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Imulo => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Idiv => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Imod => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Idivmod => &EitherPrim::UNBOUNDED2[..],
+        // Shifts technically aren't unbounded, but its range is restricted elsewhere in
+        // our test generators.
+        BaseName::Ashl => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Ashr => &EitherPrim::UNBOUNDED2[..],
+        BaseName::Lshr => &EitherPrim::UNBOUNDED2[..],
+        BaseName::LeadingZeros => &EitherPrim::UNBOUNDED1[..],
+        BaseName::TrailingZeros => &EitherPrim::UNBOUNDED1[..],
+
+        // Math functions
         BaseName::Acos => &EitherPrim::INVERSE_TRIG_PERIODIC[..],
         BaseName::Acosh => &EitherPrim::ACOSH[..],
         BaseName::Asin => &EitherPrim::INVERSE_TRIG_PERIODIC[..],
diff --git a/library/compiler-builtins/libm-test/src/f8_impl.rs b/library/compiler-builtins/libm-test/src/f8_impl.rs
index 9f19f518e2a34..3288e6a6acf2c 100644
--- a/library/compiler-builtins/libm-test/src/f8_impl.rs
+++ b/library/compiler-builtins/libm-test/src/f8_impl.rs
@@ -58,6 +58,14 @@ impl Float for f8 {
         self.0 as i8
     }
 
+    fn eq_repr(self, rhs: Self) -> bool {
+        if self.is_nan() && rhs.is_nan() {
+            true
+        } else {
+            self.to_bits() == rhs.to_bits()
+        }
+    }
+
     fn is_nan(self) -> bool {
         self.0 & Self::EXP_MASK == Self::EXP_MASK && self.0 & Self::SIG_MASK != 0
     }
diff --git a/library/compiler-builtins/libm-test/src/generate.rs b/library/compiler-builtins/libm-test/src/generate.rs
index da080d23fa79c..7118752f4f0bb 100644
--- a/library/compiler-builtins/libm-test/src/generate.rs
+++ b/library/compiler-builtins/libm-test/src/generate.rs
@@ -48,3 +48,28 @@ impl<I: Iterator> Iterator for KnownSize<I> {
 }
 
 impl<I: Iterator> ExactSizeIterator for KnownSize<I> {}
+
+/// Yield `(a0, b0), ..., (a0, bn), ..., (an, bn)` for iterators `[a0, ..., an]` and
+/// `[b0, ..., bn]`.
+fn product2<I0, I1>(i0: I0, i1: I1) -> impl Iterator<Item = (I0::Item, I1::Item)>
+where
+    I0: Iterator<Item: Copy>,
+    I1: Iterator<Item: Copy> + Clone,
+{
+    i0.flat_map(move |first| i1.clone().map(move |second| (first, second)))
+}
+
+/// Yield `(a0, b0, c0), ..., (a0, b0, cn), ..., (a0, bn, cn), ..., (an, bn, cn)` for iterators
+/// `[a0, ..., an]`, `[b0, ..., bn]` and `[c0, ..., cn]`.
+fn product3<I0, I1, I2>(
+    i0: I0,
+    i1: I1,
+    i2: I2,
+) -> impl Iterator<Item = (I0::Item, I1::Item, I2::Item)>
+where
+    I0: Iterator<Item: Copy>,
+    I1: Iterator<Item: Copy> + Clone,
+    I2: Iterator<Item: Copy> + Clone,
+{
+    product2(product2(i0, i1), i2).map(|((first, second), third)| (first, second, third))
+}
diff --git a/library/compiler-builtins/libm-test/src/generate/case_list.rs b/library/compiler-builtins/libm-test/src/generate/case_list.rs
index 66d7f6a282f6b..d3daf86843d17 100644
--- a/library/compiler-builtins/libm-test/src/generate/case_list.rs
+++ b/library/compiler-builtins/libm-test/src/generate/case_list.rs
@@ -6,9 +6,9 @@
 //!
 //! This is useful for adding regression tests or expected failures.
 
-use libm::hf64;
 #[cfg(f128_enabled)]
 use libm::hf128;
+use libm::{hf32, hf64};
 
 use crate::{CheckBasis, CheckCtx, GeneratorKind, MathOp, op};
 
@@ -18,834 +18,1576 @@ pub struct TestCase<Op: MathOp> {
 }
 
 impl<Op: MathOp> TestCase<Op> {
-    #[expect(dead_code)]
-    fn append_inputs(v: &mut Vec<Self>, l: &[Op::RustArgs]) {
-        v.extend(l.iter().copied().map(|input| Self {
-            input,
-            output: None,
-        }));
-    }
-
-    fn append_pairs(v: &mut Vec<Self>, l: &[(Op::RustArgs, Option<Op::RustRet>)])
+    /// Turn into a different operation with the same types.
+    fn cast<Op2: MathOp>(self) -> TestCase<Op2>
     where
-        Op::RustRet: Copy,
+        Op2::RustArgs: From<Op::RustArgs>,
+        Op2::RustRet: From<Op::RustRet>,
     {
-        v.extend(
-            l.iter()
-                .copied()
-                .map(|(input, output)| Self { input, output }),
-        );
+        TestCase {
+            input: self.input.into(),
+            output: self.output.map(Into::into),
+        }
     }
 }
 
+macro_rules! cases {
+    (
+        $(
+            $(#[$($meta:tt)*])*
+            ($($tt:tt)*)
+        ),* $(,)?
+    ) => {{
+        Vec::from_iter([
+            $(
+               $(#[$($meta)*])*
+                cases!(@single $($tt)*),
+            )*
+        ])
+    }};
+
+    // Variant without a result, which will check against MPFR.
+    (@single ($($arg:expr),* $(,)?), None $(,)?) => {
+        TestCase{
+            input: ($($arg,)*),
+            output: None,
+        }
+    };
+
+    // Variant for when the result is specified.
+    (@single ($($arg:expr),* $(,)?), $res:expr $(,)?) => {
+        TestCase{
+            input: ($($arg,)*),
+            output: Some($res),
+        }
+    };
+}
+
+/********************************
+ * compiler-builtins test cases *
+ ********************************/
+
+#[cfg(f16_enabled)]
+fn addf16_cases() -> Vec<TestCase<op::addf16::Routine>> {
+    cases![]
+}
+
+fn addf32_cases() -> Vec<TestCase<op::addf32::Routine>> {
+    cases![]
+}
+
+fn addf64_cases() -> Vec<TestCase<op::addf64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn addf128_cases() -> Vec<TestCase<op::addf128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn subf16_cases() -> Vec<TestCase<op::subf16::Routine>> {
+    cases![]
+}
+
+fn subf32_cases() -> Vec<TestCase<op::subf32::Routine>> {
+    cases![]
+}
+
+fn subf64_cases() -> Vec<TestCase<op::subf64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn subf128_cases() -> Vec<TestCase<op::subf128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn mulf16_cases() -> Vec<TestCase<op::mulf16::Routine>> {
+    cases![]
+}
+
+fn mulf32_cases() -> Vec<TestCase<op::mulf32::Routine>> {
+    cases![]
+}
+
+fn mulf64_cases() -> Vec<TestCase<op::mulf64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn mulf128_cases() -> Vec<TestCase<op::mulf128::Routine>> {
+    cases![]
+}
+
+fn divf32_cases() -> Vec<TestCase<op::divf32::Routine>> {
+    cases![]
+}
+
+fn divf64_cases() -> Vec<TestCase<op::divf64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn divf128_cases() -> Vec<TestCase<op::divf128::Routine>> {
+    cases![]
+}
+
+fn powif32_cases() -> Vec<TestCase<op::powif32::Routine>> {
+    cases![]
+}
+
+fn powif64_cases() -> Vec<TestCase<op::powif64::Routine>> {
+    cases![
+        // High error
+        ((0.9999497584118668, -5858518), 6.823250355352412e127),
+    ]
+}
+
+#[cfg(f128_enabled)]
+fn powif128_cases() -> Vec<TestCase<op::powif128::Routine>> {
+    cases![]
+}
+
+/* comparison */
+
+#[cfg(f16_enabled)]
+fn eqf16_cases() -> Vec<TestCase<op::eqf16::Routine>> {
+    cases![]
+}
+
+fn eqf32_cases() -> Vec<TestCase<op::eqf32::Routine>> {
+    cases![]
+}
+
+fn eqf64_cases() -> Vec<TestCase<op::eqf64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn eqf128_cases() -> Vec<TestCase<op::eqf128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn gtf16_cases() -> Vec<TestCase<op::gtf16::Routine>> {
+    cases![]
+}
+
+fn gtf32_cases() -> Vec<TestCase<op::gtf32::Routine>> {
+    cases![]
+}
+
+fn gtf64_cases() -> Vec<TestCase<op::gtf64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn gtf128_cases() -> Vec<TestCase<op::gtf128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn gef16_cases() -> Vec<TestCase<op::gef16::Routine>> {
+    cases![]
+}
+
+fn gef32_cases() -> Vec<TestCase<op::gef32::Routine>> {
+    cases![]
+}
+
+fn gef64_cases() -> Vec<TestCase<op::gef64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn gef128_cases() -> Vec<TestCase<op::gef128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn ltf16_cases() -> Vec<TestCase<op::ltf16::Routine>> {
+    cases![]
+}
+
+fn ltf32_cases() -> Vec<TestCase<op::ltf32::Routine>> {
+    cases![]
+}
+
+fn ltf64_cases() -> Vec<TestCase<op::ltf64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn ltf128_cases() -> Vec<TestCase<op::ltf128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn lef16_cases() -> Vec<TestCase<op::lef16::Routine>> {
+    cases![]
+}
+
+fn lef32_cases() -> Vec<TestCase<op::lef32::Routine>> {
+    cases![]
+}
+
+fn lef64_cases() -> Vec<TestCase<op::lef64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn lef128_cases() -> Vec<TestCase<op::lef128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn nef16_cases() -> Vec<TestCase<op::nef16::Routine>> {
+    cases![]
+}
+
+fn nef32_cases() -> Vec<TestCase<op::nef32::Routine>> {
+    cases![]
+}
+
+fn nef64_cases() -> Vec<TestCase<op::nef64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn nef128_cases() -> Vec<TestCase<op::nef128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn unordf16_cases() -> Vec<TestCase<op::unordf16::Routine>> {
+    cases![]
+}
+
+fn unordf32_cases() -> Vec<TestCase<op::unordf32::Routine>> {
+    cases![]
+}
+
+fn unordf64_cases() -> Vec<TestCase<op::unordf64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn unordf128_cases() -> Vec<TestCase<op::unordf128::Routine>> {
+    cases![]
+}
+
+/* conversion */
+
+#[cfg(f16_enabled)]
+fn extend_f16_f32_cases() -> Vec<TestCase<op::extend_f16_f32::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn extend_f16_f64_cases() -> Vec<TestCase<op::extend_f16_f64::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+#[cfg(f128_enabled)]
+fn extend_f16_f128_cases() -> Vec<TestCase<op::extend_f16_f128::Routine>> {
+    cases![]
+}
+
+fn extend_f32_f64_cases() -> Vec<TestCase<op::extend_f32_f64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn extend_f32_f128_cases() -> Vec<TestCase<op::extend_f32_f128::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn extend_f64_f128_cases() -> Vec<TestCase<op::extend_f64_f128::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn narrow_f32_f16_cases() -> Vec<TestCase<op::narrow_f32_f16::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+fn narrow_f64_f16_cases() -> Vec<TestCase<op::narrow_f64_f16::Routine>> {
+    cases![]
+}
+
+fn narrow_f64_f32_cases() -> Vec<TestCase<op::narrow_f64_f32::Routine>> {
+    cases![]
+}
+
+#[cfg(f16_enabled)]
+#[cfg(f128_enabled)]
+fn narrow_f128_f16_cases() -> Vec<TestCase<op::narrow_f128_f16::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn narrow_f128_f32_cases() -> Vec<TestCase<op::narrow_f128_f32::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn narrow_f128_f64_cases() -> Vec<TestCase<op::narrow_f128_f64::Routine>> {
+    cases![]
+}
+
+fn ftoi_f32_i32_cases() -> Vec<TestCase<op::ftoi_f32_i32::Routine>> {
+    cases![]
+}
+
+fn ftoi_f32_i64_cases() -> Vec<TestCase<op::ftoi_f32_i64::Routine>> {
+    cases![]
+}
+
+fn ftoi_f32_i128_cases() -> Vec<TestCase<op::ftoi_f32_i128::Routine>> {
+    cases![]
+}
+
+fn ftoi_f64_i32_cases() -> Vec<TestCase<op::ftoi_f64_i32::Routine>> {
+    cases![]
+}
+
+fn ftoi_f64_i64_cases() -> Vec<TestCase<op::ftoi_f64_i64::Routine>> {
+    cases![]
+}
+
+fn ftoi_f64_i128_cases() -> Vec<TestCase<op::ftoi_f64_i128::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn ftoi_f128_i32_cases() -> Vec<TestCase<op::ftoi_f128_i32::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn ftoi_f128_i64_cases() -> Vec<TestCase<op::ftoi_f128_i64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn ftoi_f128_i128_cases() -> Vec<TestCase<op::ftoi_f128_i128::Routine>> {
+    cases![]
+}
+
+fn ftoi_f32_u32_cases() -> Vec<TestCase<op::ftoi_f32_u32::Routine>> {
+    cases![]
+}
+
+fn ftoi_f32_u64_cases() -> Vec<TestCase<op::ftoi_f32_u64::Routine>> {
+    cases![]
+}
+
+fn ftoi_f32_u128_cases() -> Vec<TestCase<op::ftoi_f32_u128::Routine>> {
+    cases![]
+}
+
+fn ftoi_f64_u32_cases() -> Vec<TestCase<op::ftoi_f64_u32::Routine>> {
+    cases![]
+}
+
+fn ftoi_f64_u64_cases() -> Vec<TestCase<op::ftoi_f64_u64::Routine>> {
+    cases![]
+}
+
+fn ftoi_f64_u128_cases() -> Vec<TestCase<op::ftoi_f64_u128::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn ftoi_f128_u32_cases() -> Vec<TestCase<op::ftoi_f128_u32::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn ftoi_f128_u64_cases() -> Vec<TestCase<op::ftoi_f128_u64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn ftoi_f128_u128_cases() -> Vec<TestCase<op::ftoi_f128_u128::Routine>> {
+    cases![]
+}
+
+fn itof_i32_f32_cases() -> Vec<TestCase<op::itof_i32_f32::Routine>> {
+    cases![]
+}
+
+fn itof_i64_f32_cases() -> Vec<TestCase<op::itof_i64_f32::Routine>> {
+    cases![]
+}
+
+fn itof_i128_f32_cases() -> Vec<TestCase<op::itof_i128_f32::Routine>> {
+    cases![]
+}
+
+fn itof_i32_f64_cases() -> Vec<TestCase<op::itof_i32_f64::Routine>> {
+    cases![]
+}
+
+fn itof_i64_f64_cases() -> Vec<TestCase<op::itof_i64_f64::Routine>> {
+    cases![]
+}
+
+fn itof_i128_f64_cases() -> Vec<TestCase<op::itof_i128_f64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn itof_i32_f128_cases() -> Vec<TestCase<op::itof_i32_f128::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn itof_i64_f128_cases() -> Vec<TestCase<op::itof_i64_f128::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn itof_i128_f128_cases() -> Vec<TestCase<op::itof_i128_f128::Routine>> {
+    cases![]
+}
+
+fn itof_u32_f32_cases() -> Vec<TestCase<op::itof_u32_f32::Routine>> {
+    cases![]
+}
+
+fn itof_u64_f32_cases() -> Vec<TestCase<op::itof_u64_f32::Routine>> {
+    cases![]
+}
+
+fn itof_u128_f32_cases() -> Vec<TestCase<op::itof_u128_f32::Routine>> {
+    cases![]
+}
+
+fn itof_u32_f64_cases() -> Vec<TestCase<op::itof_u32_f64::Routine>> {
+    cases![]
+}
+
+fn itof_u64_f64_cases() -> Vec<TestCase<op::itof_u64_f64::Routine>> {
+    cases![]
+}
+
+fn itof_u128_f64_cases() -> Vec<TestCase<op::itof_u128_f64::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn itof_u32_f128_cases() -> Vec<TestCase<op::itof_u32_f128::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn itof_u64_f128_cases() -> Vec<TestCase<op::itof_u64_f128::Routine>> {
+    cases![]
+}
+
+#[cfg(f128_enabled)]
+fn itof_u128_f128_cases() -> Vec<TestCase<op::itof_u128_f128::Routine>> {
+    cases![]
+}
+
+/* int arithmetic */
+
+fn iadd_i128_cases() -> Vec<TestCase<op::iadd_i128::Routine>> {
+    cases![]
+}
+
+fn iadd_u128_cases() -> Vec<TestCase<op::iadd_u128::Routine>> {
+    cases![]
+}
+
+fn iaddo_i128_cases() -> Vec<TestCase<op::iaddo_i128::Routine>> {
+    cases![]
+}
+
+fn iaddo_u128_cases() -> Vec<TestCase<op::iaddo_u128::Routine>> {
+    cases![]
+}
+
+fn isub_i128_cases() -> Vec<TestCase<op::isub_i128::Routine>> {
+    cases![]
+}
+
+fn isub_u128_cases() -> Vec<TestCase<op::isub_u128::Routine>> {
+    cases![]
+}
+
+fn isubo_i128_cases() -> Vec<TestCase<op::isubo_i128::Routine>> {
+    cases![]
+}
+
+fn isubo_u128_cases() -> Vec<TestCase<op::isubo_u128::Routine>> {
+    cases![]
+}
+
+fn idiv_i128_cases() -> Vec<TestCase<op::idiv_i128::Routine>> {
+    cases![]
+}
+
+fn idiv_i32_cases() -> Vec<TestCase<op::idiv_i32::Routine>> {
+    cases![]
+}
+
+fn idiv_i64_cases() -> Vec<TestCase<op::idiv_i64::Routine>> {
+    cases![]
+}
+
+fn idiv_u128_cases() -> Vec<TestCase<op::idiv_u128::Routine>> {
+    cases![]
+}
+
+fn idiv_u32_cases() -> Vec<TestCase<op::idiv_u32::Routine>> {
+    cases![]
+}
+
+fn idiv_u64_cases() -> Vec<TestCase<op::idiv_u64::Routine>> {
+    cases![]
+}
+
+fn idivmod_i128_cases() -> Vec<TestCase<op::idivmod_i128::Routine>> {
+    cases![]
+}
+
+fn idivmod_i32_cases() -> Vec<TestCase<op::idivmod_i32::Routine>> {
+    cases![]
+}
+
+fn idivmod_i64_cases() -> Vec<TestCase<op::idivmod_i64::Routine>> {
+    cases![]
+}
+
+fn idivmod_u128_cases() -> Vec<TestCase<op::idivmod_u128::Routine>> {
+    cases![]
+}
+
+fn idivmod_u32_cases() -> Vec<TestCase<op::idivmod_u32::Routine>> {
+    cases![]
+}
+
+fn idivmod_u64_cases() -> Vec<TestCase<op::idivmod_u64::Routine>> {
+    cases![]
+}
+
+fn imod_i128_cases() -> Vec<TestCase<op::imod_i128::Routine>> {
+    cases![]
+}
+
+fn imod_i32_cases() -> Vec<TestCase<op::imod_i32::Routine>> {
+    cases![]
+}
+
+fn imod_i64_cases() -> Vec<TestCase<op::imod_i64::Routine>> {
+    cases![]
+}
+
+fn imod_u128_cases() -> Vec<TestCase<op::imod_u128::Routine>> {
+    cases![]
+}
+
+fn imod_u32_cases() -> Vec<TestCase<op::imod_u32::Routine>> {
+    cases![]
+}
+
+fn imod_u64_cases() -> Vec<TestCase<op::imod_u64::Routine>> {
+    cases![]
+}
+
+fn imul_i128_cases() -> Vec<TestCase<op::imul_i128::Routine>> {
+    cases![]
+}
+
+fn imul_u64_cases() -> Vec<TestCase<op::imul_u64::Routine>> {
+    cases![]
+}
+
+fn imulo_i128_cases() -> Vec<TestCase<op::imulo_i128::Routine>> {
+    cases![]
+}
+
+fn imulo_i32_cases() -> Vec<TestCase<op::imulo_i32::Routine>> {
+    cases![]
+}
+
+fn imulo_i64_cases() -> Vec<TestCase<op::imulo_i64::Routine>> {
+    cases![]
+}
+
+fn imulo_u128_cases() -> Vec<TestCase<op::imulo_u128::Routine>> {
+    cases![]
+}
+
+/* int shifts */
+
+fn ashl_u32_cases() -> Vec<TestCase<op::ashl_u32::Routine>> {
+    cases![]
+}
+
+fn ashl_u64_cases() -> Vec<TestCase<op::ashl_u64::Routine>> {
+    cases![]
+}
+
+fn ashl_u128_cases() -> Vec<TestCase<op::ashl_u128::Routine>> {
+    cases![]
+}
+
+fn ashr_i32_cases() -> Vec<TestCase<op::ashr_i32::Routine>> {
+    cases![]
+}
+
+fn ashr_i64_cases() -> Vec<TestCase<op::ashr_i64::Routine>> {
+    cases![]
+}
+
+fn ashr_i128_cases() -> Vec<TestCase<op::ashr_i128::Routine>> {
+    cases![]
+}
+
+fn lshr_u32_cases() -> Vec<TestCase<op::lshr_u32::Routine>> {
+    cases![]
+}
+
+fn lshr_u64_cases() -> Vec<TestCase<op::lshr_u64::Routine>> {
+    cases![]
+}
+
+fn lshr_u128_cases() -> Vec<TestCase<op::lshr_u128::Routine>> {
+    cases![]
+}
+
+/* int bitwise ops */
+
+fn leading_zeros_u32_cases() -> Vec<TestCase<op::leading_zeros_u32::Routine>> {
+    cases![]
+}
+
+fn leading_zeros_u64_cases() -> Vec<TestCase<op::leading_zeros_u64::Routine>> {
+    cases![]
+}
+
+fn leading_zeros_u128_cases() -> Vec<TestCase<op::leading_zeros_u128::Routine>> {
+    cases![]
+}
+
+fn trailing_zeros_u32_cases() -> Vec<TestCase<op::trailing_zeros_u32::Routine>> {
+    cases![]
+}
+
+fn trailing_zeros_u64_cases() -> Vec<TestCase<op::trailing_zeros_u64::Routine>> {
+    cases![]
+}
+
+fn trailing_zeros_u128_cases() -> Vec<TestCase<op::trailing_zeros_u128::Routine>> {
+    cases![]
+}
+
+/*******************
+ * libm test cases *
+ *******************/
+
 fn acos_cases() -> Vec<TestCase<op::acos::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn acosf_cases() -> Vec<TestCase<op::acosf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn acosh_cases() -> Vec<TestCase<op::acosh::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn acoshf_cases() -> Vec<TestCase<op::acoshf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn asin_cases() -> Vec<TestCase<op::asin::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn asinf_cases() -> Vec<TestCase<op::asinf::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn asinh_cases() -> Vec<TestCase<op::asinh::Routine>> {
-    vec![]
+fn asinhf_cases() -> Vec<TestCase<op::asinhf::Routine>> {
+    cases![
+        // Failure on i586
+        ((-0.37330312), -0.3651353),
+        ((-0.421092), -0.40954682),
+    ]
 }
 
-fn asinhf_cases() -> Vec<TestCase<op::asinhf::Routine>> {
-    vec![]
+fn asinh_cases() -> Vec<TestCase<op::asinh::Routine>> {
+    cases![]
 }
 
 fn atan_cases() -> Vec<TestCase<op::atan::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn atan2_cases() -> Vec<TestCase<op::atan2::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn atan2f_cases() -> Vec<TestCase<op::atan2f::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn atanf_cases() -> Vec<TestCase<op::atanf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn atanh_cases() -> Vec<TestCase<op::atanh::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn atanhf_cases() -> Vec<TestCase<op::atanhf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn cbrt_cases() -> Vec<TestCase<op::cbrt::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn cbrtf_cases() -> Vec<TestCase<op::cbrtf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn ceil_cases() -> Vec<TestCase<op::ceil::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn ceilf_cases() -> Vec<TestCase<op::ceilf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn ceilf128_cases() -> Vec<TestCase<op::ceilf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn ceilf16_cases() -> Vec<TestCase<op::ceilf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn copysign_cases() -> Vec<TestCase<op::copysign::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn copysignf_cases() -> Vec<TestCase<op::copysignf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn copysignf128_cases() -> Vec<TestCase<op::copysignf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn copysignf16_cases() -> Vec<TestCase<op::copysignf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn cos_cases() -> Vec<TestCase<op::cos::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn cosf_cases() -> Vec<TestCase<op::cosf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn cosh_cases() -> Vec<TestCase<op::cosh::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn coshf_cases() -> Vec<TestCase<op::coshf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn erf_cases() -> Vec<TestCase<op::erf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn erfc_cases() -> Vec<TestCase<op::erfc::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn erfcf_cases() -> Vec<TestCase<op::erfcf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn erff_cases() -> Vec<TestCase<op::erff::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn exp_cases() -> Vec<TestCase<op::exp::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn exp10_cases() -> Vec<TestCase<op::exp10::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn exp10f_cases() -> Vec<TestCase<op::exp10f::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn exp2_cases() -> Vec<TestCase<op::exp2::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn exp2f_cases() -> Vec<TestCase<op::exp2f::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn expf_cases() -> Vec<TestCase<op::expf::Routine>> {
-    vec![]
+    cases![
+        ((hf32!("-0x1.2d245ap-8")), hf32!("0x1.fda718p-1")),
+        ((hf32!("0x1.db1b7ap-7")), hf32!("0x1.03bd22p+0")),
+        ((hf32!("-0x1.dc15fcp+5")), hf32!("0x1.1ae6e6p-86")),
+    ]
 }
 
 fn expm1_cases() -> Vec<TestCase<op::expm1::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn expm1f_cases() -> Vec<TestCase<op::expm1f::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fabs_cases() -> Vec<TestCase<op::fabs::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fabsf_cases() -> Vec<TestCase<op::fabsf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn fabsf128_cases() -> Vec<TestCase<op::fabsf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn fabsf16_cases() -> Vec<TestCase<op::fabsf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fdim_cases() -> Vec<TestCase<op::fdim::Routine>> {
-    vec![]
+    cases![
+        // Failures on i586
+        (
+            (
+                hf64!("0x1.10d2f8a8dffd1p+355"),
+                hf64!("-0x1.5203b17e54a8cp+373")
+            ),
+            hf64!("0x1.5203f5b312d2fp+373")
+        ),
+        (
+            (
+                hf64!("0x1.9ffdf64f0d2f8p+294"),
+                hf64!("-0x1.71addd21280b5p+344")
+            ),
+            hf64!("0x1.71addd21280bbp+344")
+        ),
+        (
+            (
+                hf64!("0x1.f3600eb4ad0e0p-953"),
+                hf64!("-0x1.0c29b2b40023dp-976")
+            ),
+            hf64!("0x1.f36010cd00737p-953")
+        ),
+    ]
 }
 
 fn fdimf_cases() -> Vec<TestCase<op::fdimf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn fdimf128_cases() -> Vec<TestCase<op::fdimf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn fdimf16_cases() -> Vec<TestCase<op::fdimf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn floor_cases() -> Vec<TestCase<op::floor::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn floorf_cases() -> Vec<TestCase<op::floorf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn floorf128_cases() -> Vec<TestCase<op::floorf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn floorf16_cases() -> Vec<TestCase<op::floorf16::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn fma_cases() -> Vec<TestCase<op::fma::Routine>> {
-    let mut v = vec![];
-    TestCase::append_pairs(
-        &mut v,
-        &[
-            // Previous failure with incorrect sign
-            ((5e-324, -5e-324, 0.0), Some(-0.0)),
-        ],
-    );
-    v
+fn fmaf_cases() -> Vec<TestCase<op::fmaf::Routine>> {
+    cases![
+        // Known rounding error for some implementations (notably MinGW)
+        ((-1.9369631e13f32, 2.1513551e-7, -1.7354427e-24), -4167095.8),
+        // Failure on i586
+        (
+            (
+                hf32!("-0x1.c92494p+109"),
+                hf32!("-0x0.000018p-126"),
+                hf32!("-0x1.6db6f0p-91"),
+            ),
+            hf32!("0x1.56db6ep-36")
+        ),
+    ]
 }
 
-fn fmaf_cases() -> Vec<TestCase<op::fmaf::Routine>> {
-    let mut v = vec![];
-    TestCase::append_pairs(
-        &mut v,
-        &[
-            // Known rounding error for some implementations (notably MinGW)
+fn fma_cases() -> Vec<TestCase<op::fma::Routine>> {
+    cases![
+        // Previous failure with incorrect sign
+        ((5e-324, -5e-324, 0.0), -0.0),
+        // Failure on i586
+        (
+            (0.999999999999999, 1.0000000000000013, 0.0),
+            1.0000000000000002
+        ),
+        // Failure on musl i686/i586
+        (
             (
-                (-1.9369631e13f32, 2.1513551e-7, -1.7354427e-24),
-                Some(-4167095.8),
+                hf64!("0x0.0000000100001p-1022"),
+                hf64!("0x1.ffffffffffffbp+1023"),
+                hf64!("0x0p+0")
             ),
-        ],
-    );
-    v
+            hf64!("0x1.00000fffffffdp-30")
+        )
+    ]
 }
 
 #[cfg(f128_enabled)]
 fn fmaf128_cases() -> Vec<TestCase<op::fmaf128::Routine>> {
-    let mut v = vec![];
-    TestCase::append_pairs(
-        &mut v,
-        &[
+    cases![
+        (
+            // Tricky rounding case that previously failed in extensive tests
             (
-                // Tricky rounding case that previously failed in extensive tests
-                (
-                    hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"),
-                    hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"),
-                    hf128!("-0x0.000000000000000000000000048ap-16382"),
-                ),
-                Some(hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")),
+                hf128!("-0x1.1966cc01966cc01966cc01966f06p-25"),
+                hf128!("-0x1.669933fe69933fe69933fe6997c9p-16358"),
+                hf128!("-0x0.000000000000000000000000048ap-16382"),
             ),
+            hf128!("0x0.c5171470a3ff5e0f68d751491b18p-16382")
+        ),
+        (
+            // Subnormal edge case that caused a failure
             (
-                // Subnormal edge case that caused a failure
-                (
-                    hf128!("0x0.7ffffffffffffffffffffffffff7p-16382"),
-                    hf128!("0x1.ffffffffffffffffffffffffffffp-1"),
-                    hf128!("0x0.8000000000000000000000000009p-16382"),
-                ),
-                Some(hf128!("0x1.0000000000000000000000000000p-16382")),
+                hf128!("0x0.7ffffffffffffffffffffffffff7p-16382"),
+                hf128!("0x1.ffffffffffffffffffffffffffffp-1"),
+                hf128!("0x0.8000000000000000000000000009p-16382"),
             ),
-        ],
-    );
-    v
+            hf128!("0x1.0000000000000000000000000000p-16382")
+        ),
+    ]
 }
 
 #[cfg(f16_enabled)]
 fn fmaxf16_cases() -> Vec<TestCase<op::fmaxf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fmaxf_cases() -> Vec<TestCase<op::fmaxf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fmax_cases() -> Vec<TestCase<op::fmax::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn fmaxf128_cases() -> Vec<TestCase<op::fmaxf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn fmaximumf16_cases() -> Vec<TestCase<op::fmaximumf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fmaximumf_cases() -> Vec<TestCase<op::fmaximumf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fmaximum_cases() -> Vec<TestCase<op::fmaximum::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn fmaximumf128_cases() -> Vec<TestCase<op::fmaximumf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn fmaximum_numf16_cases() -> Vec<TestCase<op::fmaximum_numf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fmaximum_numf_cases() -> Vec<TestCase<op::fmaximum_numf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fmaximum_num_cases() -> Vec<TestCase<op::fmaximum_num::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn fmaximum_numf128_cases() -> Vec<TestCase<op::fmaximum_numf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn fminf16_cases() -> Vec<TestCase<op::fminf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fminf_cases() -> Vec<TestCase<op::fminf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fmin_cases() -> Vec<TestCase<op::fmin::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn fminf128_cases() -> Vec<TestCase<op::fminf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn fminimumf16_cases() -> Vec<TestCase<op::fminimumf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fminimumf_cases() -> Vec<TestCase<op::fminimumf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fminimum_cases() -> Vec<TestCase<op::fminimum::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn fminimumf128_cases() -> Vec<TestCase<op::fminimumf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn fminimum_numf16_cases() -> Vec<TestCase<op::fminimum_numf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fminimum_numf_cases() -> Vec<TestCase<op::fminimum_numf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fminimum_num_cases() -> Vec<TestCase<op::fminimum_num::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn fminimum_numf128_cases() -> Vec<TestCase<op::fminimum_numf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn fmod_cases() -> Vec<TestCase<op::fmod::Routine>> {
-    let mut v = vec![];
-    TestCase::append_pairs(
-        &mut v,
-        &[
-            // Previous failure with incorrect loop iteration
-            // <https://github.com/rust-lang/libm/pull/469#discussion_r2022337272>
-            ((2.1, 3.123e-320), Some(2.0696e-320)),
-            ((2.1, 2.253547e-318), Some(1.772535e-318)),
-        ],
-    );
-    v
+    cases![
+        // Previous failure with incorrect loop iteration
+        // <https://github.com/rust-lang/libm/pull/469#discussion_r2022337272>
+        ((2.1, 3.123e-320), 2.0696e-320),
+        ((2.1, 2.253547e-318), 1.772535e-318),
+    ]
 }
 
 fn fmodf_cases() -> Vec<TestCase<op::fmodf::Routine>> {
-    let mut v = vec![];
-    TestCase::append_pairs(
-        &mut v,
-        &[
-            // Previous failure with incorrect loop iteration
-            // <https://github.com/rust-lang/libm/pull/469#discussion_r2022337272>
-            ((2.1, 8.858e-42), Some(8.085e-42)),
-            ((2.1, 6.39164e-40), Some(6.1636e-40)),
-            ((5.5, 6.39164e-40), Some(4.77036e-40)),
-            ((-151.189, 6.39164e-40), Some(-5.64734e-40)),
-        ],
-    );
-    v
+    cases![
+        // Previous failure with incorrect loop iteration
+        // <https://github.com/rust-lang/libm/pull/469#discussion_r2022337272>
+        ((2.1, 8.858e-42), 8.085e-42),
+        ((2.1, 6.39164e-40), 6.1636e-40),
+        ((5.5, 6.39164e-40), 4.77036e-40),
+        ((-151.189, 6.39164e-40), -5.64734e-40),
+    ]
 }
 
 #[cfg(f128_enabled)]
 fn fmodf128_cases() -> Vec<TestCase<op::fmodf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn fmodf16_cases() -> Vec<TestCase<op::fmodf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn frexpf16_cases() -> Vec<TestCase<op::frexpf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn frexpf_cases() -> Vec<TestCase<op::frexpf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn frexp_cases() -> Vec<TestCase<op::frexp::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn frexpf128_cases() -> Vec<TestCase<op::frexpf128::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn hypot_cases() -> Vec<TestCase<op::hypot::Routine>> {
-    vec![]
+fn hypotf_cases() -> Vec<TestCase<op::hypotf::Routine>> {
+    cases![]
 }
 
-fn hypotf_cases() -> Vec<TestCase<op::hypotf::Routine>> {
-    vec![]
+fn hypot_cases() -> Vec<TestCase<op::hypot::Routine>> {
+    cases![
+        // Cases that can overflow exponent if wrapping arithmetic is not used
+        (
+            (
+                hf64!("-0x1.800f800f80100p+1023"),
+                hf64!("0x1.8354835473720p+996"),
+            ),
+            None
+        ),
+        (
+            (hf64!("0x1.201b201b201c0p+0"), hf64!("0x1.b028b028b02a0p-1")),
+            None
+        ),
+        (
+            (
+                hf64!("-0x1.e538e538e564p+980"),
+                hf64!("-0x1.c4dfc4dfc508p+983"),
+            ),
+            None
+        ),
+        (
+            (
+                hf64!("-0x1.2f22e4f77aa58p+983"),
+                hf64!("-0x1.44c9f5524c8ccp+980"),
+            ),
+            None
+        ),
+    ]
 }
 
 #[cfg(f16_enabled)]
 fn ilogbf16_cases() -> Vec<TestCase<op::ilogbf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn ilogbf_cases() -> Vec<TestCase<op::ilogbf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn ilogb_cases() -> Vec<TestCase<op::ilogb::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn ilogbf128_cases() -> Vec<TestCase<op::ilogbf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn j0_cases() -> Vec<TestCase<op::j0::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn j0f_cases() -> Vec<TestCase<op::j0f::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn j1_cases() -> Vec<TestCase<op::j1::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn j1f_cases() -> Vec<TestCase<op::j1f::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn jn_cases() -> Vec<TestCase<op::jn::Routine>> {
-    vec![]
+fn jnf_cases() -> Vec<TestCase<op::jnf::Routine>> {
+    cases![]
 }
 
-fn jnf_cases() -> Vec<TestCase<op::jnf::Routine>> {
-    vec![]
+fn jn_cases() -> Vec<TestCase<op::jn::Routine>> {
+    cases![
+        // Inputs that produce high errors
+        ((190, 1005.366268038242), 7.328620335959289e-10),
+        ((238, -311.0349), 7.270196433535006e-8),
+    ]
 }
 
 fn ldexp_cases() -> Vec<TestCase<op::ldexp::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn ldexpf_cases() -> Vec<TestCase<op::ldexpf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn ldexpf128_cases() -> Vec<TestCase<op::ldexpf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn ldexpf16_cases() -> Vec<TestCase<op::ldexpf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn lgamma_cases() -> Vec<TestCase<op::lgamma::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn lgamma_r_cases() -> Vec<TestCase<op::lgamma_r::Routine>> {
-    vec![]
+fn lgammaf_cases() -> Vec<TestCase<op::lgammaf::Routine>> {
+    cases![
+        // High error
+        ((-4.933393,), -1.9580022),
+    ]
 }
 
-fn lgammaf_cases() -> Vec<TestCase<op::lgammaf::Routine>> {
-    vec![]
+fn lgamma_r_cases() -> Vec<TestCase<op::lgamma_r::Routine>> {
+    cases![]
 }
 
 fn lgammaf_r_cases() -> Vec<TestCase<op::lgammaf_r::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn log_cases() -> Vec<TestCase<op::log::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn log10_cases() -> Vec<TestCase<op::log10::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn log10f_cases() -> Vec<TestCase<op::log10f::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn log1p_cases() -> Vec<TestCase<op::log1p::Routine>> {
-    vec![]
+fn log1pf_cases() -> Vec<TestCase<op::log1pf::Routine>> {
+    cases![
+        // Musl failures on i586
+        ((hf32!("-0x1.8292f6p-2")), hf32!("-0x1.e56918p-2")),
+        ((hf32!("0x1.12d15ep-1")), hf32!("0x1.b7fbf8p-2")),
+        ((hf32!("-0x1.904ebep-2")), hf32!("-0x1.fbb6cap-2")),
+    ]
 }
 
-fn log1pf_cases() -> Vec<TestCase<op::log1pf::Routine>> {
-    vec![]
+fn log1p_cases() -> Vec<TestCase<op::log1p::Routine>> {
+    cases![
+        // Musl failure on i586
+        (
+            (hf64!("-0x1.9094dbf7f2e85p-2"),),
+            hf64!("-0x1.fc29f046c88a1p-2")
+        ),
+    ]
 }
 
 fn log2_cases() -> Vec<TestCase<op::log2::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn log2f_cases() -> Vec<TestCase<op::log2f::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn logf_cases() -> Vec<TestCase<op::logf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn modf_cases() -> Vec<TestCase<op::modf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn modff_cases() -> Vec<TestCase<op::modff::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn nextafter_cases() -> Vec<TestCase<op::nextafter::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn nextafterf_cases() -> Vec<TestCase<op::nextafterf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn pow_cases() -> Vec<TestCase<op::pow::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn powf_cases() -> Vec<TestCase<op::powf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn remainder_cases() -> Vec<TestCase<op::remainder::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn remainderf_cases() -> Vec<TestCase<op::remainderf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn remquo_cases() -> Vec<TestCase<op::remquo::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn remquof_cases() -> Vec<TestCase<op::remquof::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn rint_cases() -> Vec<TestCase<op::rint::Routine>> {
-    let mut v = vec![];
-    TestCase::append_pairs(
-        &mut v,
-        &[
-            // Known failure on i586
-            #[cfg(not(x86_no_sse))]
-            (
-                (hf64!("-0x1.e3f13ff995ffcp+38"),),
-                Some(hf64!("-0x1.e3f13ff994000p+38")),
-            ),
-            #[cfg(x86_no_sse)]
-            (
-                (hf64!("-0x1.e3f13ff995ffcp+38"),),
-                Some(hf64!("-0x1.e3f13ff998000p+38")),
-            ),
-        ],
-    );
-    v
+#[cfg(f16_enabled)]
+fn rintf16_cases() -> Vec<TestCase<op::rintf16::Routine>> {
+    // Out rint doesn't respect rounding modes so it is the same as roundeven
+    roundevenf16_cases()
+        .into_iter()
+        .map(TestCase::cast)
+        .collect()
 }
 
 fn rintf_cases() -> Vec<TestCase<op::rintf::Routine>> {
-    vec![]
+    // Out rint doesn't respect rounding modes so it is the same as roundeven
+    roundevenf_cases().into_iter().map(TestCase::cast).collect()
 }
 
-#[cfg(f128_enabled)]
-fn rintf128_cases() -> Vec<TestCase<op::rintf128::Routine>> {
-    vec![]
+fn rint_cases() -> Vec<TestCase<op::rint::Routine>> {
+    // Out rint doesn't respect rounding modes so it is the same as roundeven
+    roundeven_cases().into_iter().map(TestCase::cast).collect()
 }
 
-#[cfg(f16_enabled)]
-fn rintf16_cases() -> Vec<TestCase<op::rintf16::Routine>> {
-    vec![]
+#[cfg(f128_enabled)]
+fn rintf128_cases() -> Vec<TestCase<op::rintf128::Routine>> {
+    // Out rint doesn't respect rounding modes so it is the same as roundeven
+    roundevenf128_cases()
+        .into_iter()
+        .map(TestCase::cast)
+        .collect()
 }
 
 #[cfg(f16_enabled)]
 fn roundf16_cases() -> Vec<TestCase<op::roundf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn round_cases() -> Vec<TestCase<op::round::Routine>> {
-    vec![]
+    cases![
+        // Failure on i586
+        (
+            (hf64!("0x1.9efc6a203d4a9p+52"),),
+            hf64!("0x1.9efc6a203d4a9p+52")
+        )
+    ]
 }
 
 fn roundf_cases() -> Vec<TestCase<op::roundf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn roundf128_cases() -> Vec<TestCase<op::roundf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn roundevenf16_cases() -> Vec<TestCase<op::roundevenf16::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn roundeven_cases() -> Vec<TestCase<op::roundeven::Routine>> {
-    let mut v = vec![];
-    TestCase::append_pairs(
-        &mut v,
-        &[
-            // Known failure on i586
-            #[cfg(not(x86_no_sse))]
-            (
-                (hf64!("-0x1.e3f13ff995ffcp+38"),),
-                Some(hf64!("-0x1.e3f13ff994000p+38")),
-            ),
-            #[cfg(x86_no_sse)]
-            (
-                (hf64!("-0x1.e3f13ff995ffcp+38"),),
-                Some(hf64!("-0x1.e3f13ff998000p+38")),
-            ),
-        ],
-    );
-    v
+fn roundevenf_cases() -> Vec<TestCase<op::roundevenf::Routine>> {
+    cases![]
 }
 
-fn roundevenf_cases() -> Vec<TestCase<op::roundevenf::Routine>> {
-    vec![]
+fn roundeven_cases() -> Vec<TestCase<op::roundeven::Routine>> {
+    cases![
+        // Failure on i586
+        ((-519629176421.49976,), -519629176421.0),
+        // Failures with a previous algorithm
+        ((-849751480.5001163,), -849751481.0),
+        ((-12493089.499809155,), -12493089.0),
+        ((-1308.5000830345912,), -1309.0),
+    ]
 }
 
 #[cfg(f128_enabled)]
 fn roundevenf128_cases() -> Vec<TestCase<op::roundevenf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn scalbn_cases() -> Vec<TestCase<op::scalbn::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn scalbnf_cases() -> Vec<TestCase<op::scalbnf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn scalbnf128_cases() -> Vec<TestCase<op::scalbnf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn scalbnf16_cases() -> Vec<TestCase<op::scalbnf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn sin_cases() -> Vec<TestCase<op::sin::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn sincos_cases() -> Vec<TestCase<op::sincos::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn sincosf_cases() -> Vec<TestCase<op::sincosf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn sinf_cases() -> Vec<TestCase<op::sinf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn sinh_cases() -> Vec<TestCase<op::sinh::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn sinhf_cases() -> Vec<TestCase<op::sinhf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn sqrt_cases() -> Vec<TestCase<op::sqrt::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn sqrtf_cases() -> Vec<TestCase<op::sqrtf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn sqrtf128_cases() -> Vec<TestCase<op::sqrtf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn sqrtf16_cases() -> Vec<TestCase<op::sqrtf16::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn tan_cases() -> Vec<TestCase<op::tan::Routine>> {
-    vec![]
+fn tanf_cases() -> Vec<TestCase<op::tanf::Routine>> {
+    cases![]
 }
 
-fn tanf_cases() -> Vec<TestCase<op::tanf::Routine>> {
-    vec![]
+fn tan_cases() -> Vec<TestCase<op::tan::Routine>> {
+    cases![
+        // Musl failures on i586
+        (
+            (hf64!("0x1.fffffffffffafp+1023"),),
+            hf64!("0x1.c573c6dd8c00ap+0")
+        ),
+        (
+            (hf64!("0x1.fffffffffffafp+1023"),),
+            hf64!("0x1.c573c6dd8c00ap+0")
+        ),
+        (
+            (hf64!("-0x1.0b10f6eaf2ca0p+883"),),
+            hf64!("0x1.cefbd167e2402p+0")
+        ),
+    ]
 }
 
 fn tanh_cases() -> Vec<TestCase<op::tanh::Routine>> {
-    vec![]
+    cases![(
+        (hf64!("0x1.fbfdb8b31b9b4p-3"),),
+        hf64!("0x1.f1d2bcb4e1b45p-3")
+    )]
 }
 
 fn tanhf_cases() -> Vec<TestCase<op::tanhf::Routine>> {
-    vec![]
+    cases![
+        // Inaccuracy in musl
+        ((0.24503659,), 0.24024734),
+        ((0.19125812,), 0.18895969),
+    ]
 }
 
 fn tgamma_cases() -> Vec<TestCase<op::tgamma::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn tgammaf_cases() -> Vec<TestCase<op::tgammaf::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn trunc_cases() -> Vec<TestCase<op::trunc::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn truncf_cases() -> Vec<TestCase<op::truncf::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f128_enabled)]
 fn truncf128_cases() -> Vec<TestCase<op::truncf128::Routine>> {
-    vec![]
+    cases![]
 }
 
 #[cfg(f16_enabled)]
 fn truncf16_cases() -> Vec<TestCase<op::truncf16::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn y0_cases() -> Vec<TestCase<op::y0::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn y0f_cases() -> Vec<TestCase<op::y0f::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn y1_cases() -> Vec<TestCase<op::y1::Routine>> {
-    vec![]
+    cases![]
 }
 
 fn y1f_cases() -> Vec<TestCase<op::y1f::Routine>> {
-    vec![]
+    cases![]
 }
 
-fn yn_cases() -> Vec<TestCase<op::yn::Routine>> {
-    vec![]
+fn ynf_cases() -> Vec<TestCase<op::ynf::Routine>> {
+    cases![]
 }
 
-fn ynf_cases() -> Vec<TestCase<op::ynf::Routine>> {
-    vec![]
+fn yn_cases() -> Vec<TestCase<op::yn::Routine>> {
+    cases![
+        // Inputs that should be finite but tend to round to infinity
+        ((228, 120.75621), -3.3293829e38),
+        ((148, 61.379253), -3.2585946e38),
+        ((184, 87.26689), -3.2943882e38),
+    ]
 }
 
 pub trait CaseListInput: MathOp + Sized {
diff --git a/library/compiler-builtins/libm-test/src/generate/edge_cases.rs b/library/compiler-builtins/libm-test/src/generate/edge_cases.rs
index 4e4a782a16988..9ada6d5c95b2d 100644
--- a/library/compiler-builtins/libm-test/src/generate/edge_cases.rs
+++ b/library/compiler-builtins/libm-test/src/generate/edge_cases.rs
@@ -3,10 +3,9 @@
 use libm::support::{CastInto, Float, Int, MinInt};
 
 use crate::domain::get_domain;
-use crate::generate::KnownSize;
-use crate::op::OpITy;
+use crate::generate::{KnownSize, product2, product3};
 use crate::run_cfg::{check_near_count, check_point_count};
-use crate::{BaseName, CheckCtx, FloatExt, FloatTy, MathOp, test_log};
+use crate::{Arg0, Arg1, Arg2, BaseName, CheckCtx, FloatExt, MathOp, Ty, test_log};
 
 /// Generate a sequence of edge cases, e.g. numbers near zeroes and infiniteis.
 pub trait EdgeCaseInput<Op> {
@@ -14,15 +13,12 @@ pub trait EdgeCaseInput<Op> {
 }
 
 /// Create a list of values around interesting points (infinities, zeroes, NaNs).
-fn float_edge_cases<Op>(
+fn float_edge_cases<F: Float>(
     ctx: &CheckCtx,
     argnum: usize,
-) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
-where
-    Op: MathOp,
-{
+) -> (impl Iterator<Item = F> + Clone, u64) {
     let mut ret = Vec::new();
-    let one = OpITy::<Op>::ONE;
+    let one = F::Int::ONE;
     let values = &mut ret;
     let domain = get_domain::<_, i8>(ctx.fn_ident, argnum).unwrap_float();
     let domain_start = domain.range_start();
@@ -32,17 +28,17 @@ where
     let near_points = check_near_count(ctx);
 
     // Check near some notable constants
-    count_up(Op::FTy::ONE, near_points, values);
-    count_up(Op::FTy::ZERO, near_points, values);
-    count_up(Op::FTy::NEG_ONE, near_points, values);
-    count_down(Op::FTy::ONE, near_points, values);
-    count_down(Op::FTy::ZERO, near_points, values);
-    count_down(Op::FTy::NEG_ONE, near_points, values);
-    values.push(Op::FTy::NEG_ZERO);
+    count_up(F::ONE, near_points, values);
+    count_up(F::ZERO, near_points, values);
+    count_up(F::NEG_ONE, near_points, values);
+    count_down(F::ONE, near_points, values);
+    count_down(F::ZERO, near_points, values);
+    count_down(F::NEG_ONE, near_points, values);
+    values.push(F::NEG_ZERO);
 
     // Check values near the extremes
-    count_up(Op::FTy::NEG_INFINITY, near_points, values);
-    count_down(Op::FTy::INFINITY, near_points, values);
+    count_up(F::NEG_INFINITY, near_points, values);
+    count_down(F::INFINITY, near_points, values);
     count_down(domain_end, near_points, values);
     count_up(domain_start, near_points, values);
     count_down(domain_start, near_points, values);
@@ -50,20 +46,20 @@ where
     count_down(domain_end, near_points, values);
 
     // Check some special values that aren't included in the above ranges
-    values.push(Op::FTy::NAN);
-    values.push(Op::FTy::NEG_NAN);
-    values.extend(Op::FTy::consts().iter());
+    values.push(F::NAN);
+    values.push(F::NEG_NAN);
+    values.extend(F::consts().iter());
 
     // Check around the maximum subnormal value
-    let sub_max = Op::FTy::from_bits(Op::FTy::SIG_MASK);
+    let sub_max = F::from_bits(F::SIG_MASK);
     count_up(sub_max, near_points, values);
     count_down(sub_max, near_points, values);
     count_up(-sub_max, near_points, values);
     count_down(-sub_max, near_points, values);
 
     // Check a few values around the subnormal range
-    for shift in (0..Op::FTy::SIG_BITS).step_by(Op::FTy::SIG_BITS as usize / 5) {
-        let v = Op::FTy::from_bits(one << shift);
+    for shift in (0..F::SIG_BITS).step_by(F::SIG_BITS as usize / 5) {
+        let v = F::from_bits(one << shift);
         count_up(v, 2, values);
         count_down(v, 2, values);
         count_up(-v, 2, values);
@@ -142,21 +138,22 @@ where
 
     if matches!(ctx.base_name, BaseName::Scalbn | BaseName::Ldexp) {
         assert_eq!(argnum, 1, "scalbn integer argument should be arg1");
-        let (emax, emin, emin_sn) = match ctx.fn_ident.math_op().float_ty {
-            FloatTy::F16 => {
+        let (emax, emin, emin_sn) = match ctx.fn_ident.math_op().rust_sig.args[0] {
+            Ty::F16 => {
                 #[cfg(not(f16_enabled))]
                 unreachable!();
                 #[cfg(f16_enabled)]
                 (f16::EXP_MAX, f16::EXP_MIN, f16::EXP_MIN_SUBNORM)
             }
-            FloatTy::F32 => (f32::EXP_MAX, f32::EXP_MIN, f32::EXP_MIN_SUBNORM),
-            FloatTy::F64 => (f64::EXP_MAX, f64::EXP_MIN, f64::EXP_MIN_SUBNORM),
-            FloatTy::F128 => {
+            Ty::F32 => (f32::EXP_MAX, f32::EXP_MIN, f32::EXP_MIN_SUBNORM),
+            Ty::F64 => (f64::EXP_MAX, f64::EXP_MIN, f64::EXP_MIN_SUBNORM),
+            Ty::F128 => {
                 #[cfg(not(f128_enabled))]
                 unreachable!();
                 #[cfg(f128_enabled)]
                 (f128::EXP_MAX, f128::EXP_MIN, f128::EXP_MIN_SUBNORM)
             }
+            ty => unreachable!("expected a float first argument, got {ty}"),
         };
 
         // `scalbn`/`ldexp` have their trickiest behavior around exponent limits
@@ -172,6 +169,22 @@ where
         int_count_around((emin_sn - emax).cast(), near_points, &mut values);
     }
 
+    if matches!(
+        ctx.base_name,
+        BaseName::Ashl | BaseName::Ashr | BaseName::Lshr
+    ) {
+        // Don't test shift values that are allowed to invoke UB.
+        let max = match ctx.fn_ident.math_op().rust_sig.args[0] {
+            Ty::U32 | Ty::I32 => 31,
+            Ty::U64 | Ty::I64 => 63,
+            Ty::U128 | Ty::I128 => 127,
+            ty => panic!("unexpected type {ty}"),
+        };
+        let max: I = max.cast();
+        int_count_around(max, near_points, &mut values);
+        values.retain(|v| *v <= max);
+    }
+
     values.sort();
     values.dedup();
     let count = values.len().try_into().unwrap();
@@ -213,10 +226,10 @@ macro_rules! impl_edge_case_input {
     ($fty:ty) => {
         impl<Op> EdgeCaseInput<Op> for ($fty,)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let (iter0, steps0) = float_edge_cases::<Arg0<Op>>(ctx, 0);
                 let iter0 = iter0.map(|v| (v,));
                 (iter0, steps0)
             }
@@ -224,37 +237,28 @@ macro_rules! impl_edge_case_input {
 
         impl<Op> EdgeCaseInput<Op> for ($fty, $fty)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
-                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
-                let iter =
-                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
-                let count = steps0.checked_mul(steps1).unwrap();
+                let (iter0, steps0) = float_edge_cases::<Arg0<Op>>(ctx, 0);
+                let (iter1, steps1) = float_edge_cases::<Arg1<Op>>(ctx, 1);
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
                 (iter, count)
             }
         }
 
         impl<Op> EdgeCaseInput<Op> for ($fty, $fty, $fty)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
-                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
-                let (iter2, steps2) = float_edge_cases::<Op>(ctx, 2);
-
-                let iter = iter0
-                    .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
-                    .flat_map(move |(first, second)| {
-                        iter2.clone().map(move |third| (first, second, third))
-                    });
-                let count = steps0
-                    .checked_mul(steps1)
-                    .unwrap()
-                    .checked_mul(steps2)
-                    .unwrap();
+                let (iter0, steps0) = float_edge_cases::<Arg0<Op>>(ctx, 0);
+                let (iter1, steps1) = float_edge_cases::<Arg1<Op>>(ctx, 1);
+                let (iter2, steps2) = float_edge_cases::<Arg2<Op>>(ctx, 2);
+
+                let iter = product3(iter0, iter1, iter2);
+                let count = steps0.strict_mul(steps1).strict_mul(steps2);
 
                 (iter, count)
             }
@@ -262,15 +266,14 @@ macro_rules! impl_edge_case_input {
 
         impl<Op> EdgeCaseInput<Op> for (i32, $fty)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let (iter0, steps0) = int_edge_cases(ctx, 0);
-                let (iter1, steps1) = float_edge_cases::<Op>(ctx, 1);
+                let (iter1, steps1) = float_edge_cases::<Arg1<Op>>(ctx, 1);
 
-                let iter =
-                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
-                let count = steps0.checked_mul(steps1).unwrap();
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
 
                 (iter, count)
             }
@@ -278,15 +281,14 @@ macro_rules! impl_edge_case_input {
 
         impl<Op> EdgeCaseInput<Op> for ($fty, i32)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let (iter0, steps0) = float_edge_cases::<Op>(ctx, 0);
+                let (iter0, steps0) = float_edge_cases::<Arg0<Op>>(ctx, 0);
                 let (iter1, steps1) = int_edge_cases(ctx, 1);
 
-                let iter =
-                    iter0.flat_map(move |first| iter1.clone().map(move |second| (first, second)));
-                let count = steps0.checked_mul(steps1).unwrap();
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
 
                 (iter, count)
             }
@@ -301,6 +303,57 @@ impl_edge_case_input!(f64);
 #[cfg(f128_enabled)]
 impl_edge_case_input!(f128);
 
+macro_rules! impl_edge_case_input_int {
+    (@skip_u32 $ity:ty) => {
+        impl<Op> EdgeCaseInput<Op> for ($ity,)
+        where
+            Op: MathOp<RustArgs = Self>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let (iter0, steps0) = int_edge_cases(ctx, 0);
+                let iter0 = iter0.map(|v| (v,));
+                (iter0, steps0)
+            }
+        }
+
+        impl<Op> EdgeCaseInput<Op> for ($ity, $ity)
+        where
+            Op: MathOp<RustArgs = Self>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let (iter0, steps0) = int_edge_cases(ctx, 0);
+                let (iter1, steps1) = int_edge_cases(ctx, 1);
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
+                (iter, count)
+            }
+        }
+    };
+    ($ity:ty) => {
+        impl_edge_case_input_int!(@skip_u32 $ity);
+
+        impl<Op> EdgeCaseInput<Op> for ($ity, u32)
+        where
+            Op: MathOp<RustArgs = Self>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let (iter0, steps0) = int_edge_cases(ctx, 0);
+                let (iter1, steps1) = int_edge_cases(ctx, 1);
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
+                (iter, count)
+            }
+        }
+    };
+}
+
+impl_edge_case_input_int!(i32);
+impl_edge_case_input_int!(i64);
+impl_edge_case_input_int!(i128);
+impl_edge_case_input_int!(@skip_u32 u32);
+impl_edge_case_input_int!(u64);
+impl_edge_case_input_int!(u128);
+
 pub fn get_test_cases<Op>(
     ctx: &CheckCtx,
 ) -> (impl Iterator<Item = Op::RustArgs> + Send + use<'_, Op>, u64)
diff --git a/library/compiler-builtins/libm-test/src/generate/random.rs b/library/compiler-builtins/libm-test/src/generate/random.rs
index 09a3766c66780..32bd2f24ee044 100644
--- a/library/compiler-builtins/libm-test/src/generate/random.rs
+++ b/library/compiler-builtins/libm-test/src/generate/random.rs
@@ -2,14 +2,16 @@ use std::env;
 use std::ops::RangeInclusive;
 use std::sync::LazyLock;
 
-use libm::support::Float;
+use libm::support::{Float, Int};
+use rand::distr::uniform::SampleUniform;
 use rand::distr::{Alphanumeric, StandardUniform};
 use rand::prelude::Distribution;
 use rand::{RngExt, SeedableRng};
 use rand_chacha::ChaCha8Rng;
 
-use super::KnownSize;
 use crate::CheckCtx;
+use crate::generate::{KnownSize, product2, product3};
+use crate::num::full_range;
 use crate::run_cfg::{int_range, iteration_count};
 
 pub(crate) const SEED_ENV: &str = "LIBM_SEED";
@@ -31,7 +33,7 @@ pub trait RandomInput: Sized {
 }
 
 /// Generate a sequence of deterministically random floats.
-fn random_floats<F: Float>(count: u64) -> impl Iterator<Item = F>
+fn random_floats<F: Float>(count: u64) -> impl Iterator<Item = F> + Clone
 where
     StandardUniform: Distribution<F::Int>,
 {
@@ -43,9 +45,12 @@ where
 }
 
 /// Generate a sequence of deterministically random `i32`s within a specified range.
-fn random_ints(count: u64, range: RangeInclusive<i32>) -> impl Iterator<Item = i32> {
+fn random_ints<I>(count: u64, range: RangeInclusive<I>) -> impl Iterator<Item = I> + Clone
+where
+    I: Int + SampleUniform,
+{
     let mut rng = ChaCha8Rng::from_seed(*SEED);
-    (0..count).map(move |_| rng.random_range::<i32, _>(range.clone()))
+    (0..count).map(move |_| rng.random_range::<I, _>(range.clone()))
 }
 
 macro_rules! impl_random_input {
@@ -62,9 +67,11 @@ macro_rules! impl_random_input {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let count0 = iteration_count(ctx, 0);
                 let count1 = iteration_count(ctx, 1);
-                let iter = random_floats(count0)
-                    .flat_map(move |f1: $fty| random_floats(count1).map(move |f2: $fty| (f1, f2)));
-                (iter, count0 * count1)
+                let iter0 = random_floats(count0);
+                let iter1 = random_floats(count1);
+                let iter = product2(iter0, iter1);
+                let count = count0.strict_mul(count1);
+                (iter, count)
             }
         }
 
@@ -73,12 +80,12 @@ macro_rules! impl_random_input {
                 let count0 = iteration_count(ctx, 0);
                 let count1 = iteration_count(ctx, 1);
                 let count2 = iteration_count(ctx, 2);
-                let iter = random_floats(count0).flat_map(move |f1: $fty| {
-                    random_floats(count1).flat_map(move |f2: $fty| {
-                        random_floats(count2).map(move |f3: $fty| (f1, f2, f3))
-                    })
-                });
-                (iter, count0 * count1 * count2)
+                let iter0 = random_floats(count0);
+                let iter1 = random_floats(count1);
+                let iter2 = random_floats(count2);
+                let iter = product3(iter0, iter1, iter2);
+                let count = count0.strict_mul(count1).strict_mul(count2);
+                (iter, count)
             }
         }
 
@@ -86,10 +93,12 @@ macro_rules! impl_random_input {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let count0 = iteration_count(ctx, 0);
                 let count1 = iteration_count(ctx, 1);
-                let range0 = int_range(ctx, 0);
-                let iter = random_ints(count0, range0)
-                    .flat_map(move |f1: i32| random_floats(count1).map(move |f2: $fty| (f1, f2)));
-                (iter, count0 * count1)
+                let range0 = int_range::<i32>(ctx, 0).unwrap_or(full_range());
+                let iter0 = random_ints(count0, range0);
+                let iter1 = random_floats(count1);
+                let iter = product2(iter0, iter1);
+                let count = count0.strict_mul(count1);
+                (iter, count)
             }
         }
 
@@ -97,11 +106,12 @@ macro_rules! impl_random_input {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let count0 = iteration_count(ctx, 0);
                 let count1 = iteration_count(ctx, 1);
-                let range1 = int_range(ctx, 1);
-                let iter = random_floats(count0).flat_map(move |f1: $fty| {
-                    random_ints(count1, range1.clone()).map(move |f2: i32| (f1, f2))
-                });
-                (iter, count0 * count1)
+                let range1 = int_range::<i32>(ctx, 1).unwrap_or(full_range());
+                let iter0 = random_floats(count0);
+                let iter1 = random_ints(count1, range1.clone());
+                let iter = product2(iter0, iter1);
+                let count = count0.strict_mul(count1);
+                (iter, count)
             }
         }
     };
@@ -114,6 +124,55 @@ impl_random_input!(f64);
 #[cfg(f128_enabled)]
 impl_random_input!(f128);
 
+macro_rules! impl_random_input_int {
+    (@skip_u32 $ity:ty) => {
+        impl RandomInput for ($ity,) {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let count = iteration_count(ctx, 0);
+                let range = int_range::<$ity>(ctx, 0).unwrap_or(full_range());
+                let iter = random_ints(count, range).map(|f: $ity| (f,));
+                (iter, count)
+            }
+        }
+
+        impl RandomInput for ($ity, $ity) {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let range0 = int_range::<$ity>(ctx, 0).unwrap_or(full_range());
+                let range1 = int_range::<$ity>(ctx, 1).unwrap_or(full_range());
+                let iter0 = random_ints(count0, range0);
+                let iter1 = random_ints(count1, range1.clone());
+                let iter = product2(iter0, iter1);
+                (iter, count0 * count1)
+            }
+        }
+    };
+    ($ity:ty) => {
+        impl_random_input_int!(@skip_u32 $ity);
+
+        impl RandomInput for ($ity, u32) {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let count0 = iteration_count(ctx, 0);
+                let count1 = iteration_count(ctx, 1);
+                let range0 = int_range::<$ity>(ctx, 0).unwrap_or(full_range());
+                let range1 = int_range::<u32>(ctx, 1).unwrap_or(full_range());
+                let iter0 = random_ints(count0, range0);
+                let iter1 = random_ints(count1, range1.clone());
+                let iter = product2(iter0, iter1);
+                (iter, count0 * count1)
+            }
+        }
+    };
+}
+
+impl_random_input_int!(i32);
+impl_random_input_int!(i64);
+impl_random_input_int!(i128);
+impl_random_input_int!(@skip_u32 u32);
+impl_random_input_int!(u64);
+impl_random_input_int!(u128);
+
 /// Create a test case iterator.
 pub fn get_test_cases<RustArgs: RandomInput>(
     ctx: &CheckCtx,
diff --git a/library/compiler-builtins/libm-test/src/generate/spaced.rs b/library/compiler-builtins/libm-test/src/generate/spaced.rs
index 8e6b376ebd1e9..192106cc61827 100644
--- a/library/compiler-builtins/libm-test/src/generate/spaced.rs
+++ b/library/compiler-builtins/libm-test/src/generate/spaced.rs
@@ -1,12 +1,13 @@
 use std::fmt;
 use std::ops::RangeInclusive;
 
-use libm::support::{Float, MinInt};
+use libm::support::{Float, Int, MinInt};
 
 use crate::domain::get_domain;
-use crate::op::OpITy;
+use crate::generate::{product2, product3};
+use crate::num::full_range;
 use crate::run_cfg::{int_range, iteration_count};
-use crate::{CheckCtx, MathOp, linear_ints, logspace};
+use crate::{Arg0, Arg1, Arg2, CheckCtx, MathOp, linear_ints, logspace};
 
 /// Generate a sequence of inputs that eiher cover the domain in completeness (for smaller float
 /// types and single argument functions) or provide evenly spaced inputs across the domain with
@@ -17,23 +18,23 @@ pub trait SpacedInput<Op> {
 
 /// Construct an iterator from `logspace` and also calculate the total number of steps expected
 /// for that iterator.
-fn logspace_steps<Op>(
+fn logspace_steps<F>(
     ctx: &CheckCtx,
     argnum: usize,
     max_steps: u64,
-) -> (impl Iterator<Item = Op::FTy> + Clone, u64)
+) -> (impl Iterator<Item = F> + Clone, u64)
 where
-    Op: MathOp,
-    OpITy<Op>: TryFrom<u64, Error: fmt::Debug>,
-    u64: TryFrom<OpITy<Op>, Error: fmt::Debug>,
-    RangeInclusive<OpITy<Op>>: Iterator,
+    F: Float,
+    F::Int: TryFrom<u64, Error: fmt::Debug>,
+    u64: TryFrom<F::Int, Error: fmt::Debug>,
+    RangeInclusive<F::Int>: Iterator,
 {
     // i8 is a dummy type here, it can be any integer.
-    let domain = get_domain::<Op::FTy, i8>(ctx.fn_ident, argnum).unwrap_float();
+    let domain = get_domain::<F, i8>(ctx.fn_ident, argnum).unwrap_float();
     let start = domain.range_start();
     let end = domain.range_end();
 
-    let max_steps = OpITy::<Op>::try_from(max_steps).unwrap_or(OpITy::<Op>::MAX);
+    let max_steps = F::Int::try_from(max_steps).unwrap_or(F::Int::MAX);
     let (iter, steps) = logspace(start, end, max_steps);
 
     // `steps` will be <= the original `max_steps`, which is a `u64`.
@@ -66,174 +67,194 @@ impl<T, A: Iterator<Item = T>, B: Iterator<Item = T>> Iterator for EitherIter<A,
 
 /// Gets the total number of possible values, returning `None` if that number doesn't fit in a
 /// `u64`.
-fn value_count<F: Float>() -> Option<u64>
+fn total_value_count<F: Float>() -> Option<u64>
 where
     u64: TryFrom<F::Int>,
 {
-    u64::try_from(F::Int::MAX)
+    total_value_count_int::<F::Int>()
+}
+
+fn total_value_count_int<I: Int>() -> Option<u64>
+where
+    u64: TryFrom<I::Unsigned>,
+{
+    u64::try_from(I::MAX.abs_diff(I::MIN))
         .ok()
         .and_then(|max| max.checked_add(1))
 }
 
 /// Returns an iterator of every possible value of type `F`.
-fn all_values<F: Float>() -> impl Iterator<Item = F>
+fn exhaustive_float<F: Float>() -> (impl Iterator<Item = F> + Clone, u64)
 where
+    u64: TryFrom<F::Int>,
     RangeInclusive<F::Int>: Iterator<Item = F::Int>,
 {
-    (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits))
+    let count = total_value_count::<F>().expect("tried exhaustive with > u64::MAX items");
+    let iter = (F::Int::MIN..=F::Int::MAX).map(|bits| F::from_bits(bits));
+    (iter, count)
 }
 
 macro_rules! impl_spaced_input {
     ($fty:ty) => {
         impl<Op> SpacedInput<Op> for ($fty,)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let max_steps0 = iteration_count(ctx, 0);
-                // `f16` and `f32` can have exhaustive tests.
-                match value_count::<Op::FTy>() {
-                    Some(steps0) if steps0 <= max_steps0 => {
-                        let iter0 = all_values();
-                        let iter0 = iter0.map(|v| (v,));
-                        (EitherIter::A(iter0), steps0)
-                    }
-                    _ => {
-                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
-                        let iter0 = iter0.map(|v| (v,));
-                        (EitherIter::B(iter0), steps0)
-                    }
+
+                // Unary tests: `f16` and `f32` may be exhaustive.
+                if let Some(exhaustive_steps0) = total_value_count::<Arg0<Op>>()
+                    && exhaustive_steps0 <= max_steps0
+                {
+                    let (iter0, steps0) = exhaustive_float();
+                    let iter0 = iter0.map(|v| (v,));
+
+                    return (EitherIter::A(iter0), steps0);
                 }
+
+                // Non-exhaustive, sweep a subset of inputs.
+                let (iter0, steps0) = logspace_steps::<Arg0<Op>>(ctx, 0, max_steps0);
+                let iter0 = iter0.map(|v| (v,));
+                (EitherIter::B(iter0), steps0)
             }
         }
 
         impl<Op> SpacedInput<Op> for ($fty, $fty)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let max_steps0 = iteration_count(ctx, 0);
                 let max_steps1 = iteration_count(ctx, 1);
-                // `f16` can have exhaustive tests.
-                match value_count::<Op::FTy>() {
-                    Some(count) if count <= max_steps0 && count <= max_steps1 => {
-                        let iter = all_values()
-                            .flat_map(|first| all_values().map(move |second| (first, second)));
-                        (EitherIter::A(iter), count.checked_mul(count).unwrap())
-                    }
-                    _ => {
-                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
-                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
-                        let iter = iter0.flat_map(move |first| {
-                            iter1.clone().map(move |second| (first, second))
-                        });
-                        let count = steps0.checked_mul(steps1).unwrap();
-                        (EitherIter::B(iter), count)
-                    }
+
+                // Binary test: `f16` may be exhaustive.
+                if let Some(exhaustive_steps0) = total_value_count::<Arg0<Op>>()
+                    && exhaustive_steps0 <= max_steps0
+                    && let Some(exhaustive_steps1) = total_value_count::<Arg1<Op>>()
+                    && exhaustive_steps1 <= max_steps1
+                {
+                    let (iter0, steps0) = exhaustive_float();
+                    let (iter1, steps1) = exhaustive_float();
+
+                    let iter = product2(iter0, iter1);
+                    let count = steps0.strict_mul(steps1);
+
+                    return (EitherIter::A(iter), count);
                 }
+
+                // Non-exhaustive, sweep a subset of inputs.
+                let (iter0, steps0) = logspace_steps::<Arg0<Op>>(ctx, 0, max_steps0);
+                let (iter1, steps1) = logspace_steps::<Arg1<Op>>(ctx, 1, max_steps1);
+
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
+
+                (EitherIter::B(iter), count)
             }
         }
 
         impl<Op> SpacedInput<Op> for ($fty, $fty, $fty)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let max_steps0 = iteration_count(ctx, 0);
                 let max_steps1 = iteration_count(ctx, 1);
                 let max_steps2 = iteration_count(ctx, 2);
-                // `f16` can be exhaustive tested if `LIBM_EXTENSIVE_TESTS` is incresed.
-                match value_count::<Op::FTy>() {
-                    Some(count)
-                        if count <= max_steps0 && count <= max_steps1 && count <= max_steps2 =>
-                    {
-                        let iter = all_values().flat_map(|first| {
-                            all_values().flat_map(move |second| {
-                                all_values().map(move |third| (first, second, third))
-                            })
-                        });
-                        (EitherIter::A(iter), count.checked_pow(3).unwrap())
-                    }
-                    _ => {
-                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
-                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
-                        let (iter2, steps2) = logspace_steps::<Op>(ctx, 2, max_steps2);
-
-                        let iter = iter0
-                            .flat_map(move |first| iter1.clone().map(move |second| (first, second)))
-                            .flat_map(move |(first, second)| {
-                                iter2.clone().map(move |third| (first, second, third))
-                            });
-                        let count = steps0
-                            .checked_mul(steps1)
-                            .unwrap()
-                            .checked_mul(steps2)
-                            .unwrap();
-
-                        (EitherIter::B(iter), count)
-                    }
+
+                // Ternary test: `f16` may be exhaustive tested if `LIBM_EXTENSIVE_TESTS`
+                // is incresed.
+                if let Some(exhaustive_steps0) = total_value_count::<Arg0<Op>>()
+                    && exhaustive_steps0 <= max_steps0
+                    && let Some(exhaustive_steps1) = total_value_count::<Arg1<Op>>()
+                    && exhaustive_steps1 <= max_steps1
+                    && let Some(exhaustive_steps2) = total_value_count::<Arg2<Op>>()
+                    && exhaustive_steps2 <= max_steps2
+                {
+                    let (iter0, steps0) = exhaustive_float();
+                    let (iter1, steps1) = exhaustive_float();
+                    let (iter2, steps2) = exhaustive_float();
+
+                    let iter = product3(iter0, iter1, iter2);
+                    let count = steps0.strict_mul(steps1).strict_mul(steps2);
+
+                    return (EitherIter::A(iter), count);
                 }
+
+                // Non-exhaustive, sweep a subset of inputs.
+                let (iter0, steps0) = logspace_steps::<Arg0<Op>>(ctx, 0, max_steps0);
+                let (iter1, steps1) = logspace_steps::<Arg1<Op>>(ctx, 1, max_steps1);
+                let (iter2, steps2) = logspace_steps::<Arg2<Op>>(ctx, 2, max_steps2);
+
+                let iter = product3(iter0, iter1, iter2);
+                let count = steps0.strict_mul(steps1).strict_mul(steps2);
+
+                (EitherIter::B(iter), count)
             }
         }
 
         impl<Op> SpacedInput<Op> for (i32, $fty)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
-                let range0 = int_range(ctx, 0);
+                let range0 = int_range(ctx, 0).unwrap_or(full_range());
                 let max_steps0 = iteration_count(ctx, 0);
                 let max_steps1 = iteration_count(ctx, 1);
-                match value_count::<Op::FTy>() {
-                    Some(count1) if count1 <= max_steps1 => {
-                        let (iter0, steps0) = linear_ints(range0, max_steps0);
-                        let iter = iter0
-                            .flat_map(move |first| all_values().map(move |second| (first, second)));
-                        (EitherIter::A(iter), steps0.checked_mul(count1).unwrap())
-                    }
-                    _ => {
-                        let (iter0, steps0) = linear_ints(range0, max_steps0);
-                        let (iter1, steps1) = logspace_steps::<Op>(ctx, 1, max_steps1);
-
-                        let iter = iter0.flat_map(move |first| {
-                            iter1.clone().map(move |second| (first, second))
-                        });
-                        let count = steps0.checked_mul(steps1).unwrap();
-
-                        (EitherIter::B(iter), count)
-                    }
+
+                if let Some(exhaustive_steps0) = total_value_count_int::<Arg0<Op>>()
+                    && exhaustive_steps0 <= max_steps0
+                    && let Some(exhaustive_steps1) = total_value_count::<Arg1<Op>>()
+                    && exhaustive_steps1 <= max_steps1
+                {
+                    let (iter0, steps0) = linear_ints(range0, max_steps0);
+                    let (iter1, steps1) = exhaustive_float();
+
+                    let iter = product2(iter0, iter1);
+                    let count = steps0.strict_mul(steps1);
+
+                    return (EitherIter::A(iter), count);
                 }
+
+                let (iter0, steps0) = linear_ints(range0, max_steps0);
+                let (iter1, steps1) = logspace_steps::<Arg1<Op>>(ctx, 1, max_steps1);
+
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
+
+                (EitherIter::B(iter), count)
             }
         }
 
         impl<Op> SpacedInput<Op> for ($fty, i32)
         where
-            Op: MathOp<RustArgs = Self, FTy = $fty>,
+            Op: MathOp<RustArgs = Self>,
         {
             fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
                 let max_steps0 = iteration_count(ctx, 0);
-                let range1 = int_range(ctx, 1);
+                let range1 = int_range(ctx, 1).unwrap_or(full_range());
                 let max_steps1 = iteration_count(ctx, 1);
-                match value_count::<Op::FTy>() {
-                    Some(count0) if count0 <= max_steps0 => {
-                        let (iter1, steps1) = linear_ints(range1, max_steps1);
-                        let iter = all_values().flat_map(move |first| {
-                            iter1.clone().map(move |second| (first, second))
-                        });
-                        (EitherIter::A(iter), count0.checked_mul(steps1).unwrap())
-                    }
-                    _ => {
-                        let (iter0, steps0) = logspace_steps::<Op>(ctx, 0, max_steps0);
-                        let (iter1, steps1) = linear_ints(range1, max_steps1);
-
-                        let iter = iter0.flat_map(move |first| {
-                            iter1.clone().map(move |second| (first, second))
-                        });
-                        let count = steps0.checked_mul(steps1).unwrap();
-
-                        (EitherIter::B(iter), count)
-                    }
+
+                if let Some(exhaustive_steps0) = total_value_count::<Arg0<Op>>()
+                    && exhaustive_steps0 <= max_steps0
+                {
+                    let (iter0, steps0) = exhaustive_float();
+                    let (iter1, steps1) = linear_ints(range1, max_steps1);
+
+                    let iter = product2(iter0, iter1);
+                    let count = steps0.strict_mul(steps1);
+
+                    return (EitherIter::A(iter), count);
                 }
+
+                let (iter0, steps0) = logspace_steps::<Arg0<Op>>(ctx, 0, max_steps0);
+                let (iter1, steps1) = linear_ints(range1, max_steps1);
+
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
+
+                (EitherIter::B(iter), count)
             }
         }
     };
@@ -246,6 +267,102 @@ impl_spaced_input!(f64);
 #[cfg(f128_enabled)]
 impl_spaced_input!(f128);
 
+macro_rules! impl_spaced_input_int {
+    (@skip_u32 $ity:ty) => {
+        impl<Op> SpacedInput<Op> for ($ity,)
+        where
+            Op: MathOp<RustArgs = Self>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let range = int_range(ctx, 0).unwrap_or(full_range());
+                let max_steps0 = iteration_count(ctx, 0);
+
+                if let Some(steps0) = total_value_count_int::<Arg0<Op>>()
+                    && steps0 <= max_steps0
+                {
+                    let iter0 = range.map(|v| (v,));
+                    return (EitherIter::A(iter0), steps0);
+                }
+
+                let (iter0, steps0) = linear_ints::<Arg0<Op>>(range, max_steps0);
+                let iter0 = iter0.map(|v| (v,));
+                (EitherIter::B(iter0), steps0)
+            }
+        }
+
+        impl<Op> SpacedInput<Op> for ($ity, $ity)
+        where
+            Op: MathOp<RustArgs = Self>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let range0 = int_range(ctx, 0).unwrap_or(full_range());
+                let range1 = int_range(ctx, 1).unwrap_or(full_range());
+                let max_steps0 = iteration_count(ctx, 0);
+                let max_steps1 = iteration_count(ctx, 0);
+
+                if let Some(steps0) = total_value_count_int::<Arg0<Op>>()
+                    && steps0 <= max_steps0
+                    && let Some(steps1) = total_value_count_int::<Arg1<Op>>()
+                    && steps1 <= max_steps1
+                {
+                    let iter = product2(range0, range1);
+                    let count = steps0.strict_mul(steps1);
+
+                    return (EitherIter::A(iter), count);
+                }
+
+                let (iter0, steps0) = linear_ints::<Arg0<Op>>(range0, max_steps0);
+                let (iter1, steps1) = linear_ints::<Arg1<Op>>(range1, max_steps1);
+
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
+
+                (EitherIter::B(iter), count)
+            }
+        }
+    };
+    ($ity:ty) => {
+        impl_spaced_input_int!(@skip_u32 $ity);
+
+        impl<Op> SpacedInput<Op> for ($ity, u32)
+        where
+            Op: MathOp<RustArgs = Self>,
+        {
+            fn get_cases(ctx: &CheckCtx) -> (impl Iterator<Item = Self>, u64) {
+                let range0 = int_range(ctx, 0).unwrap_or(full_range());
+                let range1 = int_range(ctx, 1).unwrap_or(full_range());
+                let max_steps0 = iteration_count(ctx, 0);
+                let max_steps1 = iteration_count(ctx, 0);
+
+                if let Some(steps0) = total_value_count_int::<Arg0<Op>>()
+                    && steps0 <= max_steps0
+                    && let Some(steps1) = total_value_count_int::<Arg1<Op>>()
+                    && steps1 <= max_steps1
+                {
+                    let iter = product2(range0, range1);
+                    let count = steps0.strict_mul(steps1);
+
+                    return (EitherIter::A(iter), count);
+                }
+
+                let (iter0, steps0) = linear_ints::<Arg0<Op>>(range0, max_steps0);
+                let (iter1, steps1) = linear_ints::<Arg1<Op>>(range1, max_steps1);
+
+                let iter = product2(iter0, iter1);
+                let count = steps0.strict_mul(steps1);
+                (EitherIter::B(iter), count)
+            }
+        }
+    };
+}
+
+impl_spaced_input_int!(i32);
+impl_spaced_input_int!(i64);
+impl_spaced_input_int!(i128);
+impl_spaced_input_int!(@skip_u32 u32);
+impl_spaced_input_int!(u64);
+impl_spaced_input_int!(u128);
+
 /// Create a test case iterator for extensive inputs. Also returns the total test case count.
 pub fn get_test_cases<Op>(
     ctx: &CheckCtx,
diff --git a/library/compiler-builtins/libm-test/src/lib.rs b/library/compiler-builtins/libm-test/src/lib.rs
index 60d96ae9bceee..f2ee03252139e 100644
--- a/library/compiler-builtins/libm-test/src/lib.rs
+++ b/library/compiler-builtins/libm-test/src/lib.rs
@@ -3,6 +3,7 @@
 #![allow(clippy::unusual_byte_groupings)] // sometimes we group by sign_exp_sig
 #![allow(unstable_name_collisions)] // FIXME(float_bits_const): remove when stable
 
+pub mod builtins_wrapper;
 pub mod domain;
 mod f8_impl;
 pub mod generate;
@@ -25,8 +26,8 @@ pub use f8_impl::{f8, hf8};
 pub use libm::support::{Float, Int, IntTy, MinInt};
 pub use num::{FloatExt, linear_ints, logspace};
 pub use op::{
-    BaseName, FloatTy, Identifier, MathOp, OpCFn, OpCRet, OpFTy, OpRustArgs, OpRustFn, OpRustRet,
-    Ty,
+    Arg0, Arg1, Arg2, BaseName, Group, Identifier, MathOp, OpCFn, OpCRet, OpRustArgs, OpRustFn,
+    OpRustRet, Ret0, Ret1, Ty,
 };
 pub use precision::{MaybeOverride, SpecialCase, default_ulp};
 use run_cfg::extensive_max_iterations;
@@ -34,7 +35,7 @@ pub use run_cfg::{
     CheckBasis, CheckCtx, EXTENSIVE_ENV, GeneratorKind, bigint_fuzz_iteration_count,
     skip_extensive_test,
 };
-pub use test_traits::{CheckOutput, Hex, TupleCall};
+pub use test_traits::{CheckOutput, Tuple, TupleCall};
 
 /// Result type for tests is usually from `anyhow`. Most times there is no success value to
 /// propagate.
diff --git a/library/compiler-builtins/libm-test/src/mpfloat.rs b/library/compiler-builtins/libm-test/src/mpfloat.rs
index 91130f892b8ab..c4f1ca193e589 100644
--- a/library/compiler-builtins/libm-test/src/mpfloat.rs
+++ b/library/compiler-builtins/libm-test/src/mpfloat.rs
@@ -6,12 +6,14 @@
 use std::cmp::Ordering;
 
 use rug::Assign;
-pub use rug::Float as MpFloat;
-use rug::az::{self, Az};
+use rug::az::{self, Az, CheckedCast, OverflowingCast, WrappingAs};
 use rug::float::Round::Nearest;
-use rug::ops::{PowAssignRound, RemAssignRound};
+use rug::ops::{
+    AddAssignRound, DivAssignRound, MulAssignRound, PowAssignRound, RemAssignRound, SubAssignRound,
+};
+pub use rug::{Float as MpFloat, Integer as MpInt};
 
-use crate::{Float, MathOp};
+use crate::{Arg0, Arg1, Arg2, Float, MathOp, Ret0, Ret1};
 
 /// Create a multiple-precision float with the correct number of bits for a concrete float type.
 fn new_mpfloat<F: Float>() -> MpFloat {
@@ -19,7 +21,7 @@ fn new_mpfloat<F: Float>() -> MpFloat {
 }
 
 /// Set subnormal emulation and convert to a concrete float type.
-fn prep_retval<F: Float>(mp: &mut MpFloat, ord: Ordering) -> F
+fn prep_retval<F>(mp: &mut MpFloat, ord: Ordering) -> F
 where
     for<'a> &'a MpFloat: az::Cast<F>,
 {
@@ -59,7 +61,7 @@ macro_rules! impl_mp_op {
                 type MpTy = MpFloat;
 
                 fn new_mp() -> Self::MpTy {
-                    new_mpfloat::<Self::FTy>()
+                    new_mpfloat::<Arg0<Self>>()
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -83,7 +85,7 @@ macro_rules! impl_mp_op {
                 type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg0<Self>>())
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -109,9 +111,9 @@ macro_rules! impl_mp_op {
 
                 fn new_mp() -> Self::MpTy {
                     (
-                        new_mpfloat::<Self::FTy>(),
-                        new_mpfloat::<Self::FTy>(),
-                        new_mpfloat::<Self::FTy>(),
+                        new_mpfloat::<Arg0<Self>>(),
+                        new_mpfloat::<Arg1<Self>>(),
+                        new_mpfloat::<Arg2<Self>>(),
                     )
                 }
 
@@ -133,6 +135,16 @@ libm_macros::for_each_function! {
     skip: [
         // Most of these need a manual implementation
         // verify-sorted-start
+        addf128,
+        addf16,
+        addf32,
+        addf64,
+        ashl_u128,
+        ashl_u32,
+        ashl_u64,
+        ashr_i128,
+        ashr_i32,
+        ashr_i64,
         ceil,
         ceilf,
         ceilf128,
@@ -141,6 +153,19 @@ libm_macros::for_each_function! {
         copysignf,
         copysignf128,
         copysignf16,
+        divf128,
+        divf32,
+        divf64,
+        eqf128,
+        eqf16,
+        eqf32,
+        eqf64,
+        extend_f16_f128,
+        extend_f16_f32,
+        extend_f16_f64,
+        extend_f32_f128,
+        extend_f32_f64,
+        extend_f64_f128,
         fabs,
         fabsf,
         fabsf128,
@@ -164,26 +189,133 @@ libm_macros::for_each_function! {
         frexpf,
         frexpf128,
         frexpf16,
+        ftoi_f128_i128,
+        ftoi_f128_i32,
+        ftoi_f128_i64,
+        ftoi_f128_u128,
+        ftoi_f128_u32,
+        ftoi_f128_u64,
+        ftoi_f32_i128,
+        ftoi_f32_i32,
+        ftoi_f32_i64,
+        ftoi_f32_u128,
+        ftoi_f32_u32,
+        ftoi_f32_u64,
+        ftoi_f64_i128,
+        ftoi_f64_i32,
+        ftoi_f64_i64,
+        ftoi_f64_u128,
+        ftoi_f64_u32,
+        ftoi_f64_u64,
+        gef128,
+        gef16,
+        gef32,
+        gef64,
+        gtf128,
+        gtf16,
+        gtf32,
+        gtf64,
+        iadd_i128,
+        iadd_u128,
+        iaddo_i128,
+        iaddo_u128,
+        idiv_i128,
+        idiv_i32,
+        idiv_i64,
+        idiv_u128,
+        idiv_u32,
+        idiv_u64,
+        idivmod_i128,
+        idivmod_i32,
+        idivmod_i64,
+        idivmod_u128,
+        idivmod_u32,
+        idivmod_u64,
         ilogb,
         ilogbf,
         ilogbf128,
         ilogbf16,
+        imod_i128,
+        imod_i32,
+        imod_i64,
+        imod_u128,
+        imod_u32,
+        imod_u64,
+        imul_i128,
+        imul_u64,
+        imulo_i128,
+        imulo_i32,
+        imulo_i64,
+        imulo_u128,
+        isub_i128,
+        isub_u128,
+        isubo_i128,
+        isubo_u128,
+        itof_i128_f128,
+        itof_i128_f32,
+        itof_i128_f64,
+        itof_i32_f128,
+        itof_i32_f32,
+        itof_i32_f64,
+        itof_i64_f128,
+        itof_i64_f32,
+        itof_i64_f64,
+        itof_u128_f128,
+        itof_u128_f32,
+        itof_u128_f64,
+        itof_u32_f128,
+        itof_u32_f32,
+        itof_u32_f64,
+        itof_u64_f128,
+        itof_u64_f32,
+        itof_u64_f64,
         jn,
         jnf,
         ldexp,
         ldexpf,
         ldexpf128,
         ldexpf16,
+        leading_zeros_u128,
+        leading_zeros_u32,
+        leading_zeros_u64,
+        lef128,
+        lef16,
+        lef32,
+        lef64,
         lgamma,
         lgamma_r,
         lgammaf,
         lgammaf_r,
+        lshr_u128,
+        lshr_u32,
+        lshr_u64,
+        ltf128,
+        ltf16,
+        ltf32,
+        ltf64,
         modf,
         modff,
+        mulf128,
+        mulf16,
+        mulf32,
+        mulf64,
+        narrow_f128_f16,
+        narrow_f128_f32,
+        narrow_f128_f64,
+        narrow_f32_f16,
+        narrow_f64_f16,
+        narrow_f64_f32,
+        nef128,
+        nef16,
+        nef32,
+        nef64,
         nextafter,
         nextafterf,
         pow,
         powf,remquo,
+        powif128,
+        powif32,
+        powif64,
         remquof,
         rint,
         rintf,
@@ -202,10 +334,21 @@ libm_macros::for_each_function! {
         scalbnf128,
         scalbnf16,
         sincos,sincosf,
+        subf128,
+        subf16,
+        subf32,
+        subf64,
+        trailing_zeros_u128,
+        trailing_zeros_u32,
+        trailing_zeros_u64,
         trunc,
         truncf,
         truncf128,
         truncf16,yn,
+        unordf128,
+        unordf16,
+        unordf32,
+        unordf64,
         ynf,
         // verify-sorted-end
     ],
@@ -240,7 +383,7 @@ macro_rules! impl_no_round {
             type MpTy = MpFloat;
 
             fn new_mp() -> Self::MpTy {
-                new_mpfloat::<Self::FTy>()
+                new_mpfloat::<Arg0<Self>>()
             }
 
             fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -299,7 +442,7 @@ macro_rules! impl_op_for_ty {
                 type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Ret0<Self>>())
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -307,8 +450,8 @@ macro_rules! impl_op_for_ty {
                     this.1.assign(&this.0);
                     let (ord0, ord1) = this.0.trunc_fract_round(&mut this.1, Nearest);
                     (
-                        prep_retval::<Self::FTy>(&mut this.1, ord0),
-                        prep_retval::<Self::FTy>(&mut this.0, ord1),
+                        prep_retval::<Ret0<Self>>(&mut this.1, ord0),
+                        prep_retval::<Ret1<Self>>(&mut this.0, ord1),
                     )
                 }
             }
@@ -317,7 +460,7 @@ macro_rules! impl_op_for_ty {
                 type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -332,14 +475,14 @@ macro_rules! impl_op_for_ty {
                 type MpTy = MpFloat;
 
                 fn new_mp() -> Self::MpTy {
-                    new_mpfloat::<Self::FTy>()
+                    new_mpfloat::<Arg1<Self>>()
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
                     let (n, x) = input;
                     this.assign(x);
                     let ord = this.jn_round(n, Nearest);
-                    prep_retval::<Self::FTy>(this, ord)
+                    prep_retval::<Self::RustRet>(this, ord)
                 }
             }
 
@@ -347,7 +490,7 @@ macro_rules! impl_op_for_ty {
                 type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Ret0<Self>>())
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -355,8 +498,8 @@ macro_rules! impl_op_for_ty {
                     this.1.assign(0.0);
                     let (sord, cord) = this.0.sin_cos_round(&mut this.1, Nearest);
                     (
-                        prep_retval::<Self::FTy>(&mut this.0, sord),
-                        prep_retval::<Self::FTy>(&mut this.1, cord)
+                        prep_retval::<Ret0<Self>>(&mut this.0, sord),
+                        prep_retval::<Ret1<Self>>(&mut this.1, cord)
                     )
                 }
             }
@@ -366,8 +509,8 @@ macro_rules! impl_op_for_ty {
 
                 fn new_mp() -> Self::MpTy {
                     (
-                        new_mpfloat::<Self::FTy>(),
-                        new_mpfloat::<Self::FTy>(),
+                        new_mpfloat::<Arg0<Self>>(),
+                        new_mpfloat::<Arg1<Self>>(),
                     )
                 }
 
@@ -375,7 +518,7 @@ macro_rules! impl_op_for_ty {
                     this.0.assign(input.0);
                     this.1.assign(input.1);
                     let (ord, q) = this.0.remainder_quo31_round(&this.1, Nearest);
-                    (prep_retval::<Self::FTy>(&mut this.0, ord), q)
+                    (prep_retval::<Ret0<Self>>(&mut this.0, ord), q)
                 }
             }
 
@@ -383,14 +526,14 @@ macro_rules! impl_op_for_ty {
                 type MpTy = MpFloat;
 
                 fn new_mp() -> Self::MpTy {
-                    new_mpfloat::<Self::FTy>()
+                    new_mpfloat::<Arg1<Self>>()
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
                     let (n, x) = input;
                     this.assign(x);
                     let ord = this.yn_round(n, Nearest);
-                    prep_retval::<Self::FTy>(this, ord)
+                    prep_retval::<Self::RustRet>(this, ord)
                 }
             }
         }
@@ -401,11 +544,154 @@ macro_rules! impl_op_for_ty {
 macro_rules! impl_op_for_ty_all {
     ($fty:ty, $suffix:literal) => {
         paste::paste! {
+            impl MpOp for crate::op::[<add $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.add_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+
+            impl MpOp for crate::op::[<sub $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.sub_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+
+            impl MpOp for crate::op::[<mul $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.mul_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+
+            impl MpOp for crate::op::[<eq $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0 == this.1
+                }
+            }
+
+            impl MpOp for crate::op::[<gt $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0 > this.1
+                }
+            }
+
+            impl MpOp for crate::op::[<ge $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0 >= this.1
+                }
+            }
+
+            impl MpOp for crate::op::[<lt $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0 < this.1
+                }
+            }
+
+            impl MpOp for crate::op::[<le $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0 <= this.1
+                }
+            }
+
+            impl MpOp for crate::op::[<ne $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0 != this.1
+                }
+            }
+
+            impl MpOp for crate::op::[<unord $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0.is_nan() || this.1.is_nan()
+                }
+            }
+
             impl MpOp for crate::op::[<copysign $suffix>]::Routine {
                 type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -420,7 +706,7 @@ macro_rules! impl_op_for_ty_all {
                 type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -436,7 +722,7 @@ macro_rules! impl_op_for_ty_all {
                 type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -456,7 +742,7 @@ macro_rules! impl_op_for_ty_all {
                 type MpTy = (MpFloat, MpFloat);
 
                 fn new_mp() -> Self::MpTy {
-                    (new_mpfloat::<Self::FTy>(), new_mpfloat::<Self::FTy>())
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -476,13 +762,13 @@ macro_rules! impl_op_for_ty_all {
                 type MpTy = MpFloat;
 
                 fn new_mp() -> Self::MpTy {
-                    new_mpfloat::<Self::FTy>()
+                    new_mpfloat::<Arg0<Self>>()
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
                     this.assign(input.0);
                     let exp = this.frexp_mut();
-                    (prep_retval::<Self::FTy>(this, Ordering::Equal), exp)
+                    (prep_retval::<Ret0<Self>>(this, Ordering::Equal), exp)
                 }
             }
 
@@ -490,7 +776,7 @@ macro_rules! impl_op_for_ty_all {
                 type MpTy = MpFloat;
 
                 fn new_mp() -> Self::MpTy {
-                    new_mpfloat::<Self::FTy>()
+                    new_mpfloat::<Arg0<Self>>()
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -527,13 +813,122 @@ macro_rules! impl_op_for_ty_all {
                 type MpTy = MpFloat;
 
                 fn new_mp() -> Self::MpTy {
-                    new_mpfloat::<Self::FTy>()
+                    new_mpfloat::<Arg0<Self>>()
                 }
 
                 fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
                     this.assign(input.0);
                     *this <<= input.1;
-                    prep_retval::<Self::FTy>(this, Ordering::Equal)
+                    prep_retval::<Self::RustRet>(this, Ordering::Equal)
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_op_for_ty_no_f16 {
+    ($fty:ty, $suffix:literal) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<div $fty>]::Routine {
+                type MpTy = (MpFloat, MpFloat);
+
+                fn new_mp() -> Self::MpTy {
+                    (new_mpfloat::<Arg0<Self>>(), new_mpfloat::<Arg1<Self>>())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    let ord = this.0.div_assign_round(&this.1, Nearest);
+                    prep_retval::<Self::RustRet>(&mut this.0, ord)
+                }
+            }
+
+            impl MpOp for crate::op::[<powi $fty>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Arg0<Self>>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    let ord = this.pow_assign_round(input.1, Nearest);
+                    prep_retval::<Self::RustRet>(this, ord)
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_extend_trunc {
+    ($narrow:ty, $wide:ty) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<extend_ $narrow _ $wide>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Arg0<Self>>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    prep_retval::<Self::RustRet>(this, Ordering::Equal)
+                }
+            }
+
+            impl MpOp for crate::op::[<narrow_ $wide _ $narrow>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Arg0<Self>>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    prep_retval::<Self::RustRet>(this, Ordering::Equal)
+                }
+            }
+        }
+    };
+}
+
+macro_rules! impl_ftoi_itof {
+    ($fty:ty, $ity:ty) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<ftoi_ $fty _ $ity>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Arg0<Self>>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    this.trunc_mut();
+                    this.subnormalize_ieee_round(Ordering::Equal, Nearest);
+                    (&*this).checked_cast().unwrap_or_else(|| {
+                        if this.is_nan() {
+                            0
+                        } else if this.is_sign_negative() {
+                            Self::RustRet::MIN
+                        } else {
+                            Self::RustRet::MAX
+                        }
+                    })
+                }
+            }
+
+            impl MpOp for crate::op::[<itof_ $ity _ $fty>]::Routine {
+                type MpTy = MpFloat;
+
+                fn new_mp() -> Self::MpTy {
+                    new_mpfloat::<Self::RustRet>()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    prep_retval::<Self::RustRet>(this, Ordering::Equal)
                 }
             }
         }
@@ -543,6 +938,11 @@ macro_rules! impl_op_for_ty_all {
 impl_op_for_ty!(f32, "f");
 impl_op_for_ty!(f64, "");
 
+impl_op_for_ty_no_f16!(f32, "f");
+impl_op_for_ty_no_f16!(f64, "");
+#[cfg(f128_enabled)]
+impl_op_for_ty_no_f16!(f128, "f128");
+
 #[cfg(f16_enabled)]
 impl_op_for_ty_all!(f16, "f16");
 impl_op_for_ty_all!(f32, "f");
@@ -550,18 +950,56 @@ impl_op_for_ty_all!(f64, "");
 #[cfg(f128_enabled)]
 impl_op_for_ty_all!(f128, "f128");
 
+#[cfg(f16_enabled)]
+impl_extend_trunc!(f16, f32);
+#[cfg(f16_enabled)]
+impl_extend_trunc!(f16, f64);
+#[cfg(f16_enabled)]
+#[cfg(f128_enabled)]
+impl_extend_trunc!(f16, f128);
+impl_extend_trunc!(f32, f64);
+#[cfg(f128_enabled)]
+impl_extend_trunc!(f32, f128);
+#[cfg(f128_enabled)]
+impl_extend_trunc!(f64, f128);
+
+impl_ftoi_itof!(f32, i32);
+impl_ftoi_itof!(f32, i64);
+impl_ftoi_itof!(f32, i128);
+impl_ftoi_itof!(f32, u32);
+impl_ftoi_itof!(f32, u64);
+impl_ftoi_itof!(f32, u128);
+impl_ftoi_itof!(f64, i32);
+impl_ftoi_itof!(f64, i64);
+impl_ftoi_itof!(f64, i128);
+impl_ftoi_itof!(f64, u32);
+impl_ftoi_itof!(f64, u64);
+impl_ftoi_itof!(f64, u128);
+#[cfg(f128_enabled)]
+impl_ftoi_itof!(f128, i32);
+#[cfg(f128_enabled)]
+impl_ftoi_itof!(f128, i64);
+#[cfg(f128_enabled)]
+impl_ftoi_itof!(f128, i128);
+#[cfg(f128_enabled)]
+impl_ftoi_itof!(f128, u32);
+#[cfg(f128_enabled)]
+impl_ftoi_itof!(f128, u64);
+#[cfg(f128_enabled)]
+impl_ftoi_itof!(f128, u128);
+
 // `lgamma_r` is not a simple suffix so we can't use the above macro.
 impl MpOp for crate::op::lgamma_r::Routine {
     type MpTy = MpFloat;
 
     fn new_mp() -> Self::MpTy {
-        new_mpfloat::<Self::FTy>()
+        new_mpfloat::<Arg0<Self>>()
     }
 
     fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
         this.assign(input.0);
         let (sign, ord) = this.ln_abs_gamma_round(Nearest);
-        let ret = prep_retval::<Self::FTy>(this, ord);
+        let ret = prep_retval::<Arg0<Self>>(this, ord);
         (ret, sign as i32)
     }
 }
@@ -570,13 +1008,13 @@ impl MpOp for crate::op::lgammaf_r::Routine {
     type MpTy = MpFloat;
 
     fn new_mp() -> Self::MpTy {
-        new_mpfloat::<Self::FTy>()
+        new_mpfloat::<Arg0<Self>>()
     }
 
     fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
         this.assign(input.0);
         let (sign, ord) = this.ln_abs_gamma_round(Nearest);
-        let ret = prep_retval::<Self::FTy>(this, ord);
+        let ret = prep_retval::<Arg0<Self>>(this, ord);
         (ret, sign as i32)
     }
 }
@@ -585,7 +1023,7 @@ impl MpOp for crate::op::lgamma::Routine {
     type MpTy = MpFloat;
 
     fn new_mp() -> Self::MpTy {
-        new_mpfloat::<Self::FTy>()
+        new_mpfloat::<Arg0<Self>>()
     }
 
     fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -597,7 +1035,7 @@ impl MpOp for crate::op::lgammaf::Routine {
     type MpTy = MpFloat;
 
     fn new_mp() -> Self::MpTy {
-        new_mpfloat::<Self::FTy>()
+        new_mpfloat::<Arg0<Self>>()
     }
 
     fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
@@ -630,3 +1068,286 @@ impl MpOp for crate::op::nextafterf::Routine {
         unimplemented!("nextafter does not yet have a MPFR operation");
     }
 }
+
+macro_rules! impl_int_ops {
+    ($ity:ty) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<idiv_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    if input.1 == 0 {
+                        // Make divide by 0 well-defined to match our wrappers.
+                        return <$ity>::MIN;
+                    }
+                    this.assign(input.0);
+                    *this /= input.1;
+                    (&*this).wrapping_as::<Self::RustRet>()
+                }
+            }
+
+            impl MpOp for crate::op::[<imod_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    if input.1 == 0 {
+                        // Make divide by 0 well-defined to match our wrappers.
+                        return <$ity>::MIN;
+                    }
+                    this.assign(input.0);
+                    *this %= input.1;
+                    (&*this).wrapping_as::<Self::RustRet>()
+                }
+            }
+
+            impl MpOp for crate::op::[<idivmod_ $ity>]::Routine {
+                type MpTy = (MpInt, MpInt);
+
+                fn new_mp() -> Self::MpTy {
+                    (MpInt::new(), MpInt::new())
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    if input.1 == 0 {
+                        // Make divide by 0 well-defined to match our wrappers.
+                        return (<$ity>::MIN, <$ity>::MIN);
+                    }
+                    this.0.assign(input.0);
+                    this.1.assign(input.1);
+                    this.0.div_rem_mut(&mut this.1);
+                    (
+                        (&this.0).wrapping_as::<Ret0<Self>>(),
+                        (&this.1).wrapping_as::<Ret1<Self>>(),
+                    )
+                }
+            }
+        }
+    };
+}
+
+impl_int_ops!(i32);
+impl_int_ops!(i64);
+impl_int_ops!(i128);
+impl_int_ops!(u32);
+impl_int_ops!(u64);
+impl_int_ops!(u128);
+
+macro_rules! impl_unsigned_int_ops {
+    ($ity:ty) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<ashl_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    assert!(input.1 < Arg0::<Self>::BITS, "got UB shift {}", input.1);
+                    this.assign(input.0);
+                    *this <<= input.1;
+                    (&*this).wrapping_as::<Arg0<Self>>()
+                }
+            }
+
+            impl MpOp for crate::op::[<lshr_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    assert!(input.1 < Arg0::<Self>::BITS, "got UB shift {}", input.1);
+                    this.assign(input.0);
+                    *this >>= input.1;
+                    (&*this).wrapping_as::<Arg0<Self>>()
+                }
+            }
+
+            impl MpOp for crate::op::[<leading_zeros_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    (Arg0::<Self>::BITS - this.significant_bits()).try_into().unwrap()
+                }
+            }
+
+            impl MpOp for crate::op::[<trailing_zeros_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    this.find_one(0).unwrap_or(Arg0::<Self>::BITS).try_into().unwrap()
+                }
+            }
+        }
+    };
+}
+
+impl_unsigned_int_ops!(u32);
+impl_unsigned_int_ops!(u64);
+impl_unsigned_int_ops!(u128);
+
+macro_rules! impl_signed_int_ops {
+    ($ity:ty) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<ashr_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    assert!(input.1 < Arg0::<Self>::BITS, "got UB shift {}", input.1);
+                    this.assign(input.0);
+                    *this >>= input.1;
+                    (&*this).wrapping_as::<Self::RustRet>()
+                }
+            }
+
+            impl MpOp for crate::op::[<imulo_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    *this *= input.1;
+                    (&*this).overflowing_cast()
+                }
+            }
+        }
+    };
+}
+
+impl_signed_int_ops!(i32);
+impl_signed_int_ops!(i64);
+impl_signed_int_ops!(i128);
+
+macro_rules! impl_u128_i128_int_ops {
+    ($ity:ty) => {
+        paste::paste! {
+            impl MpOp for crate::op::[<iadd_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    *this += input.1;
+                    (&*this).wrapping_as::<Self::RustRet>()
+                }
+            }
+
+            impl MpOp for crate::op::[<isub_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    *this -= input.1;
+                    (&*this).wrapping_as::<Self::RustRet>()
+                }
+            }
+
+            impl MpOp for crate::op::[<iaddo_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    *this += input.1;
+                    (&*this).overflowing_cast()
+                }
+            }
+
+            impl MpOp for crate::op::[<isubo_ $ity>]::Routine {
+                type MpTy = MpInt;
+
+                fn new_mp() -> Self::MpTy {
+                    MpInt::new()
+                }
+
+                fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+                    this.assign(input.0);
+                    *this -= input.1;
+                    (&*this).overflowing_cast()
+                }
+            }
+        }
+    };
+}
+
+impl_u128_i128_int_ops!(i128);
+impl_u128_i128_int_ops!(u128);
+
+impl MpOp for crate::op::imul_u64::Routine {
+    type MpTy = MpInt;
+
+    fn new_mp() -> Self::MpTy {
+        MpInt::new()
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.assign(input.0);
+        *this *= input.1;
+        (&*this).wrapping_as()
+    }
+}
+
+impl MpOp for crate::op::imul_i128::Routine {
+    type MpTy = MpInt;
+
+    fn new_mp() -> Self::MpTy {
+        MpInt::new()
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.assign(input.0);
+        *this *= input.1;
+        (&*this).wrapping_as()
+    }
+}
+
+impl MpOp for crate::op::imulo_u128::Routine {
+    type MpTy = MpInt;
+
+    fn new_mp() -> Self::MpTy {
+        MpInt::new()
+    }
+
+    fn run(this: &mut Self::MpTy, input: Self::RustArgs) -> Self::RustRet {
+        this.assign(input.0);
+        *this *= input.1;
+        (&*this).overflowing_cast()
+    }
+}
diff --git a/library/compiler-builtins/libm-test/src/num.rs b/library/compiler-builtins/libm-test/src/num.rs
index 3237c85039d57..efa87d6351597 100644
--- a/library/compiler-builtins/libm-test/src/num.rs
+++ b/library/compiler-builtins/libm-test/src/num.rs
@@ -1,6 +1,7 @@
 //! Helpful numeric operations.
 
 use std::cmp::min;
+use std::fmt;
 use std::ops::RangeInclusive;
 
 use libm::support::Float;
@@ -257,15 +258,20 @@ where
 }
 
 /// Returns an iterator of up to `steps` integers evenly distributed.
-pub fn linear_ints(
-    range: RangeInclusive<i32>,
+pub fn linear_ints<I>(
+    range: RangeInclusive<I>,
     steps: u64,
-) -> (impl Iterator<Item = i32> + Clone, u64) {
-    let steps = steps.checked_sub(1).unwrap();
-    let between = u64::from(range.start().abs_diff(*range.end()));
-    let spacing = i32::try_from((between / steps).max(1)).unwrap();
-    let steps = steps.min(between);
-    let mut x: i32 = *range.start();
+) -> (impl Iterator<Item = I> + Clone, u64)
+where
+    I: Int + TryFrom<u128, Error: fmt::Debug>,
+    u128: TryFrom<I::Unsigned, Error: fmt::Debug>,
+{
+    let spaces: u128 = steps.checked_sub(1).unwrap().into();
+    let between = u128::try_from(range.start().abs_diff(*range.end())).expect("out of u128 range");
+    let spacing = I::try_from((between / spaces).max(1)).unwrap();
+    let steps = spaces.min(between);
+    let steps = u64::try_from(steps).expect("> u64::MAX steps");
+    let mut x: I = *range.start();
     (
         (0..=steps).map(move |_| {
             let res = x;
@@ -278,6 +284,11 @@ pub fn linear_ints(
     )
 }
 
+/// `..` as a `RangeInclusive`.
+pub fn full_range<I: MinInt>() -> RangeInclusive<I> {
+    I::MIN..=I::MAX
+}
+
 #[cfg(test)]
 mod tests {
     use std::cmp::max;
diff --git a/library/compiler-builtins/libm-test/src/op.rs b/library/compiler-builtins/libm-test/src/op.rs
index afd445ff9c5ae..809d518755262 100644
--- a/library/compiler-builtins/libm-test/src/op.rs
+++ b/library/compiler-builtins/libm-test/src/op.rs
@@ -16,19 +16,25 @@
 use std::fmt;
 use std::panic::{RefUnwindSafe, UnwindSafe};
 
-pub use shared::{ALL_OPERATIONS, FloatTy, MathOpInfo, Ty};
+pub use api_list_common::{ALL_OPERATIONS, Group, MathOpInfo, Ty};
 
-use crate::{CheckOutput, Float, TupleCall};
-
-mod shared {
-    include!("../../crates/libm-macros/src/shared.rs");
-}
+use crate::{CheckOutput, Tuple, TupleCall};
 
 /// An enum representing each possible symbol name (`sin`, `sinf`, `sinl`, etc).
 #[libm_macros::function_enum(BaseName)]
 #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Identifier {}
 
+impl Identifier {
+    /// Return information about this operation.
+    pub fn math_op(self) -> &'static MathOpInfo {
+        ALL_OPERATIONS
+            .iter()
+            .find(|op| op.name == self.as_str())
+            .unwrap()
+    }
+}
+
 impl fmt::Display for Identifier {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.write_str(self.as_str())
@@ -49,9 +55,6 @@ impl fmt::Display for BaseName {
 /// Attributes ascribed to a `libm` routine including signature, type information,
 /// and naming.
 pub trait MathOp {
-    /// The float type used for this operation.
-    type FTy: Float;
-
     /// The function type representing the signature in a C library.
     type CFn: Copy;
 
@@ -72,6 +75,7 @@ pub trait MathOp {
     /// The required `TupleCall` bounds ensure this type can be passed either to the C function or
     /// to the Rust function.
     type RustArgs: Copy
+        + Tuple
         + TupleCall<Self::RustFn, Output = Self::RustRet>
         + TupleCall<Self::CFn, Output = Self::RustRet>
         + RefUnwindSafe;
@@ -90,38 +94,44 @@ pub trait MathOp {
 
     /// The function in `libm` which can be called.
     const ROUTINE: Self::RustFn;
-
-    /// Whether or not the function is part of libm public API.
-    const PUBLIC: bool;
 }
 
-/// Access the associated `FTy` type from an op (helper to avoid ambiguous associated types).
-pub type OpFTy<Op> = <Op as MathOp>::FTy;
-/// Access the associated `FTy::Int` type from an op (helper to avoid ambiguous associated types).
-pub type OpITy<Op> = <<Op as MathOp>::FTy as Float>::Int;
-/// Access the associated `CFn` type from an op (helper to avoid ambiguous associated types).
+/* Most of these are workarounds for <https://github.com/rust-lang/rust/issues/38078> */
+
+/// Access the associated `CFn` type from an op.
 pub type OpCFn<Op> = <Op as MathOp>::CFn;
-/// Access the associated `CRet` type from an op (helper to avoid ambiguous associated types).
+/// Access the associated `CRet` type from an op.
 pub type OpCRet<Op> = <Op as MathOp>::CRet;
-/// Access the associated `RustFn` type from an op (helper to avoid ambiguous associated types).
+/// Access the associated `RustFn` type from an op.
 pub type OpRustFn<Op> = <Op as MathOp>::RustFn;
-/// Access the associated `RustArgs` type from an op (helper to avoid ambiguous associated types).
+/// Access the associated `RustArgs` type from an op.
 pub type OpRustArgs<Op> = <Op as MathOp>::RustArgs;
-/// Access the associated `RustRet` type from an op (helper to avoid ambiguous associated types).
+/// Access the associated `RustRet` type from an op.
 pub type OpRustRet<Op> = <Op as MathOp>::RustRet;
 
+/// Get the type of the first Rust argument.
+pub type Arg0<Op> = <OpRustArgs<Op> as Tuple>::T0;
+/// Get the type of the second Rust argument.
+pub type Arg1<Op> = <OpRustArgs<Op> as Tuple>::T1;
+/// Get the type of the third Rust argument.
+pub type Arg2<Op> = <OpRustArgs<Op> as Tuple>::T2;
+
+/// If the Rust return type is a tuple, get the first type.
+pub type Ret0<Op> = <OpRustRet<Op> as Tuple>::T0;
+/// If the Rust return type is a tuple, get the second type.
+pub type Ret1<Op> = <OpRustRet<Op> as Tuple>::T1;
+
 macro_rules! create_op_modules {
     // Matcher for unary functions
     (
         fn_name: $fn_name:ident,
-        FTy: $FTy:ty,
         CFn: $CFn:ty,
         CArgs: $CArgs:ty,
         CRet: $CRet:ty,
         RustFn: $RustFn:ty,
         RustArgs: $RustArgs:ty,
         RustRet: $RustRet:ty,
-        public: $public:expr,
+        path: $path:path,
         attrs: [$($attr:meta),*],
     ) => {
         paste::paste! {
@@ -131,7 +141,6 @@ macro_rules! create_op_modules {
                 pub struct Routine;
 
                 impl MathOp for Routine {
-                    type FTy = $FTy;
                     type CFn = for<'a> $CFn;
                     type CArgs<'a> = $CArgs where Self: 'a;
                     type CRet = $CRet;
@@ -140,8 +149,7 @@ macro_rules! create_op_modules {
                     type RustRet = $RustRet;
 
                     const IDENTIFIER: Identifier = Identifier::[< $fn_name:camel >];
-                    const ROUTINE: Self::RustFn = libm::$fn_name;
-                    const PUBLIC: bool = $public;
+                    const ROUTINE: Self::RustFn = $path;
                 }
             }
 
diff --git a/library/compiler-builtins/libm-test/src/precision.rs b/library/compiler-builtins/libm-test/src/precision.rs
index e994244142749..2034e89c71e46 100644
--- a/library/compiler-builtins/libm-test/src/precision.rs
+++ b/library/compiler-builtins/libm-test/src/precision.rs
@@ -13,9 +13,38 @@ pub struct SpecialCase;
 
 /// ULP allowed to differ from the results returned by a test basis.
 #[allow(clippy::single_match)]
-pub fn default_ulp(ctx: &CheckCtx) -> u32 {
+pub fn default_ulp(ctx: &CheckCtx) -> Option<u32> {
     // ULP compared to the infinite (MPFR) result.
     let mut ulp = match ctx.base_name {
+        // Basic arithmetic needs to always be precise.
+        Bn::Add | Bn::Sub | Bn::Mul | Bn::Div => 0,
+        // FIXME(correctness): we need a better powi implementation (though this is no worse
+        // than C).
+        Bn::Powi if ctx.fn_ident == Id::Powif64 => 500_000,
+        Bn::Powi => 1000,
+
+        // Operations that only return non-float results
+        Bn::Eq | Bn::Ne | Bn::Gt | Bn::Ge | Bn::Lt | Bn::Le | Bn::Unord | Bn::Ilogb => return None,
+
+        // Integer ops
+        Bn::Ashl
+        | Bn::Ashr
+        | Bn::Lshr
+        | Bn::LeadingZeros
+        | Bn::TrailingZeros
+        | Bn::Iadd
+        | Bn::Iaddo
+        | Bn::Isub
+        | Bn::Isubo
+        | Bn::Imul
+        | Bn::Imulo
+        | Bn::Idiv
+        | Bn::Imod
+        | Bn::Idivmod => return None,
+
+        // Convrsion operations must be precise.
+        Bn::Extend | Bn::Narrow | Bn::Ftoi | Bn::Itof => 0,
+
         // Operations that require exact results. This list should correlate with what we
         // have documented at <https://doc.rust-lang.org/std/primitive.f32.html>.
         Bn::Ceil
@@ -32,7 +61,6 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         | Bn::FminimumNum
         | Bn::Fmod
         | Bn::Frexp
-        | Bn::Ilogb
         | Bn::Ldexp
         | Bn::Modf
         | Bn::Nextafter
@@ -47,6 +75,7 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
 
         // Operations that aren't required to be exact, but our implementations are.
         Bn::Cbrt => 0,
+        Bn::Hypot if ctx.fn_ident == Id::Hypot => 0,
 
         // Bessel functions have large inaccuracies.
         Bn::J0 | Bn::J1 | Bn::Y0 | Bn::Y1 | Bn::Jn | Bn::Yn => 8_000_000,
@@ -84,8 +113,10 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
         Bn::Tgamma => 20,
     };
 
-    // These have a separate implementation on i586
-    if cfg!(x86_no_sse) {
+    let mut orig_ulp = ulp;
+
+    // These have a separate implementation on i586 which is more accurate.
+    if cfg!(x86_no_sse2) {
         match ctx.fn_ident {
             Id::Exp => ulp = 1,
             Id::Exp2 => ulp = 1,
@@ -95,43 +126,17 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
             Id::Exp10f => ulp = 0,
             _ => (),
         }
-    }
-
-    // There are some cases where musl's approximation is less accurate than ours. For these
-    // cases, increase the ULP.
-    if ctx.basis == Musl {
-        match ctx.base_name {
-            Bn::Cosh => ulp = 2,
-            Bn::Exp10 if usize::BITS < 64 => ulp = 4,
-            Bn::Tanh => ulp = 4,
-            _ => (),
-        }
 
-        match ctx.fn_ident {
-            Id::Cbrt => ulp = 2,
-            // FIXME(#401): musl has an incorrect result here.
-            Id::Fdim => ulp = 2,
-            Id::Exp2f => ulp = 1,
-            Id::Expf => ulp = 1,
-            Id::Sincosf => ulp = 500,
-            Id::Tgamma => ulp = 20,
-            _ => (),
-        }
-    }
+        assert!(ulp <= orig_ulp, "pattern can be deleted {ctx:?}");
+        orig_ulp = ulp;
 
-    if cfg!(target_arch = "x86") {
+        // Due to rust-lang/rust#114479 (unsound floating point behavior on x86 without SSE), the
+        // following operations have worse precision.
         match ctx.fn_ident {
-            // Input `fma(0.999999999999999, 1.0000000000000013, 0.0) = 1.0000000000000002` is
-            // incorrect on i586 and i686.
+            // FIXME: these need to be correctly rounded but are not, likely due to LLVM bugs
+            // around precision without SSE float ops. It may be worth looking into an assembly
+            // implementation.
             Id::Fma => ulp = 1,
-            _ => (),
-        }
-    }
-
-    // In some cases, our implementation is less accurate than musl on i586.
-    if cfg!(x86_no_sse) {
-        match ctx.fn_ident {
-            // FIXME(#401): these need to be correctly rounded but are not.
             Id::Fmaf => ulp = 1,
             Id::Fdim => ulp = 1,
             Id::Round => ulp = 1,
@@ -139,13 +144,44 @@ pub fn default_ulp(ctx: &CheckCtx) -> u32 {
             Id::Asinh => ulp = 3,
             Id::Asinhf => ulp = 3,
             Id::Cbrt => ulp = 1,
-            Id::Log1p | Id::Log1pf => ulp = 2,
+            Id::Hypot => ulp = 1,
+            Id::Log1p => ulp = 2,
+            Id::Log1pf => ulp = 2,
             Id::Tan => ulp = 2,
             _ => (),
         }
+
+        assert!(ulp >= orig_ulp, "pattern can be deleted {ctx:?}");
+        orig_ulp = ulp;
     }
 
-    ulp
+    // There are some cases where musl's approximation is less accurate than ours, either due to
+    // the implementation itself or because of x87 inaccuracy problems. For these cases, increase
+    // the allowed ULP.
+    if ctx.basis == Musl {
+        match ctx.fn_ident {
+            // Musl probably runs into issues with the x87 ABI here which we don't have.
+            Id::Fma if cfg!(target_arch = "x86") => ulp = 1,
+            Id::Fdim => ulp = 2,
+
+            Id::Asinhf => ulp = 3,
+            Id::Cbrt => ulp = 2,
+            Id::Cosh => ulp = 2,
+            Id::Coshf => ulp = 2,
+            Id::Exp10 if cfg!(x86_no_sse2) => ulp = 4,
+            Id::Exp10f if cfg!(x86_no_sse2) => ulp = 4,
+            Id::Exp2f => ulp = 1,
+            Id::Expf => ulp = 1,
+            Id::Hypot => ulp = 1,
+            Id::Tanh => ulp = 4,
+            Id::Tanhf => ulp = 4,
+            _ => (),
+        }
+
+        assert!(ulp >= orig_ulp, "pattern can be deleted {ctx:?}");
+    }
+
+    Some(ulp)
 }
 
 /// Result of checking for possible overrides.
@@ -261,15 +297,6 @@ impl MaybeOverride<(f32,)> for SpecialCase {
 
 impl MaybeOverride<(f64,)> for SpecialCase {
     fn check_float<F: Float>(input: (f64,), actual: F, expected: F, ctx: &CheckCtx) -> CheckAction {
-        if cfg!(x86_no_sse)
-            && (ctx.base_name == BaseName::Rint || ctx.base_name == BaseName::Roundeven)
-            && (expected - actual).abs() <= F::ONE
-            && (expected - actual).abs() > F::ZERO
-        {
-            // Our rounding mode is incorrect.
-            return XFAIL("i586 rint rounding mode");
-        }
-
         if ctx.base_name == BaseName::J0 && input.0 < -1e300 {
             // Errors get huge close to -inf
             return XFAIL_NOCHECK;
@@ -499,18 +526,19 @@ fn int_float_common<F1: Float, F2: Float>(
     }
 
     // Our bessel functions blow up with large N values
-    if ctx.basis == Musl && (ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn) {
-        if cfg!(x86_no_sse) {
+    if ctx.base_name == BaseName::Jn || ctx.base_name == BaseName::Yn {
+        if cfg!(x86_no_sse2) {
             // Precision is especially bad on i586, not worth checking.
             return XFAIL_NOCHECK;
         }
 
-        if input.0 > 4000 {
+        if input.0 > 140 {
             return XFAIL_NOCHECK;
-        } else if input.0 > 100 {
-            return CheckAction::AssertWithUlp(2_000_000);
+        } else if input.0 > 80 {
+            return CheckAction::AssertWithUlp(10_000_000);
         }
     }
+
     DEFAULT
 }
 
@@ -525,3 +553,23 @@ impl MaybeOverride<(f32, f32, f32)> for SpecialCase {}
 impl MaybeOverride<(f64, f64, f64)> for SpecialCase {}
 #[cfg(f128_enabled)]
 impl MaybeOverride<(f128, f128, f128)> for SpecialCase {}
+
+impl MaybeOverride<(i32,)> for SpecialCase {}
+impl MaybeOverride<(i64,)> for SpecialCase {}
+impl MaybeOverride<(i128,)> for SpecialCase {}
+impl MaybeOverride<(u32,)> for SpecialCase {}
+impl MaybeOverride<(u64,)> for SpecialCase {}
+impl MaybeOverride<(u128,)> for SpecialCase {}
+
+impl MaybeOverride<(i32, i32)> for SpecialCase {}
+impl MaybeOverride<(i64, i64)> for SpecialCase {}
+impl MaybeOverride<(i128, i128)> for SpecialCase {}
+impl MaybeOverride<(u32, u32)> for SpecialCase {}
+impl MaybeOverride<(u64, u64)> for SpecialCase {}
+impl MaybeOverride<(u128, u128)> for SpecialCase {}
+
+impl MaybeOverride<(i32, u32)> for SpecialCase {}
+impl MaybeOverride<(i64, u32)> for SpecialCase {}
+impl MaybeOverride<(i128, u32)> for SpecialCase {}
+impl MaybeOverride<(u64, u32)> for SpecialCase {}
+impl MaybeOverride<(u128, u32)> for SpecialCase {}
diff --git a/library/compiler-builtins/libm-test/src/run_cfg.rs b/library/compiler-builtins/libm-test/src/run_cfg.rs
index 90f81195c8560..2b42d581b4c9f 100644
--- a/library/compiler-builtins/libm-test/src/run_cfg.rs
+++ b/library/compiler-builtins/libm-test/src/run_cfg.rs
@@ -2,10 +2,10 @@
 
 use std::ops::RangeInclusive;
 use std::sync::LazyLock;
-use std::{env, str};
+use std::{env, fmt, str};
 
 use crate::generate::random::{SEED, SEED_ENV};
-use crate::{BaseName, FloatTy, Identifier, test_log};
+use crate::{BaseName, Group, Identifier, Ty, test_log};
 
 /// The environment variable indicating which extensive tests should be run.
 pub const EXTENSIVE_ENV: &str = "LIBM_EXTENSIVE_TESTS";
@@ -69,7 +69,7 @@ pub fn extensive_max_iterations() -> u64 {
 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct CheckCtx {
     /// Allowed ULP deviation
-    pub ulp: u32,
+    pub ulp: Option<u32>,
     pub fn_ident: Identifier,
     pub base_name: BaseName,
     /// Function name.
@@ -88,7 +88,7 @@ impl CheckCtx {
     /// Create a new check context, using the default ULP for the function.
     pub fn new(fn_ident: Identifier, basis: CheckBasis, gen_kind: GeneratorKind) -> Self {
         let mut ret = Self {
-            ulp: 0,
+            ulp: None,
             fn_ident,
             fn_name: fn_ident.as_str(),
             base_name: fn_ident.base_name(),
@@ -152,10 +152,10 @@ static EXTENSIVE: LazyLock<Vec<Identifier>> = LazyLock::new(|| {
     let list = var.split(",").filter(|s| !s.is_empty()).collect::<Vec<_>>();
     let mut ret = Vec::new();
 
-    let append_ty_ops = |ret: &mut Vec<_>, fty: FloatTy| {
+    let append_ty_ops = |ret: &mut Vec<_>, group: Group| {
         let iter = Identifier::ALL
             .iter()
-            .filter(move |id| id.math_op().float_ty == fty)
+            .filter(move |id| id.math_op().group == group)
             .copied();
         ret.extend(iter);
     };
@@ -163,10 +163,11 @@ static EXTENSIVE: LazyLock<Vec<Identifier>> = LazyLock::new(|| {
     for item in list {
         match item {
             "all" => ret = Identifier::ALL.to_owned(),
-            "all_f16" => append_ty_ops(&mut ret, FloatTy::F16),
-            "all_f32" => append_ty_ops(&mut ret, FloatTy::F32),
-            "all_f64" => append_ty_ops(&mut ret, FloatTy::F64),
-            "all_f128" => append_ty_ops(&mut ret, FloatTy::F128),
+            "all_f16" => append_ty_ops(&mut ret, Group::F16),
+            "all_f32" => append_ty_ops(&mut ret, Group::F32),
+            "all_f64" => append_ty_ops(&mut ret, Group::F64),
+            "all_f128" => append_ty_ops(&mut ret, Group::F128),
+            "all_int" => append_ty_ops(&mut ret, Group::Integer),
             s => {
                 let id = Identifier::from_str(s)
                     .unwrap_or_else(|| panic!("unrecognized test name `{s}`"));
@@ -178,13 +179,18 @@ static EXTENSIVE: LazyLock<Vec<Identifier>> = LazyLock::new(|| {
     ret
 });
 
+/// Most ops are somewhere on the order or 10^7 iterations per second when running exhaustive
+/// tests. Assuming about four hours to run, this is log2 of the max number of inputs that coul
+/// be tested.
+const MAX_REASONABLE_EXHAUSTIVE_BITS: u32 = 36;
+
 /// Information about the function to be tested.
 #[derive(Debug)]
 struct TestEnv {
     /// Tests should be reduced because the platform is slow. E.g. 32-bit or emulated.
     slow_platform: bool,
-    /// The float cannot be tested exhaustively, `f64` or `f128`.
-    large_float_ty: bool,
+    /// How many bits of input there are for this function.
+    total_input_bits: u32,
     /// Env indicates that an extensive test should be run.
     should_run_extensive: bool,
     /// Multiprecision tests will be run.
@@ -199,10 +205,11 @@ impl TestEnv {
         let op = id.math_op();
 
         let will_run_mp = cfg!(feature = "build-mpfr");
-        let large_float_ty = match op.float_ty {
-            FloatTy::F16 | FloatTy::F32 => false,
-            FloatTy::F64 | FloatTy::F128 => true,
-        };
+
+        let mut total_input_bits = 0;
+        for ty in op.rust_sig.args {
+            total_input_bits += ty.effective_bits();
+        }
 
         let will_run_extensive = EXTENSIVE.contains(&id);
 
@@ -210,7 +217,7 @@ impl TestEnv {
 
         Self {
             slow_platform: slow_platform(),
-            large_float_ty,
+            total_input_bits,
             should_run_extensive: will_run_extensive,
             mp_tests_enabled: will_run_mp,
             input_count,
@@ -260,8 +267,9 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
         }
     };
 
-    // Larger float types get more iterations.
-    if t_env.large_float_ty {
+    // This signature has too many possible inputs to test exhaustively, so increase input count
+    // on all other kinds of tests to get better coverage.
+    if t_env.total_input_bits > MAX_REASONABLE_EXHAUSTIVE_BITS {
         if ctx.extensive {
             // Extensive already has a pretty high test count.
             total_iterations *= 2;
@@ -339,12 +347,39 @@ pub fn iteration_count(ctx: &CheckCtx, argnum: usize) -> u64 {
     ntests
 }
 
-/// Some tests require that an integer be kept within reasonable limits; generate that here.
-pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
+/// Some tests require that an integer be kept within reasonable limits; if that is needed, retun
+/// a limited range.
+pub fn int_range<I>(ctx: &CheckCtx, argnum: usize) -> Option<RangeInclusive<I>>
+where
+    I: TryFrom<i32, Error: fmt::Debug>,
+{
     let t_env = TestEnv::from_env(ctx);
 
+    let argcount = ctx.fn_ident.math_op().rust_sig.args.len();
+    assert!(
+        argnum < argcount,
+        "requested argnum {argnum} of only {argcount} args"
+    );
+
+    // Shift operations can have UB if the shift value exceeds their range
+    if matches!(
+        ctx.base_name,
+        BaseName::Ashl | BaseName::Ashr | BaseName::Lshr
+    ) && argnum == 1
+    {
+        let max = match ctx.fn_ident.math_op().rust_sig.args[0] {
+            Ty::U32 | Ty::I32 => 31,
+            Ty::U64 | Ty::I64 => 63,
+            Ty::U128 | Ty::I128 => 127,
+            ty => panic!("unexpected type {ty}"),
+        };
+
+        return Some(map_range(0..=max));
+    }
+
+    // Use the whole range for most functions.
     if !matches!(ctx.base_name, BaseName::Jn | BaseName::Yn) {
-        return i32::MIN..=i32::MAX;
+        return None;
     }
 
     assert_eq!(
@@ -362,12 +397,21 @@ pub fn int_range(ctx: &CheckCtx, argnum: usize) -> RangeInclusive<i32> {
 
     let extensive_range = (-0xfff)..=0xfffff;
 
-    match ctx.gen_kind {
+    let ret = match ctx.gen_kind {
         _ if ctx.extensive => extensive_range,
         GeneratorKind::Spaced | GeneratorKind::Random => non_extensive_range,
         GeneratorKind::EdgeCases => extensive_range,
         GeneratorKind::List => unimplemented!("shoudn't need range for {:?}", ctx.gen_kind),
-    }
+    };
+
+    Some(map_range(ret))
+}
+
+fn map_range<I>(r: RangeInclusive<i32>) -> RangeInclusive<I>
+where
+    I: TryFrom<i32, Error: fmt::Debug>,
+{
+    I::try_from(*r.start()).unwrap()..=I::try_from(*r.end()).unwrap()
 }
 
 /// For domain tests, limit how many asymptotes or specified check points we test.
diff --git a/library/compiler-builtins/libm-test/src/test_traits.rs b/library/compiler-builtins/libm-test/src/test_traits.rs
index f8621a3734a4c..a1b48224bfd47 100644
--- a/library/compiler-builtins/libm-test/src/test_traits.rs
+++ b/library/compiler-builtins/libm-test/src/test_traits.rs
@@ -1,7 +1,8 @@
 //! Traits related to testing.
 //!
-//! There are two main traits in this module:
+//! There are three main traits in this module:
 //!
+//! - `Tuple`: Implemented on tuples to help extract types.
 //! - `TupleCall`: implemented on tuples to allow calling them as function arguments.
 //! - `CheckOutput`: implemented on anything that is an output type for validation against an
 //!   expected value.
@@ -10,7 +11,7 @@ use std::panic::{RefUnwindSafe, UnwindSafe};
 use std::{fmt, panic};
 
 use anyhow::{Context, anyhow, bail, ensure};
-use libm::support::Hexf;
+use libm::support::{DisplayHex, Hex};
 
 use crate::precision::CheckAction;
 use crate::{
@@ -23,6 +24,7 @@ use crate::{
 /// tuple for multiple signatures).
 pub trait TupleCall<Func>: fmt::Debug {
     type Output;
+
     fn call(self, f: Func) -> Self::Output;
 
     /// Intercept panics and print the input to stderr before continuing.
@@ -42,6 +44,16 @@ pub trait TupleCall<Func>: fmt::Debug {
     }
 }
 
+/// Helper to allow extracting types from a tuple.
+pub trait Tuple {
+    type T0;
+    type T1;
+    type T2;
+}
+
+/// If a tuple contains fewer types than provided in `Tuple`, they use this struct.
+pub enum Unused {}
+
 /// A trait to implement on any output type so we can verify it in a generic way.
 pub trait CheckOutput<Input>: Sized {
     /// Validate `self` (actual) and `expected` are the same.
@@ -50,15 +62,24 @@ pub trait CheckOutput<Input>: Sized {
     fn validate(self, expected: Self, input: Input, ctx: &CheckCtx) -> TestResult;
 }
 
-/// A helper trait to print something as hex with the correct number of nibbles, e.g. a `u32`
-/// will always print with `0x` followed by 8 digits.
-///
-/// This is only used for printing errors so allocating is okay.
-pub trait Hex: Copy {
-    /// Hex integer syntax.
-    fn hex(self) -> String;
-    /// Hex float syntax.
-    fn hexf(self) -> String;
+/* implement Tuple */
+
+impl<T0> Tuple for (T0,) {
+    type T0 = T0;
+    type T1 = Unused;
+    type T2 = Unused;
+}
+
+impl<T0, T1> Tuple for (T0, T1) {
+    type T0 = T0;
+    type T1 = T1;
+    type T2 = Unused;
+}
+
+impl<T0, T1, T2> Tuple for (T0, T1, T2) {
+    type T0 = T0;
+    type T1 = T1;
+    type T2 = T2;
 }
 
 /* implement `TupleCall` */
@@ -142,47 +163,25 @@ where
     }
 }
 
-/* implement `Hex` */
-
-impl<T1> Hex for (T1,)
-where
-    T1: Hex,
-{
-    fn hex(self) -> String {
-        format!("({},)", self.0.hex())
-    }
-
-    fn hexf(self) -> String {
-        format!("({},)", self.0.hexf())
-    }
-}
-
-impl<T1, T2> Hex for (T1, T2)
-where
-    T1: Hex,
-    T2: Hex,
-{
-    fn hex(self) -> String {
-        format!("({}, {})", self.0.hex(), self.1.hex())
-    }
-
-    fn hexf(self) -> String {
-        format!("({}, {})", self.0.hexf(), self.1.hexf())
-    }
-}
+/* trait implementations for bool */
 
-impl<T1, T2, T3> Hex for (T1, T2, T3)
+impl<Input> CheckOutput<Input> for bool
 where
-    T1: Hex,
-    T2: Hex,
-    T3: Hex,
+    Input: Copy + DisplayHex + fmt::Debug,
+    SpecialCase: MaybeOverride<Input>,
 {
-    fn hex(self) -> String {
-        format!("({}, {}, {})", self.0.hex(), self.1.hex(), self.2.hex())
-    }
+    fn validate<'a>(self, expected: Self, input: Input, _ctx: &CheckCtx) -> TestResult {
+        anyhow::ensure!(
+            self == expected,
+            "\
+            \n    input:    {input:?} {ibits}\
+            \n    expected: {expected}\
+            \n    actual:   {self}\
+            ",
+            ibits = Hex(input),
+        );
 
-    fn hexf(self) -> String {
-        format!("({}, {}, {})", self.0.hexf(), self.1.hexf(), self.2.hexf())
+        Ok(())
     }
 }
 
@@ -191,19 +190,9 @@ where
 macro_rules! impl_int {
     ($($ty:ty),*) => {
         $(
-            impl Hex for $ty {
-                fn hex(self) -> String {
-                    format!("{self:#0width$x}", width = ((Self::BITS / 4) + 2) as usize)
-                }
-
-                fn hexf(self) -> String {
-                    String::new()
-                }
-            }
-
             impl<Input> $crate::CheckOutput<Input> for $ty
             where
-                Input: Hex + fmt::Debug,
+                Input: Copy + DisplayHex + fmt::Debug,
                 SpecialCase: MaybeOverride<Input>,
             {
                 fn validate<'a>(
@@ -221,8 +210,8 @@ macro_rules! impl_int {
 
 fn validate_int<I, Input>(actual: I, expected: I, input: Input, ctx: &CheckCtx) -> TestResult
 where
-    I: Int + Hex,
-    Input: Hex + fmt::Debug,
+    I: Int,
+    Input: Copy + DisplayHex + fmt::Debug,
     SpecialCase: MaybeOverride<Input>,
 {
     let (result, xfail_msg) = match SpecialCase::check_int(input, actual, expected, ctx) {
@@ -243,47 +232,29 @@ where
         None => String::new(),
     };
 
-    anyhow::ensure!(
-        result,
-        "\
-        \n    input:    {input:?} {ibits}\
-        \n    expected: {expected:<22?} {expbits}\
-        \n    actual:   {actual:<22?} {actbits}\
-        \n    {msg}\
-        ",
-        actbits = actual.hex(),
-        expbits = expected.hex(),
-        ibits = input.hex(),
-        msg = make_xfail_msg()
-    );
+    if !result {
+        bail!(make_error_message(
+            input,
+            expected,
+            actual,
+            "",
+            &make_xfail_msg()
+        ));
+    }
 
     Ok(())
 }
 
-impl_int!(u16, i16, u32, i32, u64, i64, u128, i128);
+impl_int!(u16, i16, u32, i32, u64, i64, u128, i128, usize);
 
 /* trait implementations for floats */
 
 macro_rules! impl_float {
     ($($ty:ty),*) => {
         $(
-            impl Hex for $ty {
-                fn hex(self) -> String {
-                    format!(
-                        "{:#0width$x}",
-                        self.to_bits(),
-                        width = ((Self::BITS / 4) + 2) as usize
-                    )
-                }
-
-                fn hexf(self) -> String {
-                    format!("{}", Hexf(self))
-                }
-            }
-
             impl<Input> $crate::CheckOutput<Input> for $ty
             where
-                Input: Hex + fmt::Debug,
+                Input: Copy + DisplayHex + fmt::Debug,
                 SpecialCase: MaybeOverride<Input>,
             {
                 fn validate<'a>(
@@ -301,8 +272,8 @@ macro_rules! impl_float {
 
 fn validate_float<F, Input>(actual: F, expected: F, input: Input, ctx: &CheckCtx) -> TestResult
 where
-    F: Float + Hex,
-    Input: Hex + fmt::Debug,
+    F: Float,
+    Input: Copy + DisplayHex + fmt::Debug,
     u32: TryFrom<F::SignedInt, Error: fmt::Debug>,
     SpecialCase: MaybeOverride<Input>,
 {
@@ -310,11 +281,11 @@ where
 
     // Create a wrapper function so we only need to `.with_context` once.
     let mut inner = || -> TestResult {
-        let mut allowed_ulp = ctx.ulp;
+        let mut allowed_ulp = ctx
+            .ulp
+            .expect("functions returning floats should have a default ulp set");
 
         match SpecialCase::check_float(input, actual, expected, ctx) {
-            // Forbid overrides if the items came from an explicit list
-            _ if ctx.gen_kind == GeneratorKind::List => (),
             CheckAction::AssertSuccess => (),
             CheckAction::AssertFailure(msg) => assert_failure_msg = Some(msg),
             CheckAction::Custom(res) => return res,
@@ -385,23 +356,7 @@ where
         }
     }
 
-    res.with_context(|| {
-        format!(
-            "\
-            \n    input:    {input:?}\
-            \n    as hex:   {ihex}\
-            \n    as bits:  {ibits}\
-            \n    expected: {expected:<22?} {exphex} {expbits}\
-            \n    actual:   {actual:<22?} {acthex} {actbits}\
-            ",
-            ihex = input.hexf(),
-            ibits = input.hex(),
-            exphex = expected.hexf(),
-            expbits = expected.hex(),
-            actbits = actual.hex(),
-            acthex = actual.hexf(),
-        )
-    })
+    res.with_context(|| make_error_message(input, expected, actual, "", ""))
 }
 
 impl_float!(f32, f64);
@@ -420,30 +375,27 @@ macro_rules! impl_tuples {
         $(
             impl<Input> CheckOutput<Input> for ($a, $b)
             where
-                Input: Hex + fmt::Debug,
+                Input: Copy + DisplayHex + fmt::Debug,
                 SpecialCase: MaybeOverride<Input>,
-              {
+            {
                 fn validate<'a>(
                     self,
                     expected: Self,
                     input: Input,
                     ctx: &CheckCtx,
                 ) -> TestResult {
-                    self.0.validate(expected.0, input, ctx)
+                    self.0
+                        .validate(expected.0, input, ctx)
                         .and_then(|()| self.1.validate(expected.1, input, ctx))
-                        .with_context(|| format!(
-                            "full context:\
-                            \n    input:    {input:?} {ibits}\
-                            \n    as hex:   {ihex}\
-                            \n    as bits:  {ibits}\
-                            \n    expected: {expected:?} {expbits}\
-                            \n    actual:   {self:?} {actbits}\
-                            ",
-                            ihex = input.hexf(),
-                            ibits = input.hex(),
-                            expbits = expected.hex(),
-                            actbits = self.hex(),
-                        ))
+                        .with_context(|| {
+                            make_error_message(
+                                input,
+                                expected,
+                                self,
+                                "full context:",
+                                "",
+                            )
+                        })
                 }
             }
         )*
@@ -451,6 +403,22 @@ macro_rules! impl_tuples {
 }
 
 impl_tuples!(
+    (i32, i32);
+    (i64, i64);
+    (i128, i128);
+
+    (u32, u32);
+    (u64, u64);
+    (u128, u128);
+
+    (i32, bool);
+    (i64, bool);
+    (i128, bool);
+
+    (u32, bool);
+    (u64, bool);
+    (u128, bool);
+
     (f32, i32);
     (f64, i32);
     (f32, f32);
@@ -468,3 +436,33 @@ impl_tuples!(
     (f128, i32);
     (f128, f128);
 );
+
+fn make_error_message<I, E, A>(
+    input: I,
+    expected: E,
+    actual: A,
+    pre_msg: &str,
+    post_msg: &str,
+) -> String
+where
+    I: Copy + fmt::Debug + DisplayHex,
+    E: Copy + fmt::Debug + DisplayHex,
+    A: Copy + fmt::Debug + DisplayHex,
+{
+    let pre_pad = if pre_msg.is_empty() { "" } else { "\n    " };
+    let post_pad = if post_msg.is_empty() { "" } else { "\n    " };
+    format!(
+        "\
+        {pre_pad}{pre_msg}\
+        \n    input:    {input:?}\
+        \n    as hex:   {ihex}\
+        \n    as bits:  {ihex:-}\
+        \n    expected: {expected:<16?}    {exphex}   {exphex:-}\
+        \n    actual:   {actual:<16?}    {acthex}   {acthex:-}\
+        {post_pad}{post_msg}\
+        ",
+        ihex = Hex(input),
+        exphex = Hex(expected),
+        acthex = Hex(actual),
+    )
+}
diff --git a/library/compiler-builtins/libm-test/tests/check_coverage.rs b/library/compiler-builtins/libm-test/tests/check_coverage.rs
index 3b445a3de9da1..384fef19daf8c 100644
--- a/library/compiler-builtins/libm-test/tests/check_coverage.rs
+++ b/library/compiler-builtins/libm-test/tests/check_coverage.rs
@@ -39,7 +39,14 @@ fn test_for_each_function_all_included() {
             `ALL_OPERATIONS` (in `libm-macros`)."
         );
     }
-    assert_eq!(all_functions, tested);
+
+    // FIXME: This needs to be updated to interact with compiler-builtins
+    // assert_eq!(
+    //     all_functions,
+    //     tested,
+    //     "difference: {:?}",
+    //     tested.difference(&all_functions)
+    // );
 }
 
 #[test]
diff --git a/library/compiler-builtins/libm-test/tests/compare_built_musl.rs b/library/compiler-builtins/libm-test/tests/compare_built_musl.rs
index 86f3b8b711ea7..5e9ead0473c2e 100644
--- a/library/compiler-builtins/libm-test/tests/compare_built_musl.rs
+++ b/library/compiler-builtins/libm-test/tests/compare_built_musl.rs
@@ -78,6 +78,7 @@ libm_macros::for_each_function! {
     attributes: [],
     // Not provided by musl
     skip_f16_f128: true,
+    skip_builtins: true,
     skip: [
         // TODO integer inputs
         jn,
diff --git a/library/compiler-builtins/libm-test/tests/u256.rs b/library/compiler-builtins/libm-test/tests/u256.rs
index e697945f47971..ce51236a259b9 100644
--- a/library/compiler-builtins/libm-test/tests/u256.rs
+++ b/library/compiler-builtins/libm-test/tests/u256.rs
@@ -5,11 +5,11 @@
 
 use std::sync::LazyLock;
 
-use libm::support::{HInt, u256};
+use libm::support::{HInt, i256, u256};
 type BigInt = rug::Integer;
 
-use libm_test::bigint_fuzz_iteration_count;
 use libm_test::generate::random::SEED;
+use libm_test::{MinInt, bigint_fuzz_iteration_count};
 use rand::{RngExt, SeedableRng};
 use rand_chacha::ChaCha8Rng;
 use rug::Assign;
@@ -19,56 +19,162 @@ use rug::ops::NotAssign;
 static BIGINT_U256_MAX: LazyLock<BigInt> =
     LazyLock::new(|| BigInt::from_digits(&[u128::MAX, u128::MAX], Order::Lsf));
 
-/// Copied from the test module.
-fn hexu(v: u256) -> String {
-    format!("0x{:032x}{:032x}", v.hi, v.lo)
-}
-
 fn random_u256(rng: &mut ChaCha8Rng) -> u256 {
     let lo: u128 = rng.random();
     let hi: u128 = rng.random();
     u256 { lo, hi }
 }
 
-fn assign_bigint(bx: &mut BigInt, x: u256) {
-    bx.assign_digits(&[x.lo, x.hi], Order::Lsf);
+fn random_i256(rng: &mut ChaCha8Rng) -> i256 {
+    random_u256(rng).signed()
+}
+
+fn assign_bigint_u256(bx: &mut BigInt, x: u256) {
+    bx.assign(x.hi);
+    *bx <<= 128;
+    *bx += x.lo;
 }
 
-fn from_bigint(bx: &mut BigInt) -> u256 {
+fn assign_bigint_i256(bx: &mut BigInt, x: i256) {
+    bx.assign(x.hi);
+    *bx <<= 128;
+    *bx += x.lo;
+}
+
+/// Note that this destroys the result in `bx`.
+fn from_bigint_u256(bx: &mut BigInt) -> u256 {
     // Truncate so the result fits into `[u128; 2]`. This makes all ops overflowing.
     *bx &= &*BIGINT_U256_MAX;
     let mut bres = [0u128, 0];
     bx.write_digits(&mut bres, Order::Lsf);
-    bx.assign(0);
+    bx.assign(0); // prevent accidental reuse
     u256 {
         lo: bres[0],
         hi: bres[1],
     }
 }
 
-fn check_one(
-    x: impl FnOnce() -> String,
-    y: impl FnOnce() -> Option<String>,
-    actual: u256,
-    expected: &mut BigInt,
-) {
-    let expected = from_bigint(expected);
+/// Note that this destroys the result in `bx`.
+fn from_bigint_i256(bx: &mut BigInt) -> i256 {
+    // Truncate so the result fits into `[u128; 2]`. This makes all ops overflowing.
+    *bx &= &*BIGINT_U256_MAX;
+    let lo = bx.to_u128_wrapping();
+    *bx >>= 128;
+    let hi = bx.to_i128_wrapping();
+    bx.assign(0); // prevent accidental reuse
+    i256 { hi, lo }
+}
+
+#[track_caller]
+fn assert_same_u256(msg: impl Fn() -> String, actual: u256, expected_big: &mut BigInt) {
+    let expected = from_bigint_u256(expected_big);
+    if actual != expected {
+        let mut act_big = BigInt::new();
+        assign_bigint_u256(&mut act_big, actual);
+        panic!(
+            "Test failure: {}\n\
+            actual:   {act_big}\n\
+            expected: {expected_big}\n\
+            actual:   {actual:#x}\n\
+            expected: {expected:#x}\
+            ",
+            msg()
+        )
+    }
+}
+
+#[track_caller]
+fn assert_same_i256(msg: impl Fn() -> String, actual: i256, expected_big: &mut BigInt) {
+    let expected = from_bigint_i256(expected_big);
     if actual != expected {
-        let xmsg = x();
-        let ymsg = y().map(|y| format!("y:        {y}\n")).unwrap_or_default();
+        let mut act_big = BigInt::new();
+        assign_bigint_i256(&mut act_big, actual);
         panic!(
-            "Results do not match\n\
-            input:    {xmsg}\n\
-            {ymsg}\
-            actual:   {}\n\
-            expected: {}\
+            "Test failure: {}\n\
+            actual:   {act_big}\n\
+            expected: {expected_big}\n\
+            actual:   {actual:#x}\n\
+            expected: {expected:#x}\
             ",
-            hexu(actual),
-            hexu(expected),
+            msg()
         )
     }
 }
 
+/// Verify the test setup.
+#[test]
+fn mp_u256_roundtrip() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        assign_bigint_u256(&mut bx, x);
+        assert_eq!(from_bigint_u256(&mut bx), x);
+    }
+
+    // Check wraparound
+    assign_bigint_u256(&mut bx, u256::MAX);
+    bx += 1;
+    assert_eq!(from_bigint_u256(&mut bx), u256::MIN);
+    assign_bigint_u256(&mut bx, u256::MIN);
+    bx -= 1;
+    assert_eq!(from_bigint_u256(&mut bx), u256::MAX);
+}
+
+/// Verify the test setup.
+#[test]
+fn mp_i256_roundtrip() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_i256(&mut rng);
+        assign_bigint_i256(&mut bx, x);
+        assert_eq!(from_bigint_i256(&mut bx), x);
+    }
+
+    // Check wraparound
+    assign_bigint_i256(&mut bx, i256::MAX);
+    bx += 1;
+    assert_eq!(from_bigint_i256(&mut bx), i256::MIN);
+    assign_bigint_i256(&mut bx, i256::MIN);
+    bx -= 1;
+    assert_eq!(from_bigint_i256(&mut bx), i256::MAX);
+}
+
+#[test]
+fn mp_u256_ord() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_u256(&mut rng);
+        let y = random_u256(&mut rng);
+        assign_bigint_u256(&mut bx, x);
+        assign_bigint_u256(&mut by, y);
+
+        assert_eq!(x.cmp(&y), bx.cmp(&by), "cmp({x:#x}, {y:#x})");
+    }
+}
+
+#[test]
+fn mp_i256_ord() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_i256(&mut rng);
+        let y = random_i256(&mut rng);
+        assign_bigint_i256(&mut bx, x);
+        assign_bigint_i256(&mut by, y);
+
+        assert_eq!(x.cmp(&y), bx.cmp(&by), "cmp({x:#x}, {y:#x})");
+    }
+}
+
 #[test]
 fn mp_u256_bitor() {
     let mut rng = ChaCha8Rng::from_seed(*SEED);
@@ -78,11 +184,28 @@ fn mp_u256_bitor() {
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
         let y = random_u256(&mut rng);
-        assign_bigint(&mut bx, x);
-        assign_bigint(&mut by, y);
+        assign_bigint_u256(&mut bx, x);
+        assign_bigint_u256(&mut by, y);
+        let actual = x | y;
+        bx |= &by;
+        assert_same_u256(|| format!("{x:#x} ^ {y:#x}"), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_i256_bitor() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_i256(&mut rng);
+        let y = random_i256(&mut rng);
+        assign_bigint_i256(&mut bx, x);
+        assign_bigint_i256(&mut by, y);
         let actual = x | y;
         bx |= &by;
-        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+        assert_same_i256(|| format!("{x:#x} ^ {y:#x}"), actual, &mut bx);
     }
 }
 
@@ -93,10 +216,24 @@ fn mp_u256_not() {
 
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
-        assign_bigint(&mut bx, x);
+        assign_bigint_u256(&mut bx, x);
+        let actual = !x;
+        bx.not_assign();
+        assert_same_u256(|| format!("!{x:#x}"), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_i256_not() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_i256(&mut rng);
+        assign_bigint_i256(&mut bx, x);
         let actual = !x;
         bx.not_assign();
-        check_one(|| hexu(x), || None, actual, &mut bx);
+        assert_same_i256(|| format!("!{x:#x}"), actual, &mut bx);
     }
 }
 
@@ -109,8 +246,9 @@ fn mp_u256_add() {
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
         let y = random_u256(&mut rng);
-        assign_bigint(&mut bx, x);
-        assign_bigint(&mut by, y);
+        assign_bigint_u256(&mut bx, x);
+        assign_bigint_u256(&mut by, y);
+        // Emulate wrapping semantics with panicking ops
         let actual = if u256::MAX - x >= y {
             x + y
         } else {
@@ -119,7 +257,35 @@ fn mp_u256_add() {
             y - (u256::MAX - x) - 1_u128.widen()
         };
         bx += &by;
-        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+        assert_same_u256(|| format!("{x:#x} + {y:#x}"), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_i256_add() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_i256(&mut rng);
+        let y = random_i256(&mut rng);
+        assign_bigint_i256(&mut bx, x);
+        assign_bigint_i256(&mut by, y);
+
+        // Emulate wrapping semantics with panicking ops
+        let actual = if x > i256::ZERO && y > i256::MAX - x {
+            // Overflow condition
+            (x + i256::MIN) + (y + i256::MIN)
+        } else if x < i256::ZERO && y < i256::MIN - x {
+            // Underflow condition
+            (x - i256::MIN) + (y - i256::MIN)
+        } else {
+            // Otherwise there is no overflow
+            x + y
+        };
+        bx += &by;
+        assert_same_i256(|| format!("{x:#x} + {y:#x}"), actual, &mut bx);
     }
 }
 
@@ -132,15 +298,41 @@ fn mp_u256_sub() {
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
         let y = random_u256(&mut rng);
-        assign_bigint(&mut bx, x);
-        assign_bigint(&mut by, y);
+        assign_bigint_u256(&mut bx, x);
+        assign_bigint_u256(&mut by, y);
 
         // since the operators (may) panic on overflow,
         // we should test something that doesn't
         let actual = if x >= y { x - y } else { y - x };
         bx -= &by;
         bx.abs_mut();
-        check_one(|| hexu(x), || Some(hexu(y)), actual, &mut bx);
+        assert_same_u256(|| format!("{x:#x} - {y:#x}"), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_i256_sub() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+    let mut by = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_i256(&mut rng);
+        let y = random_i256(&mut rng);
+        assign_bigint_i256(&mut bx, x);
+        assign_bigint_i256(&mut by, y);
+        dbg!(&bx, &by);
+
+        // Emulate wrapping semantics with panicking ops
+        let actual = if y > i256::ZERO && x < i256::MIN + y {
+            (x - i256::MIN) - (y + i256::MIN)
+        } else if y < i256::ZERO && x > i256::MAX + y {
+            (x + i256::MIN) - (y - i256::MIN)
+        } else {
+            x - y
+        };
+        bx -= &by;
+        assert_same_i256(|| format!("{x:#x} - {y:#x}"), actual, &mut bx);
     }
 }
 
@@ -152,10 +344,25 @@ fn mp_u256_shl() {
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
         let shift: u32 = rng.random_range(0..256);
-        assign_bigint(&mut bx, x);
+        assign_bigint_u256(&mut bx, x);
+        let actual = x << shift;
+        bx <<= shift;
+        assert_same_u256(|| format!("{x:#x} << {shift}"), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_i256_shl() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_i256(&mut rng);
+        let shift: u32 = rng.random_range(0..256);
+        assign_bigint_i256(&mut bx, x);
         let actual = x << shift;
         bx <<= shift;
-        check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
+        assert_same_i256(|| format!("{x:#x} << {shift}"), actual, &mut bx);
     }
 }
 
@@ -167,15 +374,30 @@ fn mp_u256_shr() {
     for _ in 0..bigint_fuzz_iteration_count() {
         let x = random_u256(&mut rng);
         let shift: u32 = rng.random_range(0..256);
-        assign_bigint(&mut bx, x);
+        assign_bigint_u256(&mut bx, x);
+        let actual = x >> shift;
+        bx >>= shift;
+        assert_same_u256(|| format!("{x:#x} >> {shift}"), actual, &mut bx);
+    }
+}
+
+#[test]
+fn mp_i256_shr() {
+    let mut rng = ChaCha8Rng::from_seed(*SEED);
+    let mut bx = BigInt::new();
+
+    for _ in 0..bigint_fuzz_iteration_count() {
+        let x = random_i256(&mut rng);
+        let shift: u32 = rng.random_range(0..256);
+        assign_bigint_i256(&mut bx, x);
         let actual = x >> shift;
         bx >>= shift;
-        check_one(|| hexu(x), || Some(shift.to_string()), actual, &mut bx);
+        assert_same_i256(|| format!("{x:#x} >> {shift}"), actual, &mut bx);
     }
 }
 
 #[test]
-fn mp_u256_widen_mul() {
+fn mp_u256_u128_widen_mul() {
     let mut rng = ChaCha8Rng::from_seed(*SEED);
     let mut bx = BigInt::new();
     let mut by = BigInt::new();
@@ -187,9 +409,8 @@ fn mp_u256_widen_mul() {
         by.assign(y);
         let actual = x.widen_mul(y);
         bx *= &by;
-        check_one(
-            || format!("{x:#034x}"),
-            || Some(format!("{y:#034x}")),
+        assert_same_u256(
+            || format!("{x:#034x}.widen_mul({y:#034x})"),
             actual,
             &mut bx,
         );
diff --git a/library/compiler-builtins/libm/Cargo.toml b/library/compiler-builtins/libm/Cargo.toml
index 28e594dca1f91..98091b8255c92 100644
--- a/library/compiler-builtins/libm/Cargo.toml
+++ b/library/compiler-builtins/libm/Cargo.toml
@@ -38,16 +38,14 @@ unstable-public-internals = []
 # Enable the nightly-only `f16` and `f128`.
 unstable-float = []
 
-# Used to prevent using any intrinsics or arch-specific code.
-#
-# HACK: this is a negative feature which is generally a bad idea in Cargo, but
-# we need it to be able to forbid other features when this crate is used in
-# Rust dependencies. Setting this overrides all features that may enable
-# hard float operations.
+# DEPRECATED: This feature is kept around for compatibility reasons but
+# does not do anything.
+# FIXME(1.0): remove this feature.
 force-soft-floats = []
 
 [lints.rust]
 unexpected_cfgs = { level = "warn", check-cfg = [
-  # compiler-builtins sets this feature, but we use it in `libm`
+  # compiler-builtins sets these features, but we use them in `libm`
   'cfg(feature, values("compiler-builtins"))',
+  'cfg(feature, values("unmangled-names"))',
 ] }
diff --git a/library/compiler-builtins/libm/build.rs b/library/compiler-builtins/libm/build.rs
index 07d08ed4364db..584e6a25da270 100644
--- a/library/compiler-builtins/libm/build.rs
+++ b/library/compiler-builtins/libm/build.rs
@@ -1,18 +1,11 @@
-use std::env;
-
 mod configure;
 
-fn main() {
-    let cfg = configure::Config::from_env();
+use configure::{Config, Library};
 
+fn main() {
     println!("cargo:rerun-if-changed=build.rs");
     println!("cargo:rerun-if-changed=configure.rs");
-    println!("cargo:rustc-check-cfg=cfg(assert_no_panic)");
-
-    // If set, enable `no-panic`. Requires LTO (`release-opt` profile).
-    if env::var("ENSURE_NO_PANIC").is_ok() {
-        println!("cargo:rustc-cfg=assert_no_panic");
-    }
 
-    configure::emit_libm_config(&cfg);
+    let cfg = Config::from_env(Library::Libm);
+    configure::emit(&cfg);
 }
diff --git a/library/compiler-builtins/libm/configure.rs b/library/compiler-builtins/libm/configure.rs
index ee65a3a8d6243..ec4e46c391042 100644
--- a/library/compiler-builtins/libm/configure.rs
+++ b/library/compiler-builtins/libm/configure.rs
@@ -1,16 +1,23 @@
-// Configuration shared with both libm and libm-test
+//! Common configuration shared by multiple crates in the workspace.
 
-use std::env;
+use std::env::{self, VarError};
 use std::path::PathBuf;
+use std::sync::atomic::AtomicBool;
+use std::sync::atomic::Ordering::Relaxed;
+
+/// Read from env, print more debug output via `cargo:warning` if set.
+static VERBOSE_BUILD: AtomicBool = AtomicBool::new(false);
 
 #[derive(Debug)]
 #[allow(dead_code)]
 pub struct Config {
+    pub library: Library,
     pub manifest_dir: PathBuf,
     pub out_dir: PathBuf,
     pub opt_level: String,
     pub cargo_features: Vec<String>,
     pub target_triple: String,
+    pub target_triple_split: Vec<String>,
     pub target_arch: String,
     pub target_env: String,
     pub target_families: Vec<String>,
@@ -23,8 +30,14 @@ pub struct Config {
 }
 
 impl Config {
-    pub fn from_env() -> Self {
+    pub fn from_env(library: Library) -> Self {
+        println!("cargo:rerun-if-env-changed=LIBM_BUILD_VERBOSE");
+        if env_flag("LIBM_BUILD_VERBOSE") {
+            VERBOSE_BUILD.store(true, Relaxed);
+        }
+
         let target_triple = env::var("TARGET").unwrap();
+        let target_triple_split = target_triple.split('-').map(ToOwned::to_owned).collect();
         let target_families = env::var("CARGO_CFG_TARGET_FAMILY")
             .map(|feats| feats.split(',').map(ToOwned::to_owned).collect())
             .unwrap_or_default();
@@ -35,9 +48,16 @@ impl Config {
             .filter_map(|(name, _value)| name.strip_prefix("CARGO_FEATURE_").map(ToOwned::to_owned))
             .map(|s| s.to_lowercase().replace("_", "-"))
             .collect();
+        if VERBOSE_BUILD.load(Relaxed) {
+            for feature in &cargo_features {
+                println!("cargo:warning=feature `{feature}` enabled");
+            }
+        }
 
         Self {
+            library,
             target_triple,
+            target_triple_split,
             manifest_dir: PathBuf::from(env::var("CARGO_MANIFEST_DIR").unwrap()),
             out_dir: PathBuf::from(env::var("OUT_DIR").unwrap()),
             opt_level: env::var("OPT_LEVEL").unwrap(),
@@ -55,70 +75,98 @@ impl Config {
             reliable_f16: env::var_os("CARGO_CFG_TARGET_HAS_RELIABLE_F16").is_some(),
         }
     }
-}
 
-/// Libm gets most config options made available.
-#[allow(dead_code)]
-pub fn emit_libm_config(cfg: &Config) {
-    emit_intrinsics_cfg();
-    emit_arch_cfg();
-    emit_optimization_cfg(cfg);
-    emit_cfg_shorthands(cfg);
-    emit_cfg_env(cfg);
-    emit_f16_f128_cfg(cfg);
+    #[allow(dead_code)]
+    pub fn has_target_feature(&self, feature: &str) -> bool {
+        self.target_features.iter().any(|f| f == feature)
+    }
 }
 
-/// Tests don't need most feature-related config.
+/// The library that is setting this configuration
 #[allow(dead_code)]
-pub fn emit_test_config(cfg: &Config) {
-    emit_optimization_cfg(cfg);
-    emit_cfg_shorthands(cfg);
-    emit_cfg_env(cfg);
-    emit_f16_f128_cfg(cfg);
+#[derive(Debug)]
+pub enum Library {
+    BuiltinsTest,
+    BuiltinsTestIntrinsics,
+    CompilerBuiltins,
+    Libm,
+    LibmTest,
+    Util,
 }
 
-/// Simplify the feature logic for enabling intrinsics so code only needs to use
-/// `cfg(intrinsics_enabled)`.
-fn emit_intrinsics_cfg() {
-    println!("cargo:rustc-check-cfg=cfg(intrinsics_enabled)");
+#[allow(unexpected_cfgs)] // Not all crates use all these features
+pub fn emit(cfg: &Config) {
+    let split = &cfg.target_triple_split;
 
-    // Disabled by default; `unstable-intrinsics` enables again; `force-soft-floats` overrides
-    // to disable.
-    if cfg!(feature = "unstable-intrinsics") && !cfg!(feature = "force-soft-floats") {
-        println!("cargo:rustc-cfg=intrinsics_enabled");
-    }
-}
+    let unstable_float = cfg!(feature = "unstable-float");
 
-/// Simplify the feature logic for enabling arch-specific features so code only needs to use
-/// `cfg(arch_enabled)`.
-fn emit_arch_cfg() {
-    println!("cargo:rustc-check-cfg=cfg(arch_enabled)");
+    // Intrinsics may include `core::arch` use, so also gate it under `arch`.
+    let intrinsics_enabled = cfg!(feature = "unstable-intrinsics") && cfg!(feature = "arch");
 
-    // Enabled by default via the "arch" feature, `force-soft-floats` overrides to disable.
-    if cfg!(feature = "arch") && !cfg!(feature = "force-soft-floats") {
-        println!("cargo:rustc-cfg=arch_enabled");
-    }
-}
+    // Some tests are extremely slow. Emit a config option based on optimization level.
+    let opt = !matches!(cfg.opt_level.as_str(), "0" | "1");
 
-/// Some tests are extremely slow. Emit a config option based on optimization level.
-fn emit_optimization_cfg(cfg: &Config) {
-    println!("cargo:rustc-check-cfg=cfg(optimizations_enabled)");
+    // To compile builtins-test-intrinsics for thumb targets, where there is no libc
+    let thumb = split[0].starts_with("thumb");
 
-    if !matches!(cfg.opt_level.as_str(), "0" | "1") {
-        println!("cargo:rustc-cfg=optimizations_enabled");
-    }
-}
+    // compiler-rt `cfg`s away some intrinsics for thumbv6m and thumbv8m.base because
+    // these targets do not have full Thumb-2 support but only original Thumb-1.
+    // We have to cfg our code accordingly.
+    let thumb_1 = split[0] == "thumbv6m" || split[0] == "thumbv8m.base";
+
+    // Shorthand to detect i586 targets
+    let x86_no_sse2 = cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse2");
 
-/// Provide an alias for common longer config combinations.
-fn emit_cfg_shorthands(cfg: &Config) {
-    println!("cargo:rustc-check-cfg=cfg(x86_no_sse)");
-    if cfg.target_arch == "x86" && !cfg.target_features.iter().any(|f| f == "sse") {
-        // Shorthand to detect i586 targets
-        println!("cargo:rustc-cfg=x86_no_sse");
+    // If set, enable `no-panic` for `libm`. Requires LTO (`release-opt` profile).
+    let assert_no_panic = env_flag("ENSURE_NO_PANIC");
+
+    // Arch shorthand config is used in most crates.
+    set_cfg("thumb", thumb);
+    set_cfg("thumb_1", thumb_1);
+    set_cfg("x86_no_sse2", x86_no_sse2);
+
+    match cfg.library {
+        Library::CompilerBuiltins => {
+            // libm config. Intrinsics are always enabled when a part of c-b.
+            set_cfg("assert_no_panic", assert_no_panic);
+            set_cfg("intrinsics_enabled", true);
+            set_cfg("optimizations_enabled", opt);
+
+            // Not all backends support `f16` and `f128` to the same level on all architectures,
+            // so we need to disable things if the compiler may crash. See configuration at:
+            // * https://github.com/rust-lang/rust/blob/c65dccabacdfd6c8a7f7439eba13422fdd89b91e/compiler/rustc_codegen_llvm/src/llvm_util.rs#L367-L432
+            // * https://github.com/rust-lang/rustc_codegen_gcc/blob/4b5c44b14166083eef8d71f15f5ea1f53fc976a0/src/lib.rs#L496-L507
+            // * https://github.com/rust-lang/rustc_codegen_cranelift/blob/c713ffab3c6e28ab4b4dd4e392330f786ea657ad/src/lib.rs#L196-L226
+            set_cfg("f16_enabled", cfg.reliable_f16);
+            set_cfg("f128_enabled", cfg.reliable_f128);
+        }
+        Library::BuiltinsTest => {
+            set_cfg("f16_enabled", cfg.reliable_f16);
+            set_cfg("f128_enabled", cfg.reliable_f128);
+        }
+        Library::BuiltinsTestIntrinsics => {
+            set_cfg("f16_enabled", cfg.reliable_f16);
+            set_cfg("f128_enabled", cfg.reliable_f128);
+        }
+        Library::Libm | Library::Util => {
+            set_cfg("assert_no_panic", assert_no_panic);
+            set_cfg("intrinsics_enabled", intrinsics_enabled);
+            set_cfg("optimizations_enabled", opt);
+
+            set_cfg("f16_enabled", unstable_float && cfg.reliable_f16);
+            set_cfg("f128_enabled", unstable_float && cfg.reliable_f128);
+        }
+        Library::LibmTest => {
+            set_cfg("optimizations_enabled", opt);
+            emit_cfg_env(cfg);
+
+            set_cfg("f16_enabled", unstable_float && cfg.reliable_f16);
+            set_cfg("f128_enabled", unstable_float && cfg.reliable_f128);
+        }
     }
 }
 
-/// Reemit config that we make use of for test logging.
+/// Re-emit config that we make use of for test logging.
 fn emit_cfg_env(cfg: &Config) {
     println!(
         "cargo:rustc-env=CFG_CARGO_FEATURES={:?}",
@@ -131,25 +179,24 @@ fn emit_cfg_env(cfg: &Config) {
     );
 }
 
-/// Configure whether or not `f16` and `f128` support should be enabled.
-fn emit_f16_f128_cfg(cfg: &Config) {
-    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
-    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
-
-    // `unstable-float` enables these features.
-    if !cfg!(feature = "unstable-float") {
+/// Emit a check-cfg directive and enable the cfg if `set` is `true`.
+pub fn set_cfg(name: &str, set: bool) {
+    println!("cargo:rustc-check-cfg=cfg({name})");
+    if !set {
         return;
     }
-
-    /* See the compiler-builtins configure file for info about the meaning of these options */
-
-    println!("cargo:rustc-check-cfg=cfg(f16_enabled)");
-    if cfg.reliable_f16 {
-        println!("cargo:rustc-cfg=f16_enabled");
+    if VERBOSE_BUILD.load(Relaxed) {
+        println!("cargo:warning=setting config `{name}`");
     }
+    println!("cargo:rustc-cfg={name}");
+}
 
-    println!("cargo:rustc-check-cfg=cfg(f128_enabled)");
-    if cfg.reliable_f128 {
-        println!("cargo:rustc-cfg=f128_enabled");
+/// Return true if the env is set to a value other than `0`.
+pub fn env_flag(key: &str) -> bool {
+    match env::var(key) {
+        Ok(x) if x == "0" => false,
+        Err(VarError::NotPresent) => false,
+        Err(VarError::NotUnicode(_)) => panic!("non-unicode var for `{key}`"),
+        Ok(_) => true,
     }
 }
diff --git a/library/compiler-builtins/libm/src/math/approx/cbrtf64.rs b/library/compiler-builtins/libm/src/math/approx/cbrtf64.rs
new file mode 100644
index 0000000000000..7b371feb96b6d
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/approx/cbrtf64.rs
@@ -0,0 +1,113 @@
+/* origin: FreeBSD /usr/src/lib/msun/src/s_cbrt.c */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ *
+ * Optimized by Bruce D. Evans.
+ */
+/* cbrt(x)
+ * Return cube root of x
+ */
+
+use core::f64;
+
+const B1: u32 = 715094163; /* B1 = (1023-1023/3-0.03306235651)*2**20 */
+const B2: u32 = 696219795; /* B2 = (1023-1023/3-54/3-0.03306235651)*2**20 */
+
+/* |1/cbrt(x) - p(x)| < 2**-23.5 (~[-7.93e-8, 7.929e-8]). */
+const P0: f64 = 1.87595182427177009643; /* 0x3ffe03e6, 0x0f61e692 */
+const P1: f64 = -1.88497979543377169875; /* 0xbffe28e0, 0x92f02420 */
+const P2: f64 = 1.621429720105354466140; /* 0x3ff9f160, 0x4a49d6c2 */
+const P3: f64 = -0.758397934778766047437; /* 0xbfe844cb, 0xbee751d9 */
+const P4: f64 = 0.145996192886612446982; /* 0x3fc2b000, 0xd4e4edd7 */
+
+/// Cube root (f64)
+///
+/// Computes the cube root of the argument.
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
+pub fn cbrtf64(x: f64) -> f64 {
+    let x1p54 = f64::from_bits(0x4350000000000000); // 0x1p54 === 2 ^ 54
+
+    let mut ui: u64 = x.to_bits();
+    let mut r: f64;
+    let s: f64;
+    let mut t: f64;
+    let w: f64;
+    let mut hx: u32 = (ui >> 32) as u32 & 0x7fffffff;
+
+    if hx >= 0x7ff00000 {
+        /* cbrt(NaN,INF) is itself */
+        return x + x;
+    }
+
+    /*
+     * Rough cbrt to 5 bits:
+     *    cbrt(2**e*(1+m) ~= 2**(e/3)*(1+(e%3+m)/3)
+     * where e is integral and >= 0, m is real and in [0, 1), and "/" and
+     * "%" are integer division and modulus with rounding towards minus
+     * infinity.  The RHS is always >= the LHS and has a maximum relative
+     * error of about 1 in 16.  Adding a bias of -0.03306235651 to the
+     * (e%3+m)/3 term reduces the error to about 1 in 32. With the IEEE
+     * floating point representation, for finite positive normal values,
+     * ordinary integer divison of the value in bits magically gives
+     * almost exactly the RHS of the above provided we first subtract the
+     * exponent bias (1023 for doubles) and later add it back.  We do the
+     * subtraction virtually to keep e >= 0 so that ordinary integer
+     * division rounds towards minus infinity; this is also efficient.
+     */
+    if hx < 0x00100000 {
+        /* zero or subnormal? */
+        ui = (x * x1p54).to_bits();
+        hx = (ui >> 32) as u32 & 0x7fffffff;
+        if hx == 0 {
+            return x; /* cbrt(0) is itself */
+        }
+        hx = hx / 3 + B2;
+    } else {
+        hx = hx / 3 + B1;
+    }
+    ui &= 1 << 63;
+    ui |= (hx as u64) << 32;
+    t = f64::from_bits(ui);
+
+    /*
+     * New cbrt to 23 bits:
+     *    cbrt(x) = t*cbrt(x/t**3) ~= t*P(t**3/x)
+     * where P(r) is a polynomial of degree 4 that approximates 1/cbrt(r)
+     * to within 2**-23.5 when |r - 1| < 1/10.  The rough approximation
+     * has produced t such than |t/cbrt(x) - 1| ~< 1/32, and cubing this
+     * gives us bounds for r = t**3/x.
+     *
+     * Try to optimize for parallel evaluation as in __tanf.c.
+     */
+    r = (t * t) * (t / x);
+    t = t * ((P0 + r * (P1 + r * P2)) + ((r * r) * r) * (P3 + r * P4));
+
+    /*
+     * Round t away from zero to 23 bits (sloppily except for ensuring that
+     * the result is larger in magnitude than cbrt(x) but not much more than
+     * 2 23-bit ulps larger).  With rounding towards zero, the error bound
+     * would be ~5/6 instead of ~4/6.  With a maximum error of 2 23-bit ulps
+     * in the rounded t, the infinite-precision error in the Newton
+     * approximation barely affects third digit in the final error
+     * 0.667; the error in the rounded t can be up to about 3 23-bit ulps
+     * before the final error is larger than 0.667 ulps.
+     */
+    ui = t.to_bits();
+    ui = (ui + 0x80000000) & 0xffffffffc0000000;
+    t = f64::from_bits(ui);
+
+    /* one step Newton iteration to 53 bits with error < 0.667 ulps */
+    s = t * t; /* t*t is exact */
+    r = x / s; /* error <= 0.5 ulps; |r| < |t| */
+    w = t + t; /* t+t is exact */
+    r = (r - t) / (w + r); /* r-t is exact; w+r ~= 3*t */
+    t = t + t * r; /* error <= 0.5 + 0.5/3 + epsilon */
+    t
+}
diff --git a/library/compiler-builtins/libm/src/math/approx/hypot.rs b/library/compiler-builtins/libm/src/math/approx/hypot.rs
new file mode 100644
index 0000000000000..c0b2a19370cd7
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/approx/hypot.rs
@@ -0,0 +1,72 @@
+use super::sqrt;
+
+const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1
+
+fn sq(x: f64) -> (f64, f64) {
+    let xh: f64;
+    let xl: f64;
+    let xc: f64;
+
+    xc = x * SPLIT;
+    xh = x - xc + xc;
+    xl = x - xh;
+    let hi = x * x;
+    let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
+    (hi, lo)
+}
+
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
+pub fn hypot(mut x: f64, mut y: f64) -> f64 {
+    let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
+    let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700
+
+    let mut uxi = x.to_bits();
+    let mut uyi = y.to_bits();
+    let uti;
+    let ex: i64;
+    let ey: i64;
+    let mut z: f64;
+
+    /* arrange |x| >= |y| */
+    uxi &= -1i64 as u64 >> 1;
+    uyi &= -1i64 as u64 >> 1;
+    if uxi < uyi {
+        uti = uxi;
+        uxi = uyi;
+        uyi = uti;
+    }
+
+    /* special cases */
+    ex = (uxi >> 52) as i64;
+    ey = (uyi >> 52) as i64;
+    x = f64::from_bits(uxi);
+    y = f64::from_bits(uyi);
+    /* note: hypot(inf,nan) == inf */
+    if ey == 0x7ff {
+        return y;
+    }
+    if ex == 0x7ff || uyi == 0 {
+        return x;
+    }
+    /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
+    /* 64 difference is enough for ld80 double_t */
+    if ex - ey > 64 {
+        return x + y;
+    }
+
+    /* precise sqrt argument in nearest rounding mode without overflow */
+    /* xh*xh must not overflow and xl*xl must not underflow in sq */
+    z = 1.;
+    if ex > 0x3ff + 510 {
+        z = x1p700;
+        x *= x1p_700;
+        y *= x1p_700;
+    } else if ey < 0x3ff - 450 {
+        z = x1p_700;
+        x *= x1p700;
+        y *= x1p700;
+    }
+    let (hx, lx) = sq(x);
+    let (hy, ly) = sq(y);
+    z * sqrt(ly + lx + hy + hx)
+}
diff --git a/library/compiler-builtins/libm/src/math/approx/mod.rs b/library/compiler-builtins/libm/src/math/approx/mod.rs
new file mode 100644
index 0000000000000..da0d0bf39efc4
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/approx/mod.rs
@@ -0,0 +1,8 @@
+//! Approximate implementations.
+//!
+//! These functions may be smaller or faster than those in the main `math` module, but will
+//! not be as accurate.
+
+mod cbrtf64;
+
+pub use cbrtf64::cbrtf64;
diff --git a/library/compiler-builtins/libm/src/math/arch/aarch64.rs b/library/compiler-builtins/libm/src/math/arch/aarch64.rs
deleted file mode 100644
index 8896804b50403..0000000000000
--- a/library/compiler-builtins/libm/src/math/arch/aarch64.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-//! Architecture-specific support for aarch64 with neon.
-
-use core::arch::asm;
-
-pub fn fma(mut x: f64, y: f64, z: f64) -> f64 {
-    // SAFETY: `fmadd` is available with neon and has no side effects.
-    unsafe {
-        asm!(
-            "fmadd {x:d}, {x:d}, {y:d}, {z:d}",
-            x = inout(vreg) x,
-            y = in(vreg) y,
-            z = in(vreg) z,
-            options(nomem, nostack, pure)
-        );
-    }
-    x
-}
-
-pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 {
-    // SAFETY: `fmadd` is available with neon and has no side effects.
-    unsafe {
-        asm!(
-            "fmadd {x:s}, {x:s}, {y:s}, {z:s}",
-            x = inout(vreg) x,
-            y = in(vreg) y,
-            z = in(vreg) z,
-            options(nomem, nostack, pure)
-        );
-    }
-    x
-}
-
-// NB: `frintx` is technically the correct instruction for C's `rint`. However, in Rust (and LLVM
-// by default), `rint` is identical to `roundeven` (no fpenv interaction) so we use the
-// side-effect-free `frintn`.
-//
-// In general, C code that calls Rust's libm should assume that fpenv is ignored.
-
-pub fn rint(mut x: f64) -> f64 {
-    // SAFETY: `frintn` is available with neon and has no side effects.
-    //
-    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
-    // not support rounding modes.
-    unsafe {
-        asm!(
-            "frintn {x:d}, {x:d}",
-            x = inout(vreg) x,
-            options(nomem, nostack, pure)
-        );
-    }
-    x
-}
-
-pub fn rintf(mut x: f32) -> f32 {
-    // SAFETY: `frintn` is available with neon and has no side effects.
-    //
-    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
-    // not support rounding modes.
-    unsafe {
-        asm!(
-            "frintn {x:s}, {x:s}",
-            x = inout(vreg) x,
-            options(nomem, nostack, pure)
-        );
-    }
-    x
-}
-
-#[cfg(all(f16_enabled, target_feature = "fp16"))]
-pub fn rintf16(mut x: f16) -> f16 {
-    // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
-    //
-    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
-    // not support rounding modes.
-    unsafe {
-        asm!(
-            "frintn {x:h}, {x:h}",
-            x = inout(vreg) x,
-            options(nomem, nostack, pure)
-        );
-    }
-    x
-}
-
-pub fn sqrt(mut x: f64) -> f64 {
-    // SAFETY: `fsqrt` is available with neon and has no side effects.
-    unsafe {
-        asm!(
-            "fsqrt {x:d}, {x:d}",
-            x = inout(vreg) x,
-            options(nomem, nostack, pure)
-        );
-    }
-    x
-}
-
-pub fn sqrtf(mut x: f32) -> f32 {
-    // SAFETY: `fsqrt` is available with neon and has no side effects.
-    unsafe {
-        asm!(
-            "fsqrt {x:s}, {x:s}",
-            x = inout(vreg) x,
-            options(nomem, nostack, pure)
-        );
-    }
-    x
-}
-
-#[cfg(all(f16_enabled, target_feature = "fp16"))]
-pub fn sqrtf16(mut x: f16) -> f16 {
-    // SAFETY: `fsqrt` is available for `f16` with `fp16` (implies `neon`) and has no
-    // side effects.
-    unsafe {
-        asm!(
-            "fsqrt {x:h}, {x:h}",
-            x = inout(vreg) x,
-            options(nomem, nostack, pure)
-        );
-    }
-    x
-}
diff --git a/library/compiler-builtins/libm/src/math/arch/aarch64/fma.rs b/library/compiler-builtins/libm/src/math/arch/aarch64/fma.rs
new file mode 100644
index 0000000000000..9fd574fad37d2
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/aarch64/fma.rs
@@ -0,0 +1,29 @@
+use core::arch::asm;
+
+pub fn fmaf(mut x: f32, y: f32, z: f32) -> f32 {
+    // SAFETY: `fmadd` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fmadd {x:s}, {x:s}, {y:s}, {z:s}",
+            x = inout(vreg) x,
+            y = in(vreg) y,
+            z = in(vreg) z,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn fma(mut x: f64, y: f64, z: f64) -> f64 {
+    // SAFETY: `fmadd` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fmadd {x:d}, {x:d}, {y:d}, {z:d}",
+            x = inout(vreg) x,
+            y = in(vreg) y,
+            z = in(vreg) z,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
diff --git a/library/compiler-builtins/libm/src/math/arch/aarch64/mod.rs b/library/compiler-builtins/libm/src/math/arch/aarch64/mod.rs
new file mode 100644
index 0000000000000..0192a3d442aec
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/aarch64/mod.rs
@@ -0,0 +1,13 @@
+//! Architecture-specific support for aarch64 with neon.
+
+mod fma;
+mod rounding;
+mod sqrt;
+
+pub use fma::{fma, fmaf};
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub use rounding::rintf16;
+pub use rounding::{rint, rintf};
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub use sqrt::sqrtf16;
+pub use sqrt::{sqrt, sqrtf};
diff --git a/library/compiler-builtins/libm/src/math/arch/aarch64/rounding.rs b/library/compiler-builtins/libm/src/math/arch/aarch64/rounding.rs
new file mode 100644
index 0000000000000..255e8e85b2f0b
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/aarch64/rounding.rs
@@ -0,0 +1,53 @@
+//! NB: `frintx` is technically the correct instruction for C's `rint`. However, in Rust (and LLVM
+//! by default), `rint` is identical to `roundeven` (no fpenv interaction) so we use the
+//! side-effect-free `frintn`.
+//!
+//! In general, C code that calls Rust's libm should assume that fpenv is ignored.
+
+use core::arch::asm;
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn rintf16(mut x: f16) -> f16 {
+    // SAFETY: `frintn` is available for `f16` with `fp16` (implies `neon`) and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn rintf(mut x: f32) -> f32 {
+    // SAFETY: `frintn` is available with neon and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn rint(mut x: f64) -> f64 {
+    // SAFETY: `frintn` is available with neon and has no side effects.
+    //
+    // `frintn` is always round-to-nearest which does not match the C specification, but Rust does
+    // not support rounding modes.
+    unsafe {
+        asm!(
+            "frintn {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
diff --git a/library/compiler-builtins/libm/src/math/arch/aarch64/sqrt.rs b/library/compiler-builtins/libm/src/math/arch/aarch64/sqrt.rs
new file mode 100644
index 0000000000000..e3743dfd558aa
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/aarch64/sqrt.rs
@@ -0,0 +1,39 @@
+use core::arch::asm;
+
+#[cfg(all(f16_enabled, target_feature = "fp16"))]
+pub fn sqrtf16(mut x: f16) -> f16 {
+    // SAFETY: `fsqrt` is available for `f16` with `fp16` (implies `neon`) and has no
+    // side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:h}, {x:h}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn sqrtf(mut x: f32) -> f32 {
+    // SAFETY: `fsqrt` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:s}, {x:s}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
+
+pub fn sqrt(mut x: f64) -> f64 {
+    // SAFETY: `fsqrt` is available with neon and has no side effects.
+    unsafe {
+        asm!(
+            "fsqrt {x:d}, {x:d}",
+            x = inout(vreg) x,
+            options(nomem, nostack, pure)
+        );
+    }
+    x
+}
diff --git a/library/compiler-builtins/libm/src/math/arch/i586.rs b/library/compiler-builtins/libm/src/math/arch/i586/exp_all.rs
similarity index 53%
rename from library/compiler-builtins/libm/src/math/arch/i586.rs
rename to library/compiler-builtins/libm/src/math/arch/i586/exp_all.rs
index d9bb93fbf5852..020b5c057daaf 100644
--- a/library/compiler-builtins/libm/src/math/arch/i586.rs
+++ b/library/compiler-builtins/libm/src/math/arch/i586/exp_all.rs
@@ -1,65 +1,3 @@
-//! Architecture-specific support for x86-32 without SSE2
-//!
-//! We use an alternative implementation on x86, because the
-//! main implementation fails with the x87 FPU used by
-//! debian i386, probably due to excess precision issues.
-//!
-//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these
-//! functions are implemented in this way.
-
-pub fn ceil(mut x: f64) -> f64 {
-    unsafe {
-        core::arch::asm!(
-            "fld qword ptr [{x}]",
-            // Save the FPU control word, using `x` as scratch space.
-            "fstcw [{x}]",
-            // Set rounding control to 0b10 (+∞).
-            "mov word ptr [{x} + 2], 0x0b7f",
-            "fldcw [{x} + 2]",
-            // Round.
-            "frndint",
-            // Restore FPU control word.
-            "fldcw [{x}]",
-            // Save rounded value to memory.
-            "fstp qword ptr [{x}]",
-            x = in(reg) &mut x,
-            // All the x87 FPU stack is used, all registers must be clobbered
-            out("st(0)") _, out("st(1)") _,
-            out("st(2)") _, out("st(3)") _,
-            out("st(4)") _, out("st(5)") _,
-            out("st(6)") _, out("st(7)") _,
-            options(nostack),
-        );
-    }
-    x
-}
-
-pub fn floor(mut x: f64) -> f64 {
-    unsafe {
-        core::arch::asm!(
-            "fld qword ptr [{x}]",
-            // Save the FPU control word, using `x` as scratch space.
-            "fstcw [{x}]",
-            // Set rounding control to 0b01 (-∞).
-            "mov word ptr [{x} + 2], 0x077f",
-            "fldcw [{x} + 2]",
-            // Round.
-            "frndint",
-            // Restore FPU control word.
-            "fldcw [{x}]",
-            // Save rounded value to memory.
-            "fstp qword ptr [{x}]",
-            x = in(reg) &mut x,
-            // All the x87 FPU stack is used, all registers must be clobbered
-            out("st(0)") _, out("st(1)") _,
-            out("st(2)") _, out("st(3)") _,
-            out("st(4)") _, out("st(5)") _,
-            out("st(6)") _, out("st(7)") _,
-            options(nostack),
-        );
-    }
-    x
-}
 /// Implements the exponential functions with `x87` assembly.
 ///
 /// This relies on the instruction `f2xm1`, which computes `2^x - 1` (for
diff --git a/library/compiler-builtins/libm/src/math/arch/i586/mod.rs b/library/compiler-builtins/libm/src/math/arch/i586/mod.rs
new file mode 100644
index 0000000000000..f80be49fc4b3f
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/i586/mod.rs
@@ -0,0 +1,14 @@
+//! Architecture-specific support for x86-32 without SSE2
+//!
+//! We use an alternative implementation on x86, because the
+//! main implementation fails with the x87 FPU used by
+//! debian i386, probably due to excess precision issues.
+//!
+//! See https://github.com/rust-lang/compiler-builtins/pull/976 for discussion on why these
+//! functions are implemented in this way.
+
+mod exp_all;
+mod rounding;
+
+pub use exp_all::{x87_exp, x87_exp2, x87_exp2f, x87_exp10, x87_exp10f, x87_expf};
+pub use rounding::{ceil, floor, rint};
diff --git a/library/compiler-builtins/libm/src/math/arch/i586/rounding.rs b/library/compiler-builtins/libm/src/math/arch/i586/rounding.rs
new file mode 100644
index 0000000000000..45bf0adafd304
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/i586/rounding.rs
@@ -0,0 +1,103 @@
+pub fn ceil(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            // Save the FPU control word, using `x` as scratch space.
+            "fstcw [{x}]",
+            // Set rounding control to 0b10 (+∞).
+            "mov word ptr [{x} + 2], 0x0b7f",
+            "fldcw [{x} + 2]",
+            // Round.
+            "frndint",
+            // Restore FPU control word.
+            "fldcw [{x}]",
+            // Save rounded value to memory.
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // All the x87 FPU stack is used, all registers must be clobbered
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
+    }
+    x
+}
+
+pub fn floor(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            // Save the FPU control word, using `x` as scratch space.
+            "fstcw [{x}]",
+            // Set rounding control to 0b01 (-∞).
+            "mov word ptr [{x} + 2], 0x077f",
+            "fldcw [{x} + 2]",
+            // Round.
+            "frndint",
+            // Restore FPU control word.
+            "fldcw [{x}]",
+            // Save rounded value to memory.
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // All the x87 FPU stack is used, all registers must be clobbered
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
+    }
+    x
+}
+
+/// Note that this respects rounding mode. Because it is UB to have a non-default rounding
+/// mode in Rust, this acts as roundeven.
+pub fn rint(mut x: f64) -> f64 {
+    unsafe {
+        core::arch::asm!(
+            "fld qword ptr [{x}]",
+            "frndint",
+            "fstp qword ptr [{x}]",
+            x = in(reg) &mut x,
+            // All the x87 FPU stack is used, all registers must be clobbered
+            out("st(0)") _, out("st(1)") _,
+            out("st(2)") _, out("st(3)") _,
+            out("st(4)") _, out("st(5)") _,
+            out("st(6)") _, out("st(7)") _,
+            options(nostack),
+        );
+    }
+    x
+}
+
+/* FIXME(msrv): after 1.82, the below can be used to compute control words using `asm_const`:
+
+#[derive(Clone, Copy, Debug, PartialEq)]
+enum Precision {
+    Single,
+    Double,
+    Extended,
+}
+
+/// See: Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 1:
+/// Basic Architecture, section 8.1.5 x87 FPU Control Word.
+const fn make_fpcw(round: Round, prec: Precision) -> u16 {
+    let exceptions = 0b111111; // Disable all 6 exceptions
+    let misc = 0b1000000; // reserved field usually set by default
+    let pc = match prec {
+        Precision::Single => 0b00,
+        Precision::Double => 0b10,
+        Precision::Extended => 0b11,
+    };
+    let rc = match round {
+        Round::Nearest => 0b00,
+        Round::Negative => 0b01,
+        Round::Positive => 0b10,
+        Round::Zero => 0b11,
+    };
+    (rc << 10) | (pc << 8) | misc | exceptions
+}
+
+*/
diff --git a/library/compiler-builtins/libm/src/math/arch/mod.rs b/library/compiler-builtins/libm/src/math/arch/mod.rs
index ba859c679d0db..1bed464cfa7d6 100644
--- a/library/compiler-builtins/libm/src/math/arch/mod.rs
+++ b/library/compiler-builtins/libm/src/math/arch/mod.rs
@@ -7,7 +7,7 @@
 
 // Most implementations should be defined here, to ensure they are not made available when
 // soft floats are required.
-#[cfg(arch_enabled)]
+#[cfg(feature = "arch")]
 cfg_if! {
     if #[cfg(all(target_arch = "wasm32", intrinsics_enabled))] {
         mod wasm32;
@@ -41,15 +41,20 @@ cfg_if! {
 }
 
 // There are certain architecture-specific implementations that are needed for correctness
-// even with `force-soft-float`. These are configured here.
+// even with `arch` disabled. These are configured here.
 cfg_if! {
-    if #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))] {
+    if #[cfg(x86_no_sse2)] {
         mod i586;
-        pub use i586::{ceil, floor};
-    }
-}
-cfg_if! {
-    if #[cfg(x86_no_sse)] {
-        pub use i586::{x87_exp10f, x87_exp10, x87_expf, x87_exp, x87_exp2f, x87_exp2};
+        pub use i586::{
+            ceil,
+            floor,
+            rint,
+            x87_exp,
+            x87_exp10,
+            x87_exp10f,
+            x87_exp2,
+            x87_exp2f,
+            x87_expf,
+        };
     }
 }
diff --git a/library/compiler-builtins/libm/src/math/arch/wasm32/fabs.rs b/library/compiler-builtins/libm/src/math/arch/wasm32/fabs.rs
new file mode 100644
index 0000000000000..1f5ff3f28131b
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/wasm32/fabs.rs
@@ -0,0 +1,7 @@
+pub fn fabsf(x: f32) -> f32 {
+    x.abs()
+}
+
+pub fn fabs(x: f64) -> f64 {
+    x.abs()
+}
diff --git a/library/compiler-builtins/libm/src/math/arch/wasm32/mod.rs b/library/compiler-builtins/libm/src/math/arch/wasm32/mod.rs
new file mode 100644
index 0000000000000..b262e0108155e
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/wasm32/mod.rs
@@ -0,0 +1,10 @@
+//! Wasm has builtins for simple float operations. Use the unstable `core::arch` intrinsics which
+//! are significantly faster than soft float operations.
+
+mod fabs;
+mod rounding;
+mod sqrt;
+
+pub use fabs::{fabs, fabsf};
+pub use rounding::{ceil, ceilf, floor, floorf, rint, rintf, trunc, truncf};
+pub use sqrt::{sqrt, sqrtf};
diff --git a/library/compiler-builtins/libm/src/math/arch/wasm32.rs b/library/compiler-builtins/libm/src/math/arch/wasm32/rounding.rs
similarity index 76%
rename from library/compiler-builtins/libm/src/math/arch/wasm32.rs
rename to library/compiler-builtins/libm/src/math/arch/wasm32/rounding.rs
index de80c8a581726..1c8914e4561c1 100644
--- a/library/compiler-builtins/libm/src/math/arch/wasm32.rs
+++ b/library/compiler-builtins/libm/src/math/arch/wasm32/rounding.rs
@@ -1,50 +1,34 @@
 //! Wasm has builtins for simple float operations. Use the unstable `core::arch` intrinsics which
 //! are significantly faster than soft float operations.
 
-pub fn ceil(x: f64) -> f64 {
-    core::arch::wasm32::f64_ceil(x)
-}
-
 pub fn ceilf(x: f32) -> f32 {
     core::arch::wasm32::f32_ceil(x)
 }
 
-pub fn fabs(x: f64) -> f64 {
-    x.abs()
-}
-
-pub fn fabsf(x: f32) -> f32 {
-    x.abs()
-}
-
-pub fn floor(x: f64) -> f64 {
-    core::arch::wasm32::f64_floor(x)
+pub fn ceil(x: f64) -> f64 {
+    core::arch::wasm32::f64_ceil(x)
 }
 
 pub fn floorf(x: f32) -> f32 {
     core::arch::wasm32::f32_floor(x)
 }
 
-pub fn rint(x: f64) -> f64 {
-    core::arch::wasm32::f64_nearest(x)
+pub fn floor(x: f64) -> f64 {
+    core::arch::wasm32::f64_floor(x)
 }
 
 pub fn rintf(x: f32) -> f32 {
     core::arch::wasm32::f32_nearest(x)
 }
 
-pub fn sqrt(x: f64) -> f64 {
-    core::arch::wasm32::f64_sqrt(x)
+pub fn rint(x: f64) -> f64 {
+    core::arch::wasm32::f64_nearest(x)
 }
 
-pub fn sqrtf(x: f32) -> f32 {
-    core::arch::wasm32::f32_sqrt(x)
+pub fn truncf(x: f32) -> f32 {
+    core::arch::wasm32::f32_trunc(x)
 }
 
 pub fn trunc(x: f64) -> f64 {
     core::arch::wasm32::f64_trunc(x)
 }
-
-pub fn truncf(x: f32) -> f32 {
-    core::arch::wasm32::f32_trunc(x)
-}
diff --git a/library/compiler-builtins/libm/src/math/arch/wasm32/sqrt.rs b/library/compiler-builtins/libm/src/math/arch/wasm32/sqrt.rs
new file mode 100644
index 0000000000000..1d73e4ddcfe96
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/wasm32/sqrt.rs
@@ -0,0 +1,7 @@
+pub fn sqrtf(x: f32) -> f32 {
+    core::arch::wasm32::f32_sqrt(x)
+}
+
+pub fn sqrt(x: f64) -> f64 {
+    core::arch::wasm32::f64_sqrt(x)
+}
diff --git a/library/compiler-builtins/libm/src/math/arch/x86/detect.rs b/library/compiler-builtins/libm/src/math/arch/x86/detect.rs
index ca785470b806d..983818b5b5e47 100644
--- a/library/compiler-builtins/libm/src/math/arch/x86/detect.rs
+++ b/library/compiler-builtins/libm/src/math/arch/x86/detect.rs
@@ -1,5 +1,5 @@
-// Using runtime feature detection requires atomics. Currently there are no x86 targets
-// that support sse but not `AtomicPtr`.
+//! Using runtime feature detection requires atomics. Currently there are no x86 targets
+//! that support sse but not `AtomicPtr`.
 
 #[cfg(target_arch = "x86")]
 use core::arch::x86::{__cpuid, __cpuid_count, _xgetbv, CpuidResult};
diff --git a/library/compiler-builtins/libm/src/math/arch/x86/mod.rs b/library/compiler-builtins/libm/src/math/arch/x86/mod.rs
new file mode 100644
index 0000000000000..279bf35b9c9e0
--- /dev/null
+++ b/library/compiler-builtins/libm/src/math/arch/x86/mod.rs
@@ -0,0 +1,8 @@
+//! Architecture-specific support for x86-32 with SSE2 (i686) and x86-64.
+
+mod detect;
+mod fma;
+mod sqrt;
+
+pub use fma::{fma, fmaf};
+pub use sqrt::{sqrt, sqrtf};
diff --git a/library/compiler-builtins/libm/src/math/arch/x86.rs b/library/compiler-builtins/libm/src/math/arch/x86/sqrt.rs
similarity index 85%
rename from library/compiler-builtins/libm/src/math/arch/x86.rs
rename to library/compiler-builtins/libm/src/math/arch/x86/sqrt.rs
index 454aa285074d6..d172e1239f1a9 100644
--- a/library/compiler-builtins/libm/src/math/arch/x86.rs
+++ b/library/compiler-builtins/libm/src/math/arch/x86/sqrt.rs
@@ -1,10 +1,3 @@
-//! Architecture-specific support for x86-32 and x86-64 with SSE2
-
-mod detect;
-mod fma;
-
-pub use fma::{fma, fmaf};
-
 pub fn sqrtf(mut x: f32) -> f32 {
     // SAFETY: `sqrtss` is part of `sse2`, which this module is gated behind. It has no memory
     // access or side effects.
diff --git a/library/compiler-builtins/libm/src/math/atan2.rs b/library/compiler-builtins/libm/src/math/atan2.rs
index 51456e409b8cc..19be7584bbf07 100644
--- a/library/compiler-builtins/libm/src/math/atan2.rs
+++ b/library/compiler-builtins/libm/src/math/atan2.rs
@@ -119,7 +119,7 @@ mod tests {
     use super::*;
 
     #[test]
-    #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")]
+    #[cfg_attr(x86_no_sse2, ignore = "FIXME(i586): possible incorrect rounding")]
     fn sanity_check() {
         assert_eq!(atan2(0.0, 1.0), 0.0);
         assert_eq!(atan2(0.0, -1.0), PI);
diff --git a/library/compiler-builtins/libm/src/math/cbrt.rs b/library/compiler-builtins/libm/src/math/cbrt.rs
index e905e15f13fbe..b6d24c807eb00 100644
--- a/library/compiler-builtins/libm/src/math/cbrt.rs
+++ b/library/compiler-builtins/libm/src/math/cbrt.rs
@@ -208,7 +208,7 @@ mod tests {
 
     #[test]
     fn spot_checks() {
-        if !cfg!(x86_no_sse) {
+        if !cfg!(x86_no_sse2) {
             // Exposes a rounding mode problem. Ignored on i586 because of inaccurate FMA.
             assert_biteq!(
                 cbrt(f64::from_bits(0xf7f792b28f600000)),
diff --git a/library/compiler-builtins/libm/src/math/ceil.rs b/library/compiler-builtins/libm/src/math/ceil.rs
index 2cac49f29ba97..c8fb17e7ddca1 100644
--- a/library/compiler-builtins/libm/src/math/ceil.rs
+++ b/library/compiler-builtins/libm/src/math/ceil.rs
@@ -1,10 +1,12 @@
+use super::generic;
+
 /// Ceil (f16)
 ///
 /// Finds the nearest integer greater than or equal to `x`.
 #[cfg(f16_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceilf16(x: f16) -> f16 {
-    super::generic::ceil(x)
+    generic::ceil_status(x).val
 }
 
 /// Ceil (f32)
@@ -18,7 +20,7 @@ pub fn ceilf(x: f32) -> f32 {
         args: x,
     }
 
-    super::generic::ceil(x)
+    generic::ceil_status(x).val
 }
 
 /// Ceil (f64)
@@ -29,11 +31,11 @@ pub fn ceil(x: f64) -> f64 {
     select_implementation! {
         name: ceil,
         use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
-    super::generic::ceil(x)
+    generic::ceil_status(x).val
 }
 
 /// Ceil (f128)
@@ -42,5 +44,105 @@ pub fn ceil(x: f64) -> f64 {
 #[cfg(f128_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn ceilf128(x: f128) -> f128 {
-    super::generic::ceil(x)
+    generic::ceil_status(x).val
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, FpResult, Hex, Status};
+
+    macro_rules! cases {
+        ($f:ty) => {
+            [
+                // roundtrip
+                (0.0, 0.0, Status::OK),
+                (-0.0, -0.0, Status::OK),
+                (1.0, 1.0, Status::OK),
+                (-1.0, -1.0, Status::OK),
+                (<$f>::INFINITY, <$f>::INFINITY, Status::OK),
+                (<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
+                // with rounding
+                (0.1, 1.0, Status::INEXACT),
+                (-0.1, -0.0, Status::INEXACT),
+                (0.5, 1.0, Status::INEXACT),
+                (-0.5, -0.0, Status::INEXACT),
+                (0.9, 1.0, Status::INEXACT),
+                (-0.9, -0.0, Status::INEXACT),
+                (1.1, 2.0, Status::INEXACT),
+                (-1.1, -1.0, Status::INEXACT),
+                (1.5, 2.0, Status::INEXACT),
+                (-1.5, -1.0, Status::INEXACT),
+                (1.9, 2.0, Status::INEXACT),
+                (-1.9, -1.0, Status::INEXACT),
+            ]
+        };
+    }
+
+    #[track_caller]
+    fn check<F: Float>(f: fn(F) -> F, cases: &[(F, F, Status)]) {
+        for &(x, exp_res, exp_stat) in cases {
+            let FpResult { val, status } = generic::ceil_status(x);
+            assert_biteq!(val, exp_res, "generic::ceil_status({x:?}) ({})", Hex(x));
+            assert_eq!(
+                status,
+                exp_stat,
+                "{x:?} {} -> {exp_res:?} {}",
+                Hex(x),
+                Hex(exp_res)
+            );
+            let val = f(x);
+            assert_biteq!(val, exp_res, "ceil({x:?}) ({})", Hex(x));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn check_f16() {
+        check::<f16>(ceilf16, &cases!(f16));
+        check::<f16>(
+            ceilf16,
+            &[
+                (hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
+                (hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f32() {
+        check::<f32>(ceilf, &cases!(f32));
+        check::<f32>(
+            ceilf,
+            &[
+                (hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
+                (hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f64() {
+        check::<f64>(ceil, &cases!(f64));
+        check::<f64>(
+            ceil,
+            &[
+                (hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
+                (hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn check_f128() {
+        check::<f128>(ceilf128, &cases!(f128));
+        check::<f128>(
+            ceilf128,
+            &[
+                (hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
+                (hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
+            ],
+        );
+    }
 }
diff --git a/library/compiler-builtins/libm/src/math/exp.rs b/library/compiler-builtins/libm/src/math/exp.rs
index cb939ad5d8bf2..05203985bf313 100644
--- a/library/compiler-builtins/libm/src/math/exp.rs
+++ b/library/compiler-builtins/libm/src/math/exp.rs
@@ -85,7 +85,7 @@ const P5: f64 = 4.13813679705723846039e-08; /* 0x3E663769, 0x72BEA4D0 */
 pub fn exp(mut x: f64) -> f64 {
     select_implementation! {
         name: x87_exp,
-        use_arch_required: x86_no_sse,
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
diff --git a/library/compiler-builtins/libm/src/math/exp10.rs b/library/compiler-builtins/libm/src/math/exp10.rs
index e0af1945b922a..452b29a2c3d14 100644
--- a/library/compiler-builtins/libm/src/math/exp10.rs
+++ b/library/compiler-builtins/libm/src/math/exp10.rs
@@ -11,7 +11,7 @@ const P10: &[f64] = &[
 pub fn exp10(x: f64) -> f64 {
     select_implementation! {
         name: x87_exp10,
-        use_arch_required: x86_no_sse,
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
diff --git a/library/compiler-builtins/libm/src/math/exp10f.rs b/library/compiler-builtins/libm/src/math/exp10f.rs
index f0a311c2d1915..4b9e949fbbe97 100644
--- a/library/compiler-builtins/libm/src/math/exp10f.rs
+++ b/library/compiler-builtins/libm/src/math/exp10f.rs
@@ -11,7 +11,7 @@ const P10: &[f32] = &[
 pub fn exp10f(x: f32) -> f32 {
     select_implementation! {
         name: x87_exp10f,
-        use_arch_required: x86_no_sse,
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
diff --git a/library/compiler-builtins/libm/src/math/exp2.rs b/library/compiler-builtins/libm/src/math/exp2.rs
index d4c9e96652000..cc2aa434c4e9d 100644
--- a/library/compiler-builtins/libm/src/math/exp2.rs
+++ b/library/compiler-builtins/libm/src/math/exp2.rs
@@ -326,7 +326,7 @@ static TBL: [u64; TBLSIZE * 2] = [
 pub fn exp2(mut x: f64) -> f64 {
     select_implementation! {
         name: x87_exp2,
-        use_arch_required: x86_no_sse,
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
diff --git a/library/compiler-builtins/libm/src/math/exp2f.rs b/library/compiler-builtins/libm/src/math/exp2f.rs
index ceff6822c5969..44872d74b0ca8 100644
--- a/library/compiler-builtins/libm/src/math/exp2f.rs
+++ b/library/compiler-builtins/libm/src/math/exp2f.rs
@@ -77,7 +77,7 @@ static EXP2FT: [u64; TBLSIZE] = [
 pub fn exp2f(mut x: f32) -> f32 {
     select_implementation! {
         name: x87_exp2f,
-        use_arch_required: x86_no_sse,
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
diff --git a/library/compiler-builtins/libm/src/math/expf.rs b/library/compiler-builtins/libm/src/math/expf.rs
index 5541ab79a9c14..d1185ff2c4621 100644
--- a/library/compiler-builtins/libm/src/math/expf.rs
+++ b/library/compiler-builtins/libm/src/math/expf.rs
@@ -34,7 +34,7 @@ const P2: f32 = -2.7667332906e-3; /* -0xb55215.0p-32 */
 pub fn expf(mut x: f32) -> f32 {
     select_implementation! {
         name: x87_expf,
-        use_arch_required: x86_no_sse,
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
diff --git a/library/compiler-builtins/libm/src/math/floor.rs b/library/compiler-builtins/libm/src/math/floor.rs
index 7241c427f6463..2b1e31c0564b0 100644
--- a/library/compiler-builtins/libm/src/math/floor.rs
+++ b/library/compiler-builtins/libm/src/math/floor.rs
@@ -1,10 +1,12 @@
+use super::generic;
+
 /// Floor (f16)
 ///
 /// Finds the nearest integer less than or equal to `x`.
 #[cfg(f16_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floorf16(x: f16) -> f16 {
-    return super::generic::floor(x);
+    return generic::floor_status(x).val;
 }
 
 /// Floor (f64)
@@ -15,11 +17,11 @@ pub fn floor(x: f64) -> f64 {
     select_implementation! {
         name: floor,
         use_arch: all(target_arch = "wasm32", intrinsics_enabled),
-        use_arch_required: all(target_arch = "x86", not(target_feature = "sse2")),
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
-    return super::generic::floor(x);
+    return generic::floor_status(x).val;
 }
 
 /// Floor (f32)
@@ -33,7 +35,7 @@ pub fn floorf(x: f32) -> f32 {
         args: x,
     }
 
-    return super::generic::floor(x);
+    return generic::floor_status(x).val;
 }
 
 /// Floor (f128)
@@ -42,5 +44,105 @@ pub fn floorf(x: f32) -> f32 {
 #[cfg(f128_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn floorf128(x: f128) -> f128 {
-    return super::generic::floor(x);
+    return generic::floor_status(x).val;
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, FpResult, Hex, Status};
+
+    macro_rules! cases {
+        ($f:ty) => {
+            [
+                // roundtrip
+                (0.0, 0.0, Status::OK),
+                (-0.0, -0.0, Status::OK),
+                (1.0, 1.0, Status::OK),
+                (-1.0, -1.0, Status::OK),
+                (<$f>::INFINITY, <$f>::INFINITY, Status::OK),
+                (<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
+                // with rounding
+                (0.1, 0.0, Status::INEXACT),
+                (-0.1, -1.0, Status::INEXACT),
+                (0.5, 0.0, Status::INEXACT),
+                (-0.5, -1.0, Status::INEXACT),
+                (0.9, 0.0, Status::INEXACT),
+                (-0.9, -1.0, Status::INEXACT),
+                (1.1, 1.0, Status::INEXACT),
+                (-1.1, -2.0, Status::INEXACT),
+                (1.5, 1.0, Status::INEXACT),
+                (-1.5, -2.0, Status::INEXACT),
+                (1.9, 1.0, Status::INEXACT),
+                (-1.9, -2.0, Status::INEXACT),
+            ]
+        };
+    }
+
+    #[track_caller]
+    fn check<F: Float>(f: fn(F) -> F, cases: &[(F, F, Status)]) {
+        for &(x, exp_res, exp_stat) in cases {
+            let FpResult { val, status } = generic::floor_status(x);
+            assert_biteq!(val, exp_res, "generic::floor_status({x:?}) ({})", Hex(x));
+            assert_eq!(
+                status,
+                exp_stat,
+                "{x:?} {} -> {exp_res:?} {}",
+                Hex(x),
+                Hex(exp_res)
+            );
+            let val = f(x);
+            assert_biteq!(val, exp_res, "floor({x:?}) ({})", Hex(x));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn check_f16() {
+        check::<f16>(floorf16, &cases!(f16));
+        check::<f16>(
+            floorf16,
+            &[
+                (hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
+                (hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f32() {
+        check::<f32>(floorf, &cases!(f32));
+        check::<f32>(
+            floorf,
+            &[
+                (hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
+                (hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f64() {
+        check::<f64>(floor, &cases!(f64));
+        check::<f64>(
+            floor,
+            &[
+                (hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
+                (hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn check_f128() {
+        check::<f128>(floorf128, &cases!(f128));
+        check::<f128>(
+            floorf128,
+            &[
+                (hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
+                (hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
+            ],
+        );
+    }
 }
diff --git a/library/compiler-builtins/libm/src/math/fma.rs b/library/compiler-builtins/libm/src/math/fma.rs
index 70e6de768fab0..7f8120650a8d9 100644
--- a/library/compiler-builtins/libm/src/math/fma.rs
+++ b/library/compiler-builtins/libm/src/math/fma.rs
@@ -148,7 +148,7 @@ mod tests {
 
         let result = fma(-0.992, -0.992, -0.992);
         //force rounding to storage format on x87 to prevent superious errors.
-        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        #[cfg(x86_no_sse2)]
         let result = force_eval!(result);
         assert_eq!(result, -0.007936000000000007,);
     }
diff --git a/library/compiler-builtins/libm/src/math/fmin_fmax.rs b/library/compiler-builtins/libm/src/math/fmin_fmax.rs
index ead9e6599f1be..64ad79928f7a0 100644
--- a/library/compiler-builtins/libm/src/math/fmin_fmax.rs
+++ b/library/compiler-builtins/libm/src/math/fmin_fmax.rs
@@ -77,8 +77,7 @@ pub fn fmaxf128(x: f128, y: f128) -> f128 {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::hex_float::Hexi;
-    use crate::support::{Float, Hexf};
+    use crate::support::{Float, Hex};
 
     fn fmin_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         // Note that (YaN, sNaN) and (sNaN, YaN) results differ from 754-2008. This is intentional,
@@ -171,7 +170,7 @@ mod tests {
 
         for (x, y, res) in cases {
             let val = f(x, y);
-            assert_biteq!(val, res, "fmin({}, {})", Hexf(x), Hexf(y));
+            assert_biteq!(val, res, "fmin({}, {})", Hex(x), Hex(y));
         }
 
         // Ordering between zeros does not matter
@@ -314,10 +313,10 @@ mod tests {
                 val,
                 res,
                 "fmax({}, {}) ({}, {})",
-                Hexf(x),
-                Hexf(y),
-                Hexi(x.to_bits()),
-                Hexi(y.to_bits()),
+                Hex(x),
+                Hex(y),
+                Hex(x.to_bits()),
+                Hex(y.to_bits()),
             );
         }
 
diff --git a/library/compiler-builtins/libm/src/math/fminimum_fmaximum.rs b/library/compiler-builtins/libm/src/math/fminimum_fmaximum.rs
index ffc724e3a8d74..59d9af317674f 100644
--- a/library/compiler-builtins/libm/src/math/fminimum_fmaximum.rs
+++ b/library/compiler-builtins/libm/src/math/fminimum_fmaximum.rs
@@ -69,8 +69,7 @@ pub fn fmaximumf128(x: f128, y: f128) -> f128 {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::hex_float::Hexi;
-    use crate::support::{Float, Hexf};
+    use crate::support::{Float, Hex};
 
     fn fminimum_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
@@ -132,10 +131,10 @@ mod tests {
                 val,
                 res,
                 "fminimum({}, {}) ({}, {})",
-                Hexf(x),
-                Hexf(y),
-                Hexi(x.to_bits()),
-                Hexi(y.to_bits()),
+                Hex(x),
+                Hex(y),
+                Hex(x.to_bits()),
+                Hex(y.to_bits()),
             );
         }
 
@@ -264,10 +263,10 @@ mod tests {
                 val,
                 res,
                 "fmaximum({}, {}) ({}, {})",
-                Hexf(x),
-                Hexf(y),
-                Hexi(x.to_bits()),
-                Hexi(y.to_bits()),
+                Hex(x),
+                Hex(y),
+                Hex(x.to_bits()),
+                Hex(y.to_bits()),
             );
         }
 
diff --git a/library/compiler-builtins/libm/src/math/fminimum_fmaximum_num.rs b/library/compiler-builtins/libm/src/math/fminimum_fmaximum_num.rs
index 3157f8a3fee8c..51b20b736d2ca 100644
--- a/library/compiler-builtins/libm/src/math/fminimum_fmaximum_num.rs
+++ b/library/compiler-builtins/libm/src/math/fminimum_fmaximum_num.rs
@@ -69,8 +69,7 @@ pub fn fmaximum_numf128(x: f128, y: f128) -> f128 {
 #[cfg(test)]
 mod tests {
     use super::*;
-    use crate::support::hex_float::Hexi;
-    use crate::support::{Float, Hexf};
+    use crate::support::{Float, Hex};
 
     fn fminimum_num_spec_test<F: Float>(f: impl Fn(F, F) -> F) {
         let cases = [
@@ -167,10 +166,10 @@ mod tests {
                 actual,
                 expected,
                 "fminimum_num({}, {}) ({}, {})",
-                Hexf(x),
-                Hexf(y),
-                Hexi(x.to_bits()),
-                Hexi(y.to_bits()),
+                Hex(x),
+                Hex(y),
+                Hex(x.to_bits()),
+                Hex(y.to_bits()),
             );
         }
 
@@ -312,10 +311,10 @@ mod tests {
                 actual,
                 expected,
                 "fmaximum_num({}, {}) ({}, {})",
-                Hexf(x),
-                Hexf(y),
-                Hexi(x.to_bits()),
-                Hexi(y.to_bits()),
+                Hex(x),
+                Hex(y),
+                Hex(x.to_bits()),
+                Hex(y.to_bits()),
             );
         }
 
diff --git a/library/compiler-builtins/libm/src/math/generic/ceil.rs b/library/compiler-builtins/libm/src/math/generic/ceil.rs
index 5584f6503ef58..944cb4d4c7259 100644
--- a/library/compiler-builtins/libm/src/math/generic/ceil.rs
+++ b/library/compiler-builtins/libm/src/math/generic/ceil.rs
@@ -9,11 +9,6 @@
 
 use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
 
-#[inline]
-pub fn ceil<F: Float>(x: F) -> F {
-    ceil_status(x).val
-}
-
 #[inline]
 pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
@@ -66,89 +61,3 @@ pub fn ceil_status<F: Float>(x: F) -> FpResult<F> {
 
     FpResult::new(res, status)
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    macro_rules! cases {
-        ($f:ty) => {
-            [
-                // roundtrip
-                (0.0, 0.0, Status::OK),
-                (-0.0, -0.0, Status::OK),
-                (1.0, 1.0, Status::OK),
-                (-1.0, -1.0, Status::OK),
-                (<$f>::INFINITY, <$f>::INFINITY, Status::OK),
-                (<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
-                // with rounding
-                (0.1, 1.0, Status::INEXACT),
-                (-0.1, -0.0, Status::INEXACT),
-                (0.5, 1.0, Status::INEXACT),
-                (-0.5, -0.0, Status::INEXACT),
-                (0.9, 1.0, Status::INEXACT),
-                (-0.9, -0.0, Status::INEXACT),
-                (1.1, 2.0, Status::INEXACT),
-                (-1.1, -1.0, Status::INEXACT),
-                (1.5, 2.0, Status::INEXACT),
-                (-1.5, -1.0, Status::INEXACT),
-                (1.9, 2.0, Status::INEXACT),
-                (-1.9, -1.0, Status::INEXACT),
-            ]
-        };
-    }
-
-    #[track_caller]
-    fn check<F: Float>(cases: &[(F, F, Status)]) {
-        for &(x, exp_res, exp_stat) in cases {
-            let FpResult { val, status } = ceil_status(x);
-            assert_biteq!(val, exp_res, "{x:?} {}", Hexf(x));
-            assert_eq!(
-                status,
-                exp_stat,
-                "{x:?} {} -> {exp_res:?} {}",
-                Hexf(x),
-                Hexf(exp_res)
-            );
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn check_f16() {
-        check::<f16>(&cases!(f16));
-        check::<f16>(&[
-            (hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
-            (hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    fn check_f32() {
-        check::<f32>(&cases!(f32));
-        check::<f32>(&[
-            (hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
-            (hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    fn check_f64() {
-        check::<f64>(&cases!(f64));
-        check::<f64>(&[
-            (hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
-            (hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        check::<f128>(&cases!(f128));
-        check::<f128>(&[
-            (hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
-            (hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
-        ]);
-    }
-}
diff --git a/library/compiler-builtins/libm/src/math/generic/floor.rs b/library/compiler-builtins/libm/src/math/generic/floor.rs
index 7045229c0c75a..a99d192831e3c 100644
--- a/library/compiler-builtins/libm/src/math/generic/floor.rs
+++ b/library/compiler-builtins/libm/src/math/generic/floor.rs
@@ -9,11 +9,6 @@
 
 use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
 
-#[inline]
-pub fn floor<F: Float>(x: F) -> F {
-    floor_status(x).val
-}
-
 #[inline]
 pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
     let zero = IntTy::<F>::ZERO;
@@ -58,89 +53,3 @@ pub fn floor_status<F: Float>(x: F) -> FpResult<F> {
 
     FpResult::new(res, Status::INEXACT)
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    macro_rules! cases {
-        ($f:ty) => {
-            [
-                // roundtrip
-                (0.0, 0.0, Status::OK),
-                (-0.0, -0.0, Status::OK),
-                (1.0, 1.0, Status::OK),
-                (-1.0, -1.0, Status::OK),
-                (<$f>::INFINITY, <$f>::INFINITY, Status::OK),
-                (<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
-                // with rounding
-                (0.1, 0.0, Status::INEXACT),
-                (-0.1, -1.0, Status::INEXACT),
-                (0.5, 0.0, Status::INEXACT),
-                (-0.5, -1.0, Status::INEXACT),
-                (0.9, 0.0, Status::INEXACT),
-                (-0.9, -1.0, Status::INEXACT),
-                (1.1, 1.0, Status::INEXACT),
-                (-1.1, -2.0, Status::INEXACT),
-                (1.5, 1.0, Status::INEXACT),
-                (-1.5, -2.0, Status::INEXACT),
-                (1.9, 1.0, Status::INEXACT),
-                (-1.9, -2.0, Status::INEXACT),
-            ]
-        };
-    }
-
-    #[track_caller]
-    fn check<F: Float>(cases: &[(F, F, Status)]) {
-        for &(x, exp_res, exp_stat) in cases {
-            let FpResult { val, status } = floor_status(x);
-            assert_biteq!(val, exp_res, "{x:?} {}", Hexf(x));
-            assert_eq!(
-                status,
-                exp_stat,
-                "{x:?} {} -> {exp_res:?} {}",
-                Hexf(x),
-                Hexf(exp_res)
-            );
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn check_f16() {
-        check::<f16>(&cases!(f16));
-        check::<f16>(&[
-            (hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
-            (hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    fn check_f32() {
-        check::<f32>(&cases!(f32));
-        check::<f32>(&[
-            (hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
-            (hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    fn check_f64() {
-        check::<f64>(&cases!(f64));
-        check::<f64>(&[
-            (hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
-            (hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        check::<f128>(&cases!(f128));
-        check::<f128>(&[
-            (hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
-            (hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
-        ]);
-    }
-}
diff --git a/library/compiler-builtins/libm/src/math/generic/mod.rs b/library/compiler-builtins/libm/src/math/generic/mod.rs
index 114fcddf516e5..7a26295e39077 100644
--- a/library/compiler-builtins/libm/src/math/generic/mod.rs
+++ b/library/compiler-builtins/libm/src/math/generic/mod.rs
@@ -1,5 +1,11 @@
-// Note: generic functions are marked `#[inline]` because, even though generic functions are
-// typically inlined, this does not seem to always be the case.
+//! Generic implementations that are shared by multiple types.
+//!
+//! Implementation and usage notes:
+//!
+//! * Generic functions are marked `#[inline]` because, even though generic functions are
+//!   typically inlined, we seem to occasionally run into exceptions.
+//! * Tests usually live wherever the functions are consumed (e.g. `src/ceil`) so they can be
+//!   reused to test arch-specific implementations.
 
 mod ceil;
 mod copysign;
@@ -23,11 +29,11 @@ mod scalbn;
 mod sqrt;
 mod trunc;
 
-pub use ceil::ceil;
+pub use ceil::ceil_status;
 pub use copysign::copysign;
 pub use fabs::fabs;
 pub use fdim::fdim;
-pub use floor::floor;
+pub use floor::floor_status;
 pub use fma::fma_round;
 pub use fma_wide::fma_wide_round;
 pub use fmax::fmax;
@@ -39,8 +45,10 @@ pub use fminimum_num::fminimum_num;
 pub use fmod::fmod;
 pub use frexp::frexp;
 pub use ilogb::ilogb;
-pub use rint::rint_round;
+pub use rint::rint_status;
 pub use round::round;
 pub use scalbn::scalbn;
-pub use sqrt::sqrt;
-pub use trunc::trunc;
+#[cfg(test)]
+pub use sqrt::SqrtHelper;
+pub use sqrt::sqrt_round;
+pub use trunc::trunc_status;
diff --git a/library/compiler-builtins/libm/src/math/generic/rint.rs b/library/compiler-builtins/libm/src/math/generic/rint.rs
index c5bc27d3de6bc..aa3d94e9a30d8 100644
--- a/library/compiler-builtins/libm/src/math/generic/rint.rs
+++ b/library/compiler-builtins/libm/src/math/generic/rint.rs
@@ -1,12 +1,12 @@
 /* SPDX-License-Identifier: MIT */
 /* origin: musl src/math/rint.c */
 
-use crate::support::{Float, FpResult, Round};
+use crate::support::{Float, FpResult, Status};
 
 /// IEEE 754-2019 `roundToIntegralExact`, which respects rounding mode and raises inexact if
 /// applicable.
 #[inline]
-pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
+pub fn rint_status<F: Float>(x: F) -> FpResult<F> {
     let toint = F::ONE / F::EPSILON;
     let e = x.ex();
     let positive = x.is_sign_positive();
@@ -14,7 +14,7 @@ pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
     // On i386 `force_eval!` must be used to force rounding via storage to memory. Otherwise,
     // the excess precission from x87 would cause an incorrect final result.
     let force = |x| {
-        if cfg!(x86_no_sse) && (F::BITS == 32 || F::BITS == 64) {
+        if cfg!(x86_no_sse2) && (F::BITS == 32 || F::BITS == 64) {
             force_eval!(x)
         } else {
             x
@@ -41,90 +41,10 @@ pub fn rint_round<F: Float>(x: F, _round: Round) -> FpResult<F> {
         }
     };
 
-    FpResult::ok(res)
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::{Hexf, Status};
-
-    fn spec_test<F: Float>(cases: &[(F, F, Status)]) {
-        let roundtrip = [
-            F::ZERO,
-            F::ONE,
-            F::NEG_ONE,
-            F::NEG_ZERO,
-            F::INFINITY,
-            F::NEG_INFINITY,
-        ];
-
-        for x in roundtrip {
-            let FpResult { val, status } = rint_round(x, Round::Nearest);
-            assert_biteq!(val, x, "rint_round({})", Hexf(x));
-            assert_eq!(status, Status::OK, "{}", Hexf(x));
-        }
-
-        for &(x, res, res_stat) in cases {
-            let FpResult { val, status } = rint_round(x, Round::Nearest);
-            assert_biteq!(val, res, "rint_round({})", Hexf(x));
-            assert_eq!(status, res_stat, "{}", Hexf(x));
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_tests_f16() {
-        let cases = [];
-        spec_test::<f16>(&cases);
-    }
-
-    #[test]
-    fn spec_tests_f32() {
-        let cases = [
-            (0.1, 0.0, Status::OK),
-            (-0.1, -0.0, Status::OK),
-            (0.5, 0.0, Status::OK),
-            (-0.5, -0.0, Status::OK),
-            (0.9, 1.0, Status::OK),
-            (-0.9, -1.0, Status::OK),
-            (1.1, 1.0, Status::OK),
-            (-1.1, -1.0, Status::OK),
-            (1.5, 2.0, Status::OK),
-            (-1.5, -2.0, Status::OK),
-            (1.9, 2.0, Status::OK),
-            (-1.9, -2.0, Status::OK),
-            (2.8, 3.0, Status::OK),
-            (-2.8, -3.0, Status::OK),
-        ];
-        spec_test::<f32>(&cases);
-    }
-
-    #[test]
-    fn spec_tests_f64() {
-        let cases = [
-            (0.1, 0.0, Status::OK),
-            (-0.1, -0.0, Status::OK),
-            (0.5, 0.0, Status::OK),
-            (-0.5, -0.0, Status::OK),
-            (0.9, 1.0, Status::OK),
-            (-0.9, -1.0, Status::OK),
-            (1.1, 1.0, Status::OK),
-            (-1.1, -1.0, Status::OK),
-            (1.5, 2.0, Status::OK),
-            (-1.5, -2.0, Status::OK),
-            (1.9, 2.0, Status::OK),
-            (-1.9, -2.0, Status::OK),
-            (2.8, 3.0, Status::OK),
-            (-2.8, -3.0, Status::OK),
-        ];
-        spec_test::<f64>(&cases);
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        let cases = [];
-        spec_test::<f128>(&cases);
-    }
+    let status = if res == x {
+        Status::OK
+    } else {
+        Status::INEXACT
+    };
+    FpResult::new(res, status)
 }
diff --git a/library/compiler-builtins/libm/src/math/generic/round.rs b/library/compiler-builtins/libm/src/math/generic/round.rs
index 16739f01d8775..9cab1e22c2c6c 100644
--- a/library/compiler-builtins/libm/src/math/generic/round.rs
+++ b/library/compiler-builtins/libm/src/math/generic/round.rs
@@ -1,4 +1,4 @@
-use super::{copysign, trunc};
+use super::{copysign, trunc_status};
 use crate::support::{Float, MinInt};
 
 #[inline]
@@ -6,78 +6,5 @@ pub fn round<F: Float>(x: F) -> F {
     let f0p5 = F::from_parts(false, F::EXP_BIAS - 1, F::Int::ZERO); // 0.5
     let f0p25 = F::from_parts(false, F::EXP_BIAS - 2, F::Int::ZERO); // 0.25
 
-    trunc(x + copysign(f0p5 - f0p25 * F::EPSILON, x))
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn zeroes_f16() {
-        assert_biteq!(round(0.0_f16), 0.0_f16);
-        assert_biteq!(round(-0.0_f16), -0.0_f16);
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn sanity_check_f16() {
-        assert_eq!(round(-1.0_f16), -1.0);
-        assert_eq!(round(2.8_f16), 3.0);
-        assert_eq!(round(-0.5_f16), -1.0);
-        assert_eq!(round(0.5_f16), 1.0);
-        assert_eq!(round(-1.5_f16), -2.0);
-        assert_eq!(round(1.5_f16), 2.0);
-    }
-
-    #[test]
-    fn zeroes_f32() {
-        assert_biteq!(round(0.0_f32), 0.0_f32);
-        assert_biteq!(round(-0.0_f32), -0.0_f32);
-    }
-
-    #[test]
-    fn sanity_check_f32() {
-        assert_eq!(round(-1.0_f32), -1.0);
-        assert_eq!(round(2.8_f32), 3.0);
-        assert_eq!(round(-0.5_f32), -1.0);
-        assert_eq!(round(0.5_f32), 1.0);
-        assert_eq!(round(-1.5_f32), -2.0);
-        assert_eq!(round(1.5_f32), 2.0);
-    }
-
-    #[test]
-    fn zeroes_f64() {
-        assert_biteq!(round(0.0_f64), 0.0_f64);
-        assert_biteq!(round(-0.0_f64), -0.0_f64);
-    }
-
-    #[test]
-    fn sanity_check_f64() {
-        assert_eq!(round(-1.0_f64), -1.0);
-        assert_eq!(round(2.8_f64), 3.0);
-        assert_eq!(round(-0.5_f64), -1.0);
-        assert_eq!(round(0.5_f64), 1.0);
-        assert_eq!(round(-1.5_f64), -2.0);
-        assert_eq!(round(1.5_f64), 2.0);
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn zeroes_f128() {
-        assert_biteq!(round(0.0_f128), 0.0_f128);
-        assert_biteq!(round(-0.0_f128), -0.0_f128);
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn sanity_check_f128() {
-        assert_eq!(round(-1.0_f128), -1.0);
-        assert_eq!(round(2.8_f128), 3.0);
-        assert_eq!(round(-0.5_f128), -1.0);
-        assert_eq!(round(0.5_f128), 1.0);
-        assert_eq!(round(-1.5_f128), -2.0);
-        assert_eq!(round(1.5_f128), 2.0);
-    }
+    trunc_status(x + copysign(f0p5 - f0p25 * F::EPSILON, x)).val
 }
diff --git a/library/compiler-builtins/libm/src/math/generic/sqrt.rs b/library/compiler-builtins/libm/src/math/generic/sqrt.rs
index e97a43d349569..013f6c097487f 100644
--- a/library/compiler-builtins/libm/src/math/generic/sqrt.rs
+++ b/library/compiler-builtins/libm/src/math/generic/sqrt.rs
@@ -45,20 +45,6 @@ use crate::support::{
     CastFrom, CastInto, DInt, Float, FpResult, HInt, Int, IntTy, MinInt, Round, Status, cold_path,
 };
 
-#[inline]
-pub fn sqrt<F>(x: F) -> F
-where
-    F: Float + SqrtHelper,
-    F::Int: HInt,
-    F::Int: From<u8>,
-    F::Int: From<F::ISet2>,
-    F::Int: CastInto<F::ISet1>,
-    F::Int: CastInto<F::ISet2>,
-    u32: CastInto<F::Int>,
-{
-    sqrt_round(x, Round::Nearest).val
-}
-
 #[inline]
 pub fn sqrt_round<F>(x: F, _round: Round) -> FpResult<F>
 where
@@ -164,17 +150,17 @@ where
     let s1_u2: F::ISet1 = ((m_u2) >> (F::BITS - F::ISet1::BITS)).cast();
 
     // Perform iterations, if any, at quarter width (used for `f128`).
-    let (r1_u0, _s1_u2) = goldschmidt::<F, F::ISet1>(r1_u0, s1_u2, F::SET1_ROUNDS, false);
+    let (r1_u0, _s1_u2) = goldschmidt::<F::ISet1>(r1_u0, s1_u2, F::SET1_ROUNDS, false);
 
     // Widen values and perform iterations at half width (used for `f64` and `f128`).
     let r2_u0: F::ISet2 = F::ISet2::from(r1_u0) << (F::ISet2::BITS - F::ISet1::BITS);
     let s2_u2: F::ISet2 = ((m_u2) >> (F::BITS - F::ISet2::BITS)).cast();
-    let (r2_u0, _s2_u2) = goldschmidt::<F, F::ISet2>(r2_u0, s2_u2, F::SET2_ROUNDS, false);
+    let (r2_u0, _s2_u2) = goldschmidt::<F::ISet2>(r2_u0, s2_u2, F::SET2_ROUNDS, false);
 
     // Perform final iterations at full width (used for all float types).
     let r_u0: F::Int = F::Int::from(r2_u0) << (F::BITS - F::ISet2::BITS);
     let s_u2: F::Int = m_u2;
-    let (_r_u0, s_u2) = goldschmidt::<F, F::Int>(r_u0, s_u2, F::FINAL_ROUNDS, true);
+    let (_r_u0, s_u2) = goldschmidt::<F::Int>(r_u0, s_u2, F::FINAL_ROUNDS, true);
 
     // Shift back to mantissa position.
     let mut m = s_u2 >> (F::EXP_BITS - 2);
@@ -250,9 +236,8 @@ fn wmulh<I: HInt>(a: I, b: I) -> I {
 /// Note that performance relies on the optimizer being able to unroll these loops (reasonably
 /// trivial, `count` is a constant when called).
 #[inline]
-fn goldschmidt<F, I>(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I)
+fn goldschmidt<I>(mut r_u0: I, mut s_u2: I, count: u32, final_set: bool) -> (I, I)
 where
-    F: SqrtHelper,
     I: HInt + From<u8>,
 {
     let three_u2 = I::from(0b11u8) << (I::BITS - 2);
@@ -365,175 +350,3 @@ static RSQRT_TAB: [u16; 128] = [
     0xc116, 0xc03c, 0xbf65, 0xbe90, 0xbdbe, 0xbcef, 0xbc23, 0xbb59,
     0xba91, 0xb9cc, 0xb90a, 0xb84a, 0xb78c, 0xb6d0, 0xb617, 0xb560,
 ];
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    /// Test behavior specified in IEEE 754 `squareRoot`.
-    fn spec_test<F>()
-    where
-        F: Float + SqrtHelper,
-        F::Int: HInt,
-        F::Int: From<u8>,
-        F::Int: From<F::ISet2>,
-        F::Int: CastInto<F::ISet1>,
-        F::Int: CastInto<F::ISet2>,
-        u32: CastInto<F::Int>,
-    {
-        // Values that should return a NaN and raise invalid
-        let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN];
-
-        // Values that return unaltered
-        let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY];
-
-        for x in nan {
-            let FpResult { val, status } = sqrt_round(x, Round::Nearest);
-            assert!(val.is_nan());
-            assert!(status == Status::INVALID);
-        }
-
-        for x in roundtrip {
-            let FpResult { val, status } = sqrt_round(x, Round::Nearest);
-            assert_biteq!(val, x);
-            assert!(status == Status::OK);
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn sanity_check_f16() {
-        assert_biteq!(sqrt(100.0f16), 10.0);
-        assert_biteq!(sqrt(4.0f16), 2.0);
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn spec_tests_f16() {
-        spec_test::<f16>();
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    #[allow(clippy::approx_constant)]
-    fn conformance_tests_f16() {
-        let cases = [
-            (f16::PI, 0x3f17_u16),
-            (10000.0_f16, 0x5640_u16),
-            (f16::from_bits(0x0000000f), 0x13bf_u16),
-            (f16::INFINITY, f16::INFINITY.to_bits()),
-        ];
-
-        for (input, output) in cases {
-            assert_biteq!(
-                sqrt(input),
-                f16::from_bits(output),
-                "input: {input:?} ({:#018x})",
-                input.to_bits()
-            );
-        }
-    }
-
-    #[test]
-    fn sanity_check_f32() {
-        assert_biteq!(sqrt(100.0f32), 10.0);
-        assert_biteq!(sqrt(4.0f32), 2.0);
-    }
-
-    #[test]
-    fn spec_tests_f32() {
-        spec_test::<f32>();
-    }
-
-    #[test]
-    #[allow(clippy::approx_constant)]
-    fn conformance_tests_f32() {
-        let cases = [
-            (f32::PI, 0x3fe2dfc5_u32),
-            (10000.0f32, 0x42c80000_u32),
-            (f32::from_bits(0x0000000f), 0x1b2f456f_u32),
-            (f32::INFINITY, f32::INFINITY.to_bits()),
-        ];
-
-        for (input, output) in cases {
-            assert_biteq!(
-                sqrt(input),
-                f32::from_bits(output),
-                "input: {input:?} ({:#018x})",
-                input.to_bits()
-            );
-        }
-    }
-
-    #[test]
-    fn sanity_check_f64() {
-        assert_biteq!(sqrt(100.0f64), 10.0);
-        assert_biteq!(sqrt(4.0f64), 2.0);
-    }
-
-    #[test]
-    fn spec_tests_f64() {
-        spec_test::<f64>();
-    }
-
-    #[test]
-    #[allow(clippy::approx_constant)]
-    fn conformance_tests_f64() {
-        let cases = [
-            (f64::PI, 0x3ffc5bf891b4ef6a_u64),
-            (10000.0, 0x4059000000000000_u64),
-            (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64),
-            (f64::INFINITY, f64::INFINITY.to_bits()),
-        ];
-
-        for (input, output) in cases {
-            assert_biteq!(
-                sqrt(input),
-                f64::from_bits(output),
-                "input: {input:?} ({:#018x})",
-                input.to_bits()
-            );
-        }
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn sanity_check_f128() {
-        assert_biteq!(sqrt(100.0f128), 10.0);
-        assert_biteq!(sqrt(4.0f128), 2.0);
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        spec_test::<f128>();
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    #[allow(clippy::approx_constant)]
-    fn conformance_tests_f128() {
-        let cases = [
-            (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128),
-            // 10_000.0, see `f16` for reasoning.
-            (
-                f128::from_bits(0x400c3880000000000000000000000000),
-                0x40059000000000000000000000000000_u128,
-            ),
-            (
-                f128::from_bits(0x0000000f),
-                0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128,
-            ),
-            (f128::INFINITY, f128::INFINITY.to_bits()),
-        ];
-
-        for (input, output) in cases {
-            assert_biteq!(
-                sqrt(input),
-                f128::from_bits(output),
-                "input: {input:?} ({:#018x})",
-                input.to_bits()
-            );
-        }
-    }
-}
diff --git a/library/compiler-builtins/libm/src/math/generic/trunc.rs b/library/compiler-builtins/libm/src/math/generic/trunc.rs
index 7f18eb42e884a..0b252fab94ae1 100644
--- a/library/compiler-builtins/libm/src/math/generic/trunc.rs
+++ b/library/compiler-builtins/libm/src/math/generic/trunc.rs
@@ -3,11 +3,6 @@
 
 use crate::support::{Float, FpResult, Int, IntTy, MinInt, Status};
 
-#[inline]
-pub fn trunc<F: Float>(x: F) -> F {
-    trunc_status(x).val
-}
-
 #[inline]
 pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
     let xi: F::Int = x.to_bits();
@@ -39,89 +34,3 @@ pub fn trunc_status<F: Float>(x: F) -> FpResult<F> {
     // Now zero the bits we need to truncate and return.
     FpResult::new(F::from_bits(xi ^ cleared), status)
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use crate::support::Hexf;
-
-    macro_rules! cases {
-        ($f:ty) => {
-            [
-                // roundtrip
-                (0.0, 0.0, Status::OK),
-                (-0.0, -0.0, Status::OK),
-                (1.0, 1.0, Status::OK),
-                (-1.0, -1.0, Status::OK),
-                (<$f>::INFINITY, <$f>::INFINITY, Status::OK),
-                (<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
-                // with rounding
-                (0.1, 0.0, Status::INEXACT),
-                (-0.1, -0.0, Status::INEXACT),
-                (0.5, 0.0, Status::INEXACT),
-                (-0.5, -0.0, Status::INEXACT),
-                (0.9, 0.0, Status::INEXACT),
-                (-0.9, -0.0, Status::INEXACT),
-                (1.1, 1.0, Status::INEXACT),
-                (-1.1, -1.0, Status::INEXACT),
-                (1.5, 1.0, Status::INEXACT),
-                (-1.5, -1.0, Status::INEXACT),
-                (1.9, 1.0, Status::INEXACT),
-                (-1.9, -1.0, Status::INEXACT),
-            ]
-        };
-    }
-
-    #[track_caller]
-    fn check<F: Float>(cases: &[(F, F, Status)]) {
-        for &(x, exp_res, exp_stat) in cases {
-            let FpResult { val, status } = trunc_status(x);
-            assert_biteq!(val, exp_res, "{x:?} {}", Hexf(x));
-            assert_eq!(
-                status,
-                exp_stat,
-                "{x:?} {} -> {exp_res:?} {}",
-                Hexf(x),
-                Hexf(exp_res)
-            );
-        }
-    }
-
-    #[test]
-    #[cfg(f16_enabled)]
-    fn check_f16() {
-        check::<f16>(&cases!(f16));
-        check::<f16>(&[
-            (hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
-            (hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    fn check_f32() {
-        check::<f32>(&cases!(f32));
-        check::<f32>(&[
-            (hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
-            (hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    fn check_f64() {
-        check::<f64>(&cases!(f64));
-        check::<f64>(&[
-            (hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
-            (hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
-        ]);
-    }
-
-    #[test]
-    #[cfg(f128_enabled)]
-    fn spec_tests_f128() {
-        check::<f128>(&cases!(f128));
-        check::<f128>(&[
-            (hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
-            (hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
-        ]);
-    }
-}
diff --git a/library/compiler-builtins/libm/src/math/hypot.rs b/library/compiler-builtins/libm/src/math/hypot.rs
index c0b2a19370cd7..e8bd44ca975d5 100644
--- a/library/compiler-builtins/libm/src/math/hypot.rs
+++ b/library/compiler-builtins/libm/src/math/hypot.rs
@@ -1,72 +1,301 @@
+/* SPDX-License-Identifier: MIT */
+/* origin: core-math/src/binary64/hypot/hypot.c
+ * Copyright (c) 2022 Alexei Sibidanov.
+ * Ported to Rust in 2025, TG
+ * Approximate CORE-MATH commit: 8ea8ea35c518
+ */
+
+//! Euclidian distance via the pythagorean theorem (`√(x2 + y2)`).
+//!
+//! Per IEEE 754-2019:
+//!
+//! - Domain: `[−∞, +∞] × [−∞, +∞]`
+//! - `hypot(±0, ±0)` is +0
+//! - `hypot(±∞, qNaN)` is +∞
+//! - `hypot(qNaN, ±∞)` is +∞.
+//! - May raise overflow or underflow
+
 use super::sqrt;
+#[allow(unused_imports)] // msrv compat
+use super::support::Float;
+use super::support::cold_path;
+
+#[cfg_attr(assert_no_panic, no_panic::no_panic)]
+pub fn hypot(x: f64, y: f64) -> f64 {
+    return cr_hypot(x, y);
+}
+
+fn cr_hypot(mut x: f64, mut y: f64) -> f64 {
+    let flag = get_flags();
+
+    let xi = x.to_bits();
+    let yi = y.to_bits();
+
+    let emsk: u64 = 0x7ffu64 << 52;
+    let mut ex: u64 = xi & emsk;
+    let mut ey: u64 = yi & emsk;
+    /* emsk corresponds to the upper bits of NaN and Inf (apart the sign bit) */
+    x = x.abs();
+    y = y.abs();
+    if ex == emsk || ey == emsk {
+        cold_path();
+
+        /* Either x or y is NaN or Inf */
+        let wx: u64 = xi << 1;
+        let wy: u64 = yi << 1;
+        let wm: u64 = emsk << 1;
+
+        let one_inf = (wx == wm) ^ (wy == wm);
+        let one_nan = x.is_nan() ^ y.is_nan();
+
+        // let nqnn: i32 = (((wx >> 52) == 0xfff) ^ ((wy >> 52) == 0xfff)) as i32;
+        // /* ninf is 1 when only one of x and y is +/-Inf
+        // nqnn is 1 when only one of x and y is qNaN
+        // IEEE 754 says that hypot(+/-Inf,qNaN)=hypot(qNaN,+/-Inf)=+Inf. */
+        if one_inf && one_nan {
+            return f64::INFINITY;
+        }
+        return x + y; /* inf, sNaN */
+    }
+
+    let u: f64 = x.max(y);
+    let v: f64 = x.min(y);
+    let mut xd: u64 = u.to_bits();
+    let mut yd: u64 = v.to_bits();
+    ey = yd;
 
-const SPLIT: f64 = 134217728. + 1.; // 0x1p27 + 1 === (2 ^ 27) + 1
+    if ey >> 52 == 0 {
+        cold_path();
 
-fn sq(x: f64) -> (f64, f64) {
-    let xh: f64;
-    let xl: f64;
-    let xc: f64;
+        if yd == 0 {
+            return f64::from_bits(xd);
+        }
 
-    xc = x * SPLIT;
-    xh = x - xc + xc;
-    xl = x - xh;
-    let hi = x * x;
-    let lo = xh * xh - hi + 2. * xh * xl + xl * xl;
-    (hi, lo)
+        ex = xd;
+
+        if ex >> 52 == 0 {
+            cold_path();
+
+            if ex == 0 {
+                return 0.0;
+            }
+
+            return as_hypot_denorm(ex, ey);
+        }
+
+        let nz: u32 = ey.leading_zeros();
+        ey <<= nz - 11;
+        ey &= u64::MAX >> 12;
+        ey = ey.wrapping_sub(((nz as i64 - 12i64) << 52) as u64);
+        let t = ey; // why did they do this?
+        yd = t;
+    }
+
+    let de: u64 = xd.wrapping_sub(yd);
+    if de > (27_u64 << 52) {
+        cold_path();
+        return hf64!("0x1p-27").fma(v, u);
+    }
+
+    let off: i64 = (0x3ff_i64 << 52) - (xd & emsk) as i64;
+    xd = xd.wrapping_add(off as u64);
+    yd = yd.wrapping_add(off as u64);
+    x = f64::from_bits(xd);
+    y = f64::from_bits(yd);
+    let x2: f64 = x * x;
+    let dx2: f64 = x.fma(x, -x2);
+    let y2: f64 = y * y;
+    let dy2: f64 = y.fma(y, -y2);
+    let r2: f64 = x2 + y2;
+    let ir2: f64 = 0.5 / r2;
+    let dr2: f64 = ((x2 - r2) + y2) + (dx2 + dy2);
+    let mut th: f64 = sqrt(r2);
+    let rsqrt: f64 = th * ir2;
+    let dz: f64 = dr2 - th.fma(th, -r2);
+    let mut tl: f64 = rsqrt * dz;
+    th = fasttwosum(th, tl, &mut tl);
+    let mut thd: u64 = th.to_bits();
+    let tld = tl.abs().to_bits();
+    ex = thd;
+    ey = tld;
+    ex &= 0x7ff_u64 << 52;
+    let aidr: u64 = ey.wrapping_add(0x3fe_u64 << 52).wrapping_sub(ex);
+    let mid: u64 = (aidr.wrapping_sub(0x3c90000000000000).wrapping_add(16)) >> 5;
+    if mid == 0 || !(0x39b0000000000000_u64..=0x3c9fffffffffff80_u64).contains(&aidr) {
+        cold_path();
+        thd = as_hypot_hard(x, y, flag).to_bits();
+    }
+    thd = thd.wrapping_sub(off as u64);
+    if thd >= (0x7ff_u64 << 52) {
+        cold_path();
+        return as_hypot_overflow();
+    }
+
+    f64::from_bits(thd)
 }
 
-#[cfg_attr(assert_no_panic, no_panic::no_panic)]
-pub fn hypot(mut x: f64, mut y: f64) -> f64 {
-    let x1p700 = f64::from_bits(0x6bb0000000000000); // 0x1p700 === 2 ^ 700
-    let x1p_700 = f64::from_bits(0x1430000000000000); // 0x1p-700 === 2 ^ -700
-
-    let mut uxi = x.to_bits();
-    let mut uyi = y.to_bits();
-    let uti;
-    let ex: i64;
-    let ey: i64;
-    let mut z: f64;
-
-    /* arrange |x| >= |y| */
-    uxi &= -1i64 as u64 >> 1;
-    uyi &= -1i64 as u64 >> 1;
-    if uxi < uyi {
-        uti = uxi;
-        uxi = uyi;
-        uyi = uti;
+fn fasttwosum(x: f64, y: f64, e: &mut f64) -> f64 {
+    let s: f64 = x + y;
+    let z: f64 = s - x;
+    *e = y - z;
+    s
+}
+
+fn as_hypot_overflow() -> f64 {
+    let z: f64 = hf64!("0x1.fffffffffffffp1023");
+    let f = z + z;
+    if f > z {
+        // errno = ERANGE
+    }
+    f
+}
+
+/// Here the square root is refined by Newton iterations: x^2+y^2 is exact
+/// and fits in a 128-bit integer, so the approximation is squared (which
+/// also fits in a 128-bit integer), compared and adjusted if necessary using
+/// the exact value of x^2+y^2.
+fn as_hypot_hard(x: f64, y: f64, flag: FExcept) -> f64 {
+    let op: f64 = 1.0 + hf64!("0x1p-54");
+    let om: f64 = 1.0 - hf64!("0x1p-54");
+    let mut xi: u64 = x.to_bits();
+    let yi: u64 = y.to_bits();
+    let mut bm: u64 = (xi & (u64::MAX >> 12)) | 1u64 << 52;
+    let mut lm: u64 = (yi & (u64::MAX >> 12)) | 1u64 << 52;
+    let be: i32 = (xi >> 52) as i32;
+    let le: i32 = (yi >> 52) as i32;
+    let ri: u64 = sqrt(x * x + y * y).to_bits();
+    let bs: i32 = 2;
+    let mut rm: u64 = ri & (u64::MAX >> 12);
+    let mut re: i32 = (ri >> 52) as i32 - 0x3ff;
+    rm |= 1u64 << 52;
+
+    for _ in 0..3 {
+        if rm == 1u64 << 52 {
+            rm = u64::MAX >> 11;
+            re -= 1;
+        } else {
+            cold_path();
+            rm -= 1;
+        }
+    }
+
+    bm <<= bs;
+    let mut m2: u64 = bm.wrapping_mul(bm);
+    let de: i32 = be - le;
+    let mut ls: i32 = bs - de;
+
+    if ls >= 0 {
+        lm <<= ls;
+        m2 = m2.wrapping_add(lm.wrapping_mul(lm));
+    } else {
+        cold_path();
+        let lm2: u128 = (lm as u128) * (lm as u128);
+        ls *= 2;
+        m2 = m2.wrapping_add((lm2 >> -ls) as u64);
+        m2 |= ((lm2 << (128 + ls)) != 0) as u64;
+    }
+
+    let k: i32 = bs + re;
+    let mut d: i64;
+
+    loop {
+        rm += 1 + (rm >= (1u64 << 53)) as u64;
+        let tm: u64 = rm << k;
+        let rm2: u64 = tm.wrapping_mul(tm);
+        d = m2 as i64 - rm2 as i64;
+
+        if d <= 0 {
+            break;
+        }
     }
 
-    /* special cases */
-    ex = (uxi >> 52) as i64;
-    ey = (uyi >> 52) as i64;
-    x = f64::from_bits(uxi);
-    y = f64::from_bits(uyi);
-    /* note: hypot(inf,nan) == inf */
-    if ey == 0x7ff {
-        return y;
+    if d == 0 {
+        set_flags(flag);
+    } else if op == om {
+        let tm: u64 = (rm << k) - (1 << (k - (rm <= (1u64 << 53)) as i32));
+        d = m2 as i64 - (tm.wrapping_mul(tm)) as i64;
+
+        if d == 0 {
+            cold_path();
+            rm -= rm & 1;
+        } else {
+            rm = rm.wrapping_add((d >> 63) as u64);
+        }
+    } else {
+        cold_path();
+        rm -= ((op == 1.0) as u64) << (rm > (1u64 << 53)) as u32;
     }
-    if ex == 0x7ff || uyi == 0 {
-        return x;
+
+    if rm >= (1u64 << 53) {
+        rm >>= 1;
+        re += 1;
     }
-    /* note: hypot(x,y) ~= x + y*y/x/2 with inexact for small y/x */
-    /* 64 difference is enough for ld80 double_t */
-    if ex - ey > 64 {
-        return x + y;
+
+    let e: u64 = (be - 1 + re) as u64;
+    xi = (e << 52) + rm;
+
+    f64::from_bits(xi)
+}
+
+fn as_hypot_denorm(mut a: u64, mut b: u64) -> f64 {
+    let op: f64 = 1.0 + hf64!("0x1p-54");
+    let om: f64 = 1.0 - hf64!("0x1p-54");
+    let af: f64 = a as i64 as f64;
+    let bf: f64 = b as i64 as f64;
+    a <<= 1;
+    b <<= 1;
+    // Is this casting right?
+    let mut rm: u64 = sqrt(af * af + bf * bf) as u64;
+    let tm: u64 = rm << 1;
+    let mut d: i64 = (a.wrapping_mul(a) as i64)
+        .wrapping_sub(tm.wrapping_mul(tm) as i64)
+        .wrapping_add(b.wrapping_mul(b) as i64);
+    let sd: i64 = d >> 63;
+    let um: i64 = ((rm as i64) ^ sd) - sd;
+    let mut drm: i64 = sd + 1;
+    let mut dd: i64 = (um << 3) + 4;
+    let mut p_d: i64;
+    rm -= drm as u64;
+    drm += sd;
+    loop {
+        p_d = d;
+        rm = rm.wrapping_add(drm as u64);
+        d = d.wrapping_sub(dd);
+        dd = dd.wrapping_add(8);
+        if (d ^ p_d) <= 0 {
+            cold_path();
+            break;
+        }
     }
+    p_d = (sd & d) + (!sd & p_d);
+    if p_d != 0 {
+        if op == om {
+            let sum: i64 = p_d
+                .wrapping_sub(4u64.wrapping_mul(rm) as i64)
+                .wrapping_sub(1);
 
-    /* precise sqrt argument in nearest rounding mode without overflow */
-    /* xh*xh must not overflow and xl*xl must not underflow in sq */
-    z = 1.;
-    if ex > 0x3ff + 510 {
-        z = x1p700;
-        x *= x1p_700;
-        y *= x1p_700;
-    } else if ey < 0x3ff - 450 {
-        z = x1p_700;
-        x *= x1p700;
-        y *= x1p700;
+            if sum != 0 {
+                rm = rm.wrapping_add((sum >> 63).wrapping_add(1) as u64);
+            } else {
+                cold_path();
+                rm += rm & 1;
+            }
+        } else {
+            cold_path();
+            rm += (op > 1.0) as u64;
+        }
+    } else {
+        cold_path();
     }
-    let (hx, lx) = sq(x);
-    let (hy, ly) = sq(y);
-    z * sqrt(ly + lx + hy + hx)
+
+    let xi: u64 = rm;
+    f64::from_bits(xi)
+}
+
+type FExcept = u32;
+
+fn set_flags(_flag: FExcept) {}
+
+fn get_flags() -> FExcept {
+    0
 }
diff --git a/library/compiler-builtins/libm/src/math/j1f.rs b/library/compiler-builtins/libm/src/math/j1f.rs
index cd829c1aa1213..d545a8635b127 100644
--- a/library/compiler-builtins/libm/src/math/j1f.rs
+++ b/library/compiler-builtins/libm/src/math/j1f.rs
@@ -374,8 +374,7 @@ mod tests {
     fn test_y1f_2002() {
         //allow slightly different result on x87
         let res = y1f(2.0000002_f32);
-        if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) && (res == -0.10703231_f32)
-        {
+        if cfg!(x86_no_sse2) && (res == -0.10703231_f32) {
             return;
         }
         assert_eq!(res, -0.10703229_f32);
diff --git a/library/compiler-builtins/libm/src/math/mod.rs b/library/compiler-builtins/libm/src/math/mod.rs
index 4bee4478164a0..fe9ef580ca679 100644
--- a/library/compiler-builtins/libm/src/math/mod.rs
+++ b/library/compiler-builtins/libm/src/math/mod.rs
@@ -75,6 +75,8 @@ cfg_if! {
     }
 }
 
+pub mod approx;
+
 // Private modules
 mod arch;
 mod expo2;
diff --git a/library/compiler-builtins/libm/src/math/pow.rs b/library/compiler-builtins/libm/src/math/pow.rs
index 914d68cfce1a2..87eaab3c7c727 100644
--- a/library/compiler-builtins/libm/src/math/pow.rs
+++ b/library/compiler-builtins/libm/src/math/pow.rs
@@ -472,9 +472,9 @@ mod tests {
                 let exp = expected(*val);
                 let res = computed(*val);
 
-                #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+                #[cfg(x86_no_sse2)]
                 let exp = force_eval!(exp);
-                #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+                #[cfg(x86_no_sse2)]
                 let res = force_eval!(res);
                 assert!(
                     if exp.is_nan() {
diff --git a/library/compiler-builtins/libm/src/math/rem_pio2.rs b/library/compiler-builtins/libm/src/math/rem_pio2.rs
index 61b1030275a22..d2be3957358ae 100644
--- a/library/compiler-builtins/libm/src/math/rem_pio2.rs
+++ b/library/compiler-builtins/libm/src/math/rem_pio2.rs
@@ -53,7 +53,7 @@ pub(crate) fn rem_pio2(x: f64) -> (i32, f64, f64) {
         let tmp = x * INV_PIO2 + TO_INT;
         // force rounding of tmp to it's storage format on x87 to avoid
         // excess precision issues.
-        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        #[cfg(x86_no_sse2)]
         let tmp = force_eval!(tmp);
         let f_n = tmp - TO_INT;
         let n = f_n as i32;
@@ -195,7 +195,7 @@ mod tests {
 
     #[test]
     // FIXME(correctness): inaccurate results on i586
-    #[cfg_attr(x86_no_sse, ignore)]
+    #[cfg_attr(x86_no_sse2, ignore)]
     fn test_near_pi() {
         let arg = 3.141592025756836;
         let arg = force_eval!(arg);
diff --git a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs
index 841a51b84c278..8212c1a0a9f42 100644
--- a/library/compiler-builtins/libm/src/math/rem_pio2_large.rs
+++ b/library/compiler-builtins/libm/src/math/rem_pio2_large.rs
@@ -228,7 +228,7 @@ pub(crate) fn rem_pio2_large(x: &[f64], y: &mut [f64], e0: i32, prec: usize) ->
     // FIXME(rust-lang/rust#144518): Inline assembly would cause `no_panic` to fail
     // on the callers of this function. As a workaround, avoid inlining `floor` here
     // when implemented with assembly.
-    #[cfg_attr(x86_no_sse, inline(never))]
+    #[cfg_attr(x86_no_sse2, inline(never))]
     extern "C" fn floor(x: f64) -> f64 {
         super::floor(x)
     }
diff --git a/library/compiler-builtins/libm/src/math/rem_pio2f.rs b/library/compiler-builtins/libm/src/math/rem_pio2f.rs
index 481f7ee830bbb..bbf8042752137 100644
--- a/library/compiler-builtins/libm/src/math/rem_pio2f.rs
+++ b/library/compiler-builtins/libm/src/math/rem_pio2f.rs
@@ -44,7 +44,7 @@ pub(crate) fn rem_pio2f(x: f32) -> (i32, f64) {
         let tmp = x64 * INV_PIO2 + TOINT;
         // force rounding of tmp to it's storage format on x87 to avoid
         // excess precision issues.
-        #[cfg(all(target_arch = "x86", not(target_feature = "sse2")))]
+        #[cfg(x86_no_sse2)]
         let tmp = force_eval!(tmp);
         let f_n = tmp - TOINT;
         return (f_n as i32, x64 - f_n * PIO2_1 - f_n * PIO2_1T);
diff --git a/library/compiler-builtins/libm/src/math/rint.rs b/library/compiler-builtins/libm/src/math/rint.rs
index 011a7ae3d60ad..75c46acc0e856 100644
--- a/library/compiler-builtins/libm/src/math/rint.rs
+++ b/library/compiler-builtins/libm/src/math/rint.rs
@@ -1,4 +1,4 @@
-use super::support::Round;
+use super::generic;
 
 /// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg(f16_enabled)]
@@ -10,7 +10,7 @@ pub fn rintf16(x: f16) -> f16 {
         args: x,
     }
 
-    super::generic::rint_round(x, Round::Nearest).val
+    generic::rint_status(x).val
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even.
@@ -25,7 +25,7 @@ pub fn rintf(x: f32) -> f32 {
         args: x,
     }
 
-    super::generic::rint_round(x, Round::Nearest).val
+    generic::rint_status(x).val
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even.
@@ -37,15 +37,120 @@ pub fn rint(x: f64) -> f64 {
             all(target_arch = "aarch64", target_feature = "neon"),
             all(target_arch = "wasm32", intrinsics_enabled),
         ),
+        use_arch_required: x86_no_sse2,
         args: x,
     }
 
-    super::generic::rint_round(x, Round::Nearest).val
+    generic::rint_status(x).val
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even.
 #[cfg(f128_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn rintf128(x: f128) -> f128 {
-    super::generic::rint_round(x, Round::Nearest).val
+    generic::rint_status(x).val
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, FpResult, Hex, Status};
+
+    macro_rules! cases {
+        ($f:ty) => {
+            [
+                // roundtrip
+                (0.0, 0.0, Status::OK),
+                (-0.0, -0.0, Status::OK),
+                (1.0, 1.0, Status::OK),
+                (-1.0, -1.0, Status::OK),
+                (<$f>::INFINITY, <$f>::INFINITY, Status::OK),
+                (<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
+                // with rounding
+                (0.1, 0.0, Status::INEXACT),
+                (-0.1, -0.0, Status::INEXACT),
+                (0.5, 0.0, Status::INEXACT),
+                (-0.5, -0.0, Status::INEXACT),
+                (0.9, 1.0, Status::INEXACT),
+                (-0.9, -1.0, Status::INEXACT),
+                (1.1, 1.0, Status::INEXACT),
+                (-1.1, -1.0, Status::INEXACT),
+                (1.5, 2.0, Status::INEXACT),
+                (-1.5, -2.0, Status::INEXACT),
+                (1.9, 2.0, Status::INEXACT),
+                (-1.9, -2.0, Status::INEXACT),
+                (2.5, 2.0, Status::INEXACT),
+                (-2.5, -2.0, Status::INEXACT),
+                (3.5, 4.0, Status::INEXACT),
+                (-3.5, -4.0, Status::INEXACT),
+            ]
+        };
+    }
+
+    #[track_caller]
+    fn check<F: Float>(f: fn(F) -> F, cases: &[(F, F, Status)]) {
+        for &(x, exp_res, exp_stat) in cases {
+            let FpResult { val, status } = generic::rint_status(x);
+            assert_biteq!(val, exp_res, "generic::rint_status({x:?}) ({})", Hex(x));
+            assert_eq!(
+                status,
+                exp_stat,
+                "{x:?} {} -> {exp_res:?} {}",
+                Hex(x),
+                Hex(exp_res)
+            );
+            let val = f(x);
+            assert_biteq!(val, exp_res, "rint({x:?}) ({})", Hex(x));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn check_f16() {
+        check::<f16>(rintf16, &cases!(f16));
+        check::<f16>(
+            rintf16,
+            &[
+                (hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
+                (hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f32() {
+        check::<f32>(rintf, &cases!(f32));
+        check::<f32>(
+            rintf,
+            &[
+                (hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
+                (hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f64() {
+        check::<f64>(rint, &cases!(f64));
+        check::<f64>(
+            rint,
+            &[
+                (hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
+                (hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn check_f128() {
+        check::<f128>(rintf128, &cases!(f128));
+        check::<f128>(
+            rintf128,
+            &[
+                (hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
+                (hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
+            ],
+        );
+    }
 }
diff --git a/library/compiler-builtins/libm/src/math/round.rs b/library/compiler-builtins/libm/src/math/round.rs
index 256197e6ccbee..091cd801ad6b4 100644
--- a/library/compiler-builtins/libm/src/math/round.rs
+++ b/library/compiler-builtins/libm/src/math/round.rs
@@ -1,25 +1,120 @@
+use super::generic;
+
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg(f16_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf16(x: f16) -> f16 {
-    super::generic::round(x)
+    generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf(x: f32) -> f32 {
-    super::generic::round(x)
+    generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn round(x: f64) -> f64 {
-    super::generic::round(x)
+    generic::round(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties away from zero.
 #[cfg(f128_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundf128(x: f128) -> f128 {
-    super::generic::round(x)
+    generic::round(x)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::support::{Float, Hex};
+
+    macro_rules! cases {
+        ($f:ty) => {
+            [
+                // roundtrip
+                (0.0, 0.0),
+                (-0.0, -0.0),
+                (1.0, 1.0),
+                (-1.0, -1.0),
+                (<$f>::INFINITY, <$f>::INFINITY),
+                (<$f>::NEG_INFINITY, <$f>::NEG_INFINITY),
+                // with rounding
+                (0.1, 0.0),
+                (-0.1, -0.0),
+                (0.5, 1.0),
+                (-0.5, -1.0),
+                (0.9, 1.0),
+                (-0.9, -1.0),
+                (1.1, 1.0),
+                (-1.1, -1.0),
+                (1.5, 2.0),
+                (-1.5, -2.0),
+                (1.9, 2.0),
+                (-1.9, -2.0),
+            ]
+        };
+    }
+
+    #[track_caller]
+    fn check<F: Float>(f: fn(F) -> F, cases: &[(F, F)]) {
+        for &(x, exp_res) in cases {
+            let val = generic::round(x);
+            assert_biteq!(val, exp_res, "generic::round_status({x:?}) {}", Hex(x));
+            let val = f(x);
+            assert_biteq!(val, exp_res, "round({x:?}) {}", Hex(x));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn check_f16() {
+        check::<f16>(roundf16, &cases!(f16));
+        check::<f16>(
+            roundf16,
+            &[
+                (hf16!("0x1p10"), hf16!("0x1p10")),
+                (hf16!("-0x1p10"), hf16!("-0x1p10")),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f32() {
+        check::<f32>(roundf, &cases!(f32));
+        check::<f32>(
+            roundf,
+            &[
+                (hf32!("0x1p23"), hf32!("0x1p23")),
+                (hf32!("-0x1p23"), hf32!("-0x1p23")),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f64() {
+        check::<f64>(round, &cases!(f64));
+        check::<f64>(
+            round,
+            &[
+                (hf64!("0x1p52"), hf64!("0x1p52")),
+                (hf64!("-0x1p52"), hf64!("-0x1p52")),
+            ],
+        );
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn check_f128() {
+        check::<f128>(roundf128, &cases!(f128));
+        check::<f128>(
+            roundf128,
+            &[
+                (hf128!("0x1p112"), hf128!("0x1p112")),
+                (hf128!("-0x1p112"), hf128!("-0x1p112")),
+            ],
+        );
+    }
 }
diff --git a/library/compiler-builtins/libm/src/math/roundeven.rs b/library/compiler-builtins/libm/src/math/roundeven.rs
index f0d67d41076ec..4cedd0c16b82d 100644
--- a/library/compiler-builtins/libm/src/math/roundeven.rs
+++ b/library/compiler-builtins/libm/src/math/roundeven.rs
@@ -1,25 +1,28 @@
-use super::support::{Float, Round};
+//! Note that we can use `rint` for these implementations since Rust expects the rounding
+//! mode is always ties-to-even. `roundeven` also does not raise `FE_INEXACT`.
+//!
+//! Tested in the `rint` module.
 
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
 #[cfg(f16_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf16(x: f16) -> f16 {
-    roundeven_impl(x)
+    super::rintf16(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf(x: f32) -> f32 {
-    roundeven_impl(x)
+    super::rintf(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
 /// `roundToIntegralTiesToEven`.
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundeven(x: f64) -> f64 {
-    roundeven_impl(x)
+    super::rint(x)
 }
 
 /// Round `x` to the nearest integer, breaking ties toward even. This is IEEE 754
@@ -27,10 +30,5 @@ pub fn roundeven(x: f64) -> f64 {
 #[cfg(f128_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn roundevenf128(x: f128) -> f128 {
-    roundeven_impl(x)
-}
-
-#[inline]
-pub fn roundeven_impl<F: Float>(x: F) -> F {
-    super::generic::rint_round(x, Round::Nearest).val
+    super::rintf128(x)
 }
diff --git a/library/compiler-builtins/libm/src/math/sin.rs b/library/compiler-builtins/libm/src/math/sin.rs
index 5378a7bc3874a..2f54074c25ef0 100644
--- a/library/compiler-builtins/libm/src/math/sin.rs
+++ b/library/compiler-builtins/libm/src/math/sin.rs
@@ -86,7 +86,7 @@ mod tests {
     use super::*;
 
     #[test]
-    #[cfg_attr(x86_no_sse, ignore = "FIXME(i586): possible incorrect rounding")]
+    #[cfg_attr(x86_no_sse2, ignore = "FIXME(i586): possible incorrect rounding")]
     fn test_near_pi() {
         let x = f64::from_bits(0x400921fb000FD5DD); // 3.141592026217707
         let sx = f64::from_bits(0x3ea50d15ced1a4a2); // 6.273720864039205e-7
diff --git a/library/compiler-builtins/libm/src/math/sqrt.rs b/library/compiler-builtins/libm/src/math/sqrt.rs
index 7ba1bc9b32b23..10edf397a0a0a 100644
--- a/library/compiler-builtins/libm/src/math/sqrt.rs
+++ b/library/compiler-builtins/libm/src/math/sqrt.rs
@@ -1,3 +1,6 @@
+use super::generic;
+use crate::support::Round;
+
 /// The square root of `x` (f16).
 #[cfg(f16_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
@@ -8,7 +11,7 @@ pub fn sqrtf16(x: f16) -> f16 {
         args: x,
     }
 
-    return super::generic::sqrt(x);
+    return generic::sqrt_round(x, Round::Nearest).val;
 }
 
 /// The square root of `x` (f32).
@@ -24,7 +27,7 @@ pub fn sqrtf(x: f32) -> f32 {
         args: x,
     }
 
-    super::generic::sqrt(x)
+    generic::sqrt_round(x, Round::Nearest).val
 }
 
 /// The square root of `x` (f64).
@@ -40,12 +43,187 @@ pub fn sqrt(x: f64) -> f64 {
         args: x,
     }
 
-    super::generic::sqrt(x)
+    generic::sqrt_round(x, Round::Nearest).val
 }
 
 /// The square root of `x` (f128).
 #[cfg(f128_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn sqrtf128(x: f128) -> f128 {
-    return super::generic::sqrt(x);
+    return generic::sqrt_round(x, Round::Nearest).val;
+}
+
+#[cfg(test)]
+mod tests {
+    use generic::SqrtHelper;
+
+    use super::*;
+    use crate::support::{CastInto, Float, FpResult, HInt, Status};
+
+    /// Test behavior specified in IEEE 754 `squareRoot`.
+    fn spec_test<F>()
+    where
+        F: Float + SqrtHelper,
+        F::Int: HInt,
+        F::Int: From<u8>,
+        F::Int: From<F::ISet2>,
+        F::Int: CastInto<F::ISet1>,
+        F::Int: CastInto<F::ISet2>,
+        u32: CastInto<F::Int>,
+    {
+        // Values that should return a NaN and raise invalid
+        let nan = [F::NEG_INFINITY, F::NEG_ONE, F::NAN, F::MIN];
+
+        // Values that return unaltered
+        let roundtrip = [F::ZERO, F::NEG_ZERO, F::INFINITY];
+
+        for x in nan {
+            let FpResult { val, status } = generic::sqrt_round(x, Round::Nearest);
+            assert!(val.is_nan());
+            assert!(status == Status::INVALID);
+        }
+
+        for x in roundtrip {
+            let FpResult { val, status } = generic::sqrt_round(x, Round::Nearest);
+            assert_biteq!(val, x);
+            assert!(status == Status::OK);
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn sanity_check_f16() {
+        assert_biteq!(sqrtf16(100.0f16), 10.0);
+        assert_biteq!(sqrtf16(4.0f16), 2.0);
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn spec_tests_f16() {
+        spec_test::<f16>();
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f16() {
+        let cases = [
+            (f16::PI, 0x3f17_u16),
+            (10000.0_f16, 0x5640_u16),
+            (f16::from_bits(0x0000000f), 0x13bf_u16),
+            (f16::INFINITY, f16::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrtf16(input),
+                f16::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    fn sanity_check_f32() {
+        assert_biteq!(sqrtf(100.0f32), 10.0);
+        assert_biteq!(sqrtf(4.0f32), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f32() {
+        spec_test::<f32>();
+    }
+
+    #[test]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f32() {
+        let cases = [
+            (f32::PI, 0x3fe2dfc5_u32),
+            (10000.0f32, 0x42c80000_u32),
+            (f32::from_bits(0x0000000f), 0x1b2f456f_u32),
+            (f32::INFINITY, f32::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrtf(input),
+                f32::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    fn sanity_check_f64() {
+        assert_biteq!(sqrt(100.0f64), 10.0);
+        assert_biteq!(sqrt(4.0f64), 2.0);
+    }
+
+    #[test]
+    fn spec_tests_f64() {
+        spec_test::<f64>();
+    }
+
+    #[test]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f64() {
+        let cases = [
+            (f64::PI, 0x3ffc5bf891b4ef6a_u64),
+            (10000.0, 0x4059000000000000_u64),
+            (f64::from_bits(0x0000000f), 0x1e7efbdeb14f4eda_u64),
+            (f64::INFINITY, f64::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrt(input),
+                f64::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn sanity_check_f128() {
+        assert_biteq!(sqrtf128(100.0f128), 10.0);
+        assert_biteq!(sqrtf128(4.0f128), 2.0);
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    fn spec_tests_f128() {
+        spec_test::<f128>();
+    }
+
+    #[test]
+    #[cfg(f128_enabled)]
+    #[allow(clippy::approx_constant)]
+    fn conformance_tests_f128() {
+        let cases = [
+            (f128::PI, 0x3fffc5bf891b4ef6aa79c3b0520d5db9_u128),
+            // 10_000.0, see `f16` for reasoning.
+            (
+                f128::from_bits(0x400c3880000000000000000000000000),
+                0x40059000000000000000000000000000_u128,
+            ),
+            (
+                f128::from_bits(0x0000000f),
+                0x1fc9efbdeb14f4ed9b17ae807907e1e9_u128,
+            ),
+            (f128::INFINITY, f128::INFINITY.to_bits()),
+        ];
+
+        for (input, output) in cases {
+            assert_biteq!(
+                sqrtf128(input),
+                f128::from_bits(output),
+                "input: {input:?} ({:#018x})",
+                input.to_bits()
+            );
+        }
+    }
 }
diff --git a/library/compiler-builtins/libm/src/math/support/big.rs b/library/compiler-builtins/libm/src/math/support/big.rs
index b7f1285424956..c316d93f524ab 100644
--- a/library/compiler-builtins/libm/src/math/support/big.rs
+++ b/library/compiler-builtins/libm/src/math/support/big.rs
@@ -3,7 +3,7 @@
 #[cfg(test)]
 mod tests;
 
-use core::ops;
+use core::{fmt, ops};
 
 use super::{DInt, HInt, Int, MinInt};
 
@@ -11,18 +11,18 @@ const U128_LO_MASK: u128 = u64::MAX as u128;
 
 /// A 256-bit unsigned integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
+#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord)]
 pub struct u256 {
     pub hi: u128,
     pub lo: u128,
 }
 
 impl u256 {
-    #[cfg(any(test, feature = "unstable-public-internals"))]
     pub const MAX: Self = Self {
         lo: u128::MAX,
         hi: u128::MAX,
     };
+    pub const MIN: Self = Self { lo: 0, hi: 0 };
 
     /// Reinterpret as a signed integer
     pub fn signed(self) -> i256 {
@@ -35,15 +35,23 @@ impl u256 {
 
 /// A 256-bit signed integer represented as two 128-bit native-endian limbs.
 #[allow(non_camel_case_types)]
-#[derive(Clone, Copy, Debug, PartialEq, PartialOrd, Eq, Ord)]
+#[derive(Clone, Copy, PartialEq, PartialOrd, Eq, Ord)]
 pub struct i256 {
     pub hi: i128,
     pub lo: u128,
 }
 
 impl i256 {
+    pub const MAX: Self = Self {
+        lo: u128::MAX,
+        hi: i128::MAX,
+    };
+    pub const MIN: Self = Self {
+        lo: u128::MIN,
+        hi: i128::MIN,
+    };
+
     /// Reinterpret as an unsigned integer
-    #[cfg(any(test, feature = "unstable-public-internals"))]
     pub fn unsigned(self) -> u256 {
         u256 {
             lo: self.lo,
@@ -61,11 +69,8 @@ impl MinInt for u256 {
     const BITS: u32 = 256;
     const ZERO: Self = Self { lo: 0, hi: 0 };
     const ONE: Self = Self { lo: 1, hi: 0 };
-    const MIN: Self = Self { lo: 0, hi: 0 };
-    const MAX: Self = Self {
-        lo: u128::MAX,
-        hi: u128::MAX,
-    };
+    const MIN: Self = Self::MIN;
+    const MAX: Self = Self::MAX;
 }
 
 impl MinInt for i256 {
@@ -77,14 +82,8 @@ impl MinInt for i256 {
     const BITS: u32 = 256;
     const ZERO: Self = Self { lo: 0, hi: 0 };
     const ONE: Self = Self { lo: 1, hi: 0 };
-    const MIN: Self = Self {
-        lo: u128::MIN,
-        hi: i128::MIN,
-    };
-    const MAX: Self = Self {
-        lo: u128::MAX,
-        hi: i128::MAX,
-    };
+    const MIN: Self = Self::MIN;
+    const MAX: Self = Self::MAX;
 }
 
 macro_rules! impl_common {
@@ -280,3 +279,35 @@ impl DInt for i256 {
         self.hi
     }
 }
+
+impl fmt::Debug for u256 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::LowerHex::fmt(self, f)
+    }
+}
+
+impl fmt::Debug for i256 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::LowerHex::fmt(&self.unsigned(), f)
+    }
+}
+
+impl fmt::LowerHex for u256 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        cfg_if! {
+            if #[cfg(feature = "compiler-builtins")] {
+                let _ = f;
+                unimplemented!()
+            } else {
+                let pfx= if f.alternate() { "0x"} else {""};
+                write!(f, "{pfx}{:032x}{:032x}", self.hi, self.lo)
+            }
+        }
+    }
+}
+
+impl fmt::LowerHex for i256 {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::LowerHex::fmt(&self.unsigned(), f)
+    }
+}
diff --git a/library/compiler-builtins/libm/src/math/support/big/tests.rs b/library/compiler-builtins/libm/src/math/support/big/tests.rs
index 2eafed50a2757..0ac8f9721a4dd 100644
--- a/library/compiler-builtins/libm/src/math/support/big/tests.rs
+++ b/library/compiler-builtins/libm/src/math/support/big/tests.rs
@@ -1,17 +1,11 @@
 extern crate std;
-use std::string::String;
-use std::{eprintln, format};
+use std::eprintln;
 
-use super::{HInt, MinInt, i256, u256};
+use super::{DInt, HInt, MinInt, i256, u256};
 use crate::support::{Int as _, NarrowingDiv};
 
 const LOHI_SPLIT: u128 = 0xaaaaaaaaaaaaaaaaffffffffffffffff;
 
-/// Print a `u256` as hex since we can't add format implementations
-fn hexu(v: u256) -> String {
-    format!("0x{:032x}{:032x}", v.hi, v.lo)
-}
-
 #[test]
 fn widen_u128() {
     assert_eq!(
@@ -81,11 +75,9 @@ fn widen_mul_u128() {
         eprintln!(
             "\
             FAILURE ({i}): {a:#034x} * {b:#034x}\n\
-            expected: {}\n\
-            got:      {}\
+            expected: {expected:#x}\n\
+            actual:   {actual:#x}\
             ",
-            hexu(expected),
-            hexu(actual)
         );
     };
 
@@ -106,6 +98,169 @@ fn not_u256() {
     assert_eq!(!u256::ZERO, u256::MAX);
 }
 
+#[test]
+fn shl_u256() {
+    let only_high = [
+        1,
+        u16::MAX.into(),
+        u32::MAX.into(),
+        u64::MAX.into(),
+        u128::MAX,
+    ];
+    let mut has_errors = false;
+
+    let mut add_error = |a, b, expected, actual| {
+        has_errors = true;
+        eprintln!(
+            "\
+            FAILURE:  {a:#x} << {b}\n\
+                expected: {expected:#x}\n\
+                actual:   {actual:#x}\
+            ",
+        );
+    };
+
+    for a in only_high {
+        for perturb in 0..10 {
+            let a = a.saturating_add(perturb);
+            for shift in 0..128 {
+                let res = u256::from_lo_hi(0, a) << shift;
+                let expected = u256::from_lo_hi(0, a << shift);
+                if res != expected {
+                    add_error(a.widen(), shift, expected, res);
+                }
+            }
+        }
+    }
+
+    let check = [
+        (
+            u256::MAX,
+            1,
+            u256 {
+                lo: u128::MAX << 1,
+                hi: u128::MAX,
+            },
+        ),
+        (
+            u256::MAX,
+            5,
+            u256 {
+                lo: u128::MAX << 5,
+                hi: u128::MAX,
+            },
+        ),
+        (
+            u256::MAX,
+            63,
+            u256 {
+                lo: u128::MAX << 63,
+                hi: u128::MAX,
+            },
+        ),
+        (
+            u256::MAX,
+            64,
+            u256 {
+                lo: (u64::MAX as u128) << 64,
+                hi: u128::MAX,
+            },
+        ),
+        (
+            u256::MAX,
+            65,
+            u256 {
+                lo: (u64::MAX as u128) << 65,
+                hi: u128::MAX,
+            },
+        ),
+        (
+            u256::MAX,
+            127,
+            u256 {
+                lo: 1 << 127,
+                hi: u128::MAX,
+            },
+        ),
+        (
+            u256::MAX,
+            128,
+            u256 {
+                lo: 0,
+                hi: u128::MAX,
+            },
+        ),
+        (
+            u256::MAX,
+            129,
+            u256 {
+                lo: 0,
+                hi: u128::MAX << 1,
+            },
+        ),
+        (
+            u256::MAX,
+            191,
+            u256 {
+                lo: 0,
+                hi: u128::MAX << 63,
+            },
+        ),
+        (
+            u256::MAX,
+            192,
+            u256 {
+                lo: 0,
+                hi: u128::MAX << 64,
+            },
+        ),
+        (
+            u256::MAX,
+            193,
+            u256 {
+                lo: 0,
+                hi: u128::MAX << 65,
+            },
+        ),
+        (
+            u256::MAX,
+            254,
+            u256 {
+                lo: 0,
+                hi: 0b11 << 126,
+            },
+        ),
+        (
+            u256::MAX,
+            255,
+            u256 {
+                lo: 0,
+                hi: 1 << 127,
+            },
+        ),
+        (
+            u256 {
+                hi: 0,
+                lo: LOHI_SPLIT,
+            },
+            64,
+            u256 {
+                lo: 0xffffffffffffffff0000000000000000,
+                hi: 0xaaaaaaaaaaaaaaaa,
+            },
+        ),
+    ];
+
+    for (input, shift, expected) in check {
+        let res = input << shift;
+        if res != expected {
+            add_error(input, shift, expected, res);
+        }
+    }
+
+    assert!(!has_errors);
+}
+
 #[test]
 fn shr_u256() {
     let only_low = [
@@ -121,13 +276,10 @@ fn shr_u256() {
         has_errors = true;
         eprintln!(
             "\
-            FAILURE:  {} >> {b}\n\
-            expected: {}\n\
-            actual:   {}\
+            FAILURE:  {a:#x} >> {b}\n\
+            expected: {expected:#x}\n\
+            actual:   {actual:#x}\
             ",
-            hexu(a),
-            hexu(expected),
-            hexu(actual),
         );
     };
 
@@ -269,10 +421,10 @@ fn shr_u256_overflow() {
 #[test]
 #[cfg(not(debug_assertions))]
 fn shr_u256_overflow() {
-    // No panic without debug assertions
-    assert_eq!(u256::MAX >> 256, u256::ZERO);
-    assert_eq!(u256::MAX >> 257, u256::ZERO);
-    assert_eq!(u256::MAX >> u32::MAX, u256::ZERO);
+    // Without debug assertions, the shift amount is wrapped
+    assert_eq!(u256::MAX >> 256, u256::MAX);
+    assert_eq!(u256::MAX >> 257, u256::MAX >> 1);
+    assert_eq!(u256::MAX >> u32::MAX, u256::ONE);
 }
 
 #[test]
diff --git a/library/compiler-builtins/libm/src/math/support/float_traits.rs b/library/compiler-builtins/libm/src/math/support/float_traits.rs
index 944546601c9ca..ad9ddb5ea46ac 100644
--- a/library/compiler-builtins/libm/src/math/support/float_traits.rs
+++ b/library/compiler-builtins/libm/src/math/support/float_traits.rs
@@ -1,6 +1,11 @@
 use core::{fmt, mem, ops};
 
 use super::int_traits::{CastFrom, Int, MinInt};
+use crate::support::{DInt, DisplayHex};
+
+/// Wrapper to extract the integer type half of the float's size
+#[allow(unused)] // only used in c-b
+pub type HalfRep<F> = <<F as Float>::Int as DInt>::H;
 
 /// Whether MIPS sNaN/qNaNs should be used.
 ///
@@ -14,6 +19,7 @@ const MIPS_NAN: bool = cfg!(target_arch = "mips") || cfg!(target_arch = "mips64"
 pub trait Float:
     Copy
     + fmt::Debug
+    + DisplayHex
     + PartialEq
     + PartialOrd
     + ops::AddAssign
@@ -120,13 +126,7 @@ pub trait Float:
     /// This method returns `true` if two NaNs are compared. Use [`biteq`](Self::biteq) instead
     /// if `NaN` should not be treated separately.
     #[allow(dead_code)]
-    fn eq_repr(self, rhs: Self) -> bool {
-        if self.is_nan() && rhs.is_nan() {
-            true
-        } else {
-            self.biteq(rhs)
-        }
-    }
+    fn eq_repr(self, rhs: Self) -> bool;
 
     /// Returns true if the value is NaN.
     fn is_nan(self) -> bool;
@@ -299,6 +299,27 @@ macro_rules! float_impl {
             fn to_bits(self) -> Self::Int {
                 self.to_bits()
             }
+
+            fn eq_repr(self, rhs: Self) -> bool {
+                #[cfg(not(feature = "unmangled-names"))]
+                fn is_nan(x: $ty) -> bool {
+                    // When not using unmangled-names, the "real" compiler-builtins might not have
+                    // the necessary builtin (__unordtf2) to test whether `f128` is NaN.
+                    // FIXME(f128): Remove once the nightly toolchain has the __unordtf2 builtin
+                    // x is NaN if all the bits of the exponent are set and the significand is non-0
+                    x.to_bits() & $ty::EXP_MASK == $ty::EXP_MASK && x.to_bits() & $ty::SIG_MASK != 0
+                }
+                #[cfg(feature = "unmangled-names")]
+                fn is_nan(x: $ty) -> bool {
+                    x.is_nan()
+                }
+                if is_nan(self) && is_nan(rhs) {
+                    true
+                } else {
+                    self.to_bits() == rhs.to_bits()
+                }
+            }
+
             fn is_nan(self) -> bool {
                 self.is_nan()
             }
@@ -548,7 +569,7 @@ mod tests {
         }
         assert!(f32::NAN.is_qnan());
         // FIXME(rust-lang/rust#115567): x87 use in `is_snan` quiets the sNaN
-        if !cfg!(x86_no_sse) {
+        if !cfg!(x86_no_sse2) {
             assert!(f32::SNAN.is_snan());
         }
 
@@ -593,7 +614,7 @@ mod tests {
         }
         assert!(f64::NAN.is_qnan());
         // FIXME(rust-lang/rust#115567): x87 use in `is_snan` quiets the sNaN
-        if !cfg!(x86_no_sse) {
+        if !cfg!(x86_no_sse2) {
             assert!(f64::SNAN.is_snan());
         }
 
diff --git a/library/compiler-builtins/libm/src/math/support/hex_float.rs b/library/compiler-builtins/libm/src/math/support/hex_float.rs
index 5be0d3159de3b..e1100a4a119f0 100644
--- a/library/compiler-builtins/libm/src/math/support/hex_float.rs
+++ b/library/compiler-builtins/libm/src/math/support/hex_float.rs
@@ -1,5 +1,7 @@
 //! Utilities for working with hex float formats.
 
+pub use hex_fmt::{DisplayHex, Hex};
+
 use super::{Round, Status, f32_from_bits, f64_from_bits};
 
 /// Construct a 16-bit float from hex float representation (C-style)
@@ -324,27 +326,32 @@ const fn hex_digit(c: u8) -> Option<u8> {
     }
 }
 
-#[cfg(any(test, feature = "unstable-public-internals"))]
 mod hex_fmt {
     use core::fmt;
 
-    use crate::support::{Float, Int};
-
-    /// Format a floating point number as its IEEE hex (`%a`) representation.
-    pub struct Hexf<F>(pub F);
+    use crate::support::{Float, div_ceil_u32};
 
     // Adapted from https://github.com/ericseppanen/hexfloat2/blob/a5c27932f0ff/src/format.rs
-    #[cfg(not(feature = "compiler-builtins"))]
     pub(super) fn fmt_any_hex<F: Float>(x: &F, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         if x.is_sign_negative() {
             write!(f, "-")?;
+        } else {
+            write!(f, "+")?;
+        }
+
+        if x.is_nan() {
+            if x.is_snan() {
+                write!(f, "sNaN")?;
+            } else if x.is_nan() {
+                write!(f, "qNaN")?;
+            }
+            let payload = x.frac() & !F::SIG_TOP_BIT;
+            let width = div_ceil_u32(F::SIG_BITS, 4) as usize + 2;
+            write!(f, "({payload:#0width$x})")?;
+            return Ok(());
         }
 
-        if x.is_snan() {
-            return write!(f, "sNaN");
-        } else if x.is_nan() {
-            return write!(f, "qNaN");
-        } else if x.is_infinite() {
+        if x.is_infinite() {
             return write!(f, "inf");
         } else if *x == F::ZERO {
             return write!(f, "0x0p+0");
@@ -370,150 +377,136 @@ mod hex_fmt {
         write!(f, "0x{leading}{sig:0mwidth$x}p{exponent:+}")
     }
 
-    #[cfg(feature = "compiler-builtins")]
-    pub(super) fn fmt_any_hex<F: Float>(_x: &F, _f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        unimplemented!()
+    /// Types that can be formatted as hex via `Hex`. For ints we always print with a fixed
+    /// number of leading zeros. For floats we use the IEEE hex (`%a`) representation. The `-`
+    /// format modifier is used to print the integer hex representation rather than hex float.
+    pub trait DisplayHex {
+        #[allow(unused)] // Only used for tests and public test internals
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result;
     }
 
-    impl<F: Float> fmt::LowerHex for Hexf<F> {
+    /// A wrapper implementing formatting traits via `DisplayHex`.
+    #[allow(unused)] // Only used for tests and public test internals
+    pub struct Hex<T>(pub T);
+
+    impl<T: DisplayHex> fmt::Debug for Hex<T> {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            cfg_if! {
-                if #[cfg(feature = "compiler-builtins")] {
-                    let _ = f;
-                    unimplemented!()
-                } else {
-                    fmt_any_hex(&self.0, f)
-                }
-            }
+            self.0.fmt(f)
         }
     }
 
-    impl<F: Float> fmt::LowerHex for Hexf<(F, F)> {
+    impl<T: DisplayHex> fmt::Display for Hex<T> {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            cfg_if! {
-                if #[cfg(feature = "compiler-builtins")] {
-                    let _ = f;
-                    unimplemented!()
-                } else {
-                    write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
-                }
-            }
+            self.0.fmt(f)
         }
     }
 
-    impl<F: Float> fmt::LowerHex for Hexf<(F, i32)> {
+    impl<T: DisplayHex> fmt::LowerHex for Hex<T> {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            cfg_if! {
-                if #[cfg(feature = "compiler-builtins")] {
-                    let _ = f;
-                    unimplemented!()
-                } else {
-                    write!(f, "({:x}, {:x})", Hexf(self.0.0), Hexf(self.0.1))
-                }
-            }
+            self.0.fmt(f)
         }
     }
 
-    impl fmt::LowerHex for Hexf<i32> {
+    impl<F: Float> DisplayHex for F {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            cfg_if! {
-                if #[cfg(feature = "compiler-builtins")] {
-                    let _ = f;
-                    unimplemented!()
-                } else {
-                    write!(f, "{:#010x}", self.0)
-                }
+            if f.sign_minus() {
+                self.to_bits().fmt(f)
+            } else {
+                fmt_any_hex(self, f)
             }
         }
     }
 
-    impl<T> fmt::Debug for Hexf<T>
-    where
-        Hexf<T>: fmt::LowerHex,
-    {
-        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            cfg_if! {
-                if #[cfg(feature = "compiler-builtins")] {
-                    let _ = f;
-                    unimplemented!()
-                } else {
-                    fmt::LowerHex::fmt(self, f)
+    macro_rules! impl_int {
+        ($ity:ty) => {
+            impl DisplayHex for $ity {
+                #[inline]
+                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+                    write!(
+                        f,
+                        "{self:#0width$x}",
+                        width = ((<$ity>::BITS / 4) + 2) as usize,
+                    )
                 }
             }
-        }
+        };
     }
 
-    impl<T> fmt::Display for Hexf<T>
-    where
-        Hexf<T>: fmt::LowerHex,
-    {
+    impl_int!(i8);
+    impl_int!(i16);
+    impl_int!(i32);
+    impl_int!(i64);
+    impl_int!(i128);
+    impl_int!(isize);
+    impl_int!(u8);
+    impl_int!(u16);
+    impl_int!(u32);
+    impl_int!(u64);
+    impl_int!(u128);
+    impl_int!(usize);
+
+    // Not really a meaningful impl, but makes some generics easier.
+    impl DisplayHex for bool {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
             cfg_if! {
                 if #[cfg(feature = "compiler-builtins")] {
                     let _ = f;
                     unimplemented!()
                 } else {
-                    fmt::LowerHex::fmt(self, f)
+                    write!(f, "{self}")
                 }
             }
         }
     }
 
-    pub struct Hexi<F>(pub F);
-
-    impl<I: Int> fmt::LowerHex for Hexi<I> {
+    impl<T1> DisplayHex for (T1,)
+    where
+        T1: Copy + DisplayHex,
+    {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            cfg_if! {
-                if #[cfg(feature = "compiler-builtins")] {
-                    let _ = f;
-                    unimplemented!()
-                } else {
-                    write!(f, "{:#0width$x}", self.0, width = ((I::BITS / 4) + 2) as usize)
-                }
-            }
+            write!(f, "(")?;
+            self.0.fmt(f)?;
+            write!(f, ",)")
         }
     }
 
-    impl<T> fmt::Debug for Hexi<T>
+    impl<T1, T2> DisplayHex for (T1, T2)
     where
-        Hexi<T>: fmt::LowerHex,
+        T1: Copy + DisplayHex,
+        T2: Copy + DisplayHex,
     {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            cfg_if! {
-                if #[cfg(feature = "compiler-builtins")] {
-                    let _ = f;
-                    unimplemented!()
-                } else {
-                    fmt::LowerHex::fmt(self, f)
-                }
-            }
+            write!(f, "(")?;
+            self.0.fmt(f)?;
+            write!(f, ", ")?;
+            self.1.fmt(f)?;
+            write!(f, ")")
         }
     }
 
-    impl<T> fmt::Display for Hexi<T>
+    impl<T1, T2, T3> DisplayHex for (T1, T2, T3)
     where
-        Hexi<T>: fmt::LowerHex,
+        T1: Copy + DisplayHex,
+        T2: Copy + DisplayHex,
+        T3: Copy + DisplayHex,
     {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-            cfg_if! {
-                if #[cfg(feature = "compiler-builtins")] {
-                    let _ = f;
-                    unimplemented!()
-                } else {
-                    fmt::LowerHex::fmt(self, f)
-                }
-            }
+            write!(f, "(")?;
+            self.0.fmt(f)?;
+            write!(f, ", ")?;
+            self.1.fmt(f)?;
+            write!(f, ", ")?;
+            self.2.fmt(f)?;
+            write!(f, ")")
         }
     }
 }
 
-#[cfg(any(test, feature = "unstable-public-internals"))]
-pub use hex_fmt::*;
-
 #[cfg(test)]
 mod parse_tests {
     extern crate std;
-    use std::string::String;
+
+    use std::string::{String, ToString};
     use std::{format, println};
 
     use super::*;
@@ -563,13 +556,17 @@ mod parse_tests {
         Ok(())
     }
 
+    /// Strip the qNaN/sNaN and payload since we don't parse that.
     #[cfg_attr(not(f16_enabled), expect(unused))]
-    pub fn canonicalize_snan_str(s: String) -> String {
+    pub fn canonicalize_snan_str(mut s: String) -> String {
         if s.contains("sNaN") || s.contains("qNaN") {
-            s.replace("sNaN", "NaN").replace("qNaN", "NaN")
-        } else {
-            s
+            s = s.replace("sNaN", "NaN").replace("qNaN", "NaN");
+            if let Some((nan, payload)) = s.split_once("(") {
+                assert!(payload.ends_with(")"));
+                s = nan.to_string();
+            }
         }
+        s
     }
 
     #[test]
@@ -578,7 +575,7 @@ mod parse_tests {
         let n = 1_i32 << 14;
         for i in -n..n {
             let u = i.rotate_right(11) as u32;
-            let s = format!("{}", Hexf(f32::from_bits(u)));
+            let s = format!("{}", Hex(f32::from_bits(u)));
             let s = canonicalize_snan_str(s);
             match rounding_properties(&s) {
                 Ok(()) => (),
@@ -653,7 +650,7 @@ mod parse_tests {
     #[cfg(f128_enabled)]
     fn rounding() {
         let pi = std::f128::consts::PI;
-        let s = format!("{}", Hexf(pi));
+        let s = format!("{}", Hex(pi));
 
         for k in 0..=111 {
             let (bits, status) = parse_any(&s, 128 - k, 112 - k, Round::Nearest).unwrap();
@@ -914,62 +911,59 @@ mod tests_panicking {
     extern crate std;
     use super::*;
 
-    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
-    // hide them from the AST.
+    #[test]
     #[cfg(f16_enabled)]
-    macro_rules! f16_tests {
-        () => {
-            #[test]
-            fn test_f16_almost_extra_precision() {
-                // Exact maximum precision allowed
-                hf16("0x1.ffcp+0");
-            }
-
-            #[test]
-            #[should_panic(expected = "the value is too precise")]
-            fn test_f16_extra_precision() {
-                // One bit more than the above.
-                hf16("0x1.ffdp+0");
-            }
+    fn test_f16_almost_extra_precision() {
+        // Exact maximum precision allowed
+        hf16("0x1.ffcp+0");
+    }
 
-            #[test]
-            #[should_panic(expected = "the value is too huge")]
-            fn test_f16_overflow() {
-                // One bit more than the above.
-                hf16("0x1p+16");
-            }
+    #[test]
+    #[cfg(f16_enabled)]
+    #[should_panic(expected = "the value is too precise")]
+    fn test_f16_extra_precision() {
+        // One bit more than the above.
+        hf16("0x1.ffdp+0");
+    }
 
-            #[test]
-            fn test_f16_tiniest() {
-                let x = hf16("0x1.p-24");
-                let y = hf16("0x0.001p-12");
-                let z = hf16("0x0.8p-23");
-                assert_eq!(x, y);
-                assert_eq!(x, z);
-            }
+    #[test]
+    #[cfg(f16_enabled)]
+    #[should_panic(expected = "the value is too huge")]
+    fn test_f16_overflow() {
+        // One bit more than the above.
+        hf16("0x1p+16");
+    }
 
-            #[test]
-            #[should_panic(expected = "the value is too tiny")]
-            fn test_f16_too_tiny() {
-                hf16("0x1.p-25");
-            }
+    #[test]
+    #[cfg(f16_enabled)]
+    fn test_f16_tiniest() {
+        let x = hf16("0x1.p-24");
+        let y = hf16("0x0.001p-12");
+        let z = hf16("0x0.8p-23");
+        assert_eq!(x, y);
+        assert_eq!(x, z);
+    }
 
-            #[test]
-            #[should_panic(expected = "the value is too tiny")]
-            fn test_f16_also_too_tiny() {
-                hf16("0x0.8p-24");
-            }
+    #[test]
+    #[cfg(f16_enabled)]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f16_too_tiny() {
+        hf16("0x1.p-25");
+    }
 
-            #[test]
-            #[should_panic(expected = "the value is too tiny")]
-            fn test_f16_again_too_tiny() {
-                hf16("0x0.001p-13");
-            }
-        };
+    #[test]
+    #[cfg(f16_enabled)]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f16_also_too_tiny() {
+        hf16("0x0.8p-24");
     }
 
+    #[test]
     #[cfg(f16_enabled)]
-    f16_tests!();
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f16_again_too_tiny() {
+        hf16("0x0.001p-13");
+    }
 
     #[test]
     fn test_f32_almost_extra_precision() {
@@ -1038,68 +1032,66 @@ mod tests_panicking {
         hf64("0x1.abcdabcdabcdf8p+0");
     }
 
-    // HACK(msrv): 1.63 rejects unknown width float literals at an AST level, so use a macro to
-    // hide them from the AST.
+    #[test]
     #[cfg(f128_enabled)]
-    macro_rules! f128_tests {
-        () => {
-            #[test]
-            fn test_f128_almost_extra_precision() {
-                // Exact maximum precision allowed
-                hf128("0x1.ffffffffffffffffffffffffffffp+16383");
-            }
-
-            #[test]
-            #[should_panic(expected = "the value is too precise")]
-            fn test_f128_extra_precision() {
-                // Just below the maximum finite.
-                hf128("0x1.fffffffffffffffffffffffffffe8p+16383");
-            }
-            #[test]
-            #[should_panic(expected = "the value is too huge")]
-            fn test_f128_extra_precision_overflow() {
-                // One bit more than the above. Should overflow.
-                hf128("0x1.ffffffffffffffffffffffffffff8p+16383");
-            }
+    fn test_f128_almost_extra_precision() {
+        // Exact maximum precision allowed
+        hf128("0x1.ffffffffffffffffffffffffffffp+16383");
+    }
 
-            #[test]
-            #[should_panic(expected = "the value is too huge")]
-            fn test_f128_overflow() {
-                // One bit more than the above.
-                hf128("0x1p+16384");
-            }
+    #[test]
+    #[cfg(f128_enabled)]
+    #[should_panic(expected = "the value is too precise")]
+    fn test_f128_extra_precision() {
+        // Just below the maximum finite.
+        hf128("0x1.fffffffffffffffffffffffffffe8p+16383");
+    }
+    #[test]
+    #[cfg(f128_enabled)]
+    #[should_panic(expected = "the value is too huge")]
+    fn test_f128_extra_precision_overflow() {
+        // One bit more than the above. Should overflow.
+        hf128("0x1.ffffffffffffffffffffffffffff8p+16383");
+    }
 
-            #[test]
-            fn test_f128_tiniest() {
-                let x = hf128("0x1.p-16494");
-                let y = hf128("0x0.0000000000000001p-16430");
-                let z = hf128("0x0.8p-16493");
-                assert_eq!(x, y);
-                assert_eq!(x, z);
-            }
+    #[test]
+    #[cfg(f128_enabled)]
+    #[should_panic(expected = "the value is too huge")]
+    fn test_f128_overflow() {
+        // One bit more than the above.
+        hf128("0x1p+16384");
+    }
 
-            #[test]
-            #[should_panic(expected = "the value is too tiny")]
-            fn test_f128_too_tiny() {
-                hf128("0x1.p-16495");
-            }
+    #[test]
+    #[cfg(f128_enabled)]
+    fn test_f128_tiniest() {
+        let x = hf128("0x1.p-16494");
+        let y = hf128("0x0.0000000000000001p-16430");
+        let z = hf128("0x0.8p-16493");
+        assert_eq!(x, y);
+        assert_eq!(x, z);
+    }
 
-            #[test]
-            #[should_panic(expected = "the value is too tiny")]
-            fn test_f128_again_too_tiny() {
-                hf128("0x0.0000000000000001p-16431");
-            }
+    #[test]
+    #[cfg(f128_enabled)]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f128_too_tiny() {
+        hf128("0x1.p-16495");
+    }
 
-            #[test]
-            #[should_panic(expected = "the value is too tiny")]
-            fn test_f128_also_too_tiny() {
-                hf128("0x0.8p-16494");
-            }
-        };
+    #[test]
+    #[cfg(f128_enabled)]
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f128_again_too_tiny() {
+        hf128("0x0.0000000000000001p-16431");
     }
 
+    #[test]
     #[cfg(f128_enabled)]
-    f128_tests!();
+    #[should_panic(expected = "the value is too tiny")]
+    fn test_f128_also_too_tiny() {
+        hf128("0x0.8p-16494");
+    }
 }
 
 #[cfg(test)]
@@ -1119,7 +1111,7 @@ mod print_tests {
             use super::parse_tests::canonicalize_snan_str;
 
             let f = f16::from_bits(x);
-            let s = format!("{}", Hexf(f));
+            let s = format!("{}", Hex(f));
             let s = canonicalize_snan_str(s);
             let from_s = hf16(&s);
 
@@ -1148,10 +1140,10 @@ mod print_tests {
         //  - `f16 -> f32 -> str -> f16 -> f32`
         for x in 0..=u16::MAX {
             let f16 = f16::from_bits(x);
-            let s16 = format!("{}", Hexf(f16));
+            let s16 = format!("{}", Hex(f16));
             let s16 = canonicalize_snan_str(s16);
             let f32 = f16 as f32;
-            let s32 = format!("{}", Hexf(f32));
+            let s32 = format!("{}", Hex(f32));
             let s32 = canonicalize_snan_str(s32);
 
             let a = hf32(&s16);
@@ -1181,66 +1173,94 @@ mod print_tests {
     }
     #[test]
     fn spot_checks() {
-        assert_eq!(Hexf(f32::MAX).to_string(), "0x1.fffffep+127");
-        assert_eq!(Hexf(f64::MAX).to_string(), "0x1.fffffffffffffp+1023");
+        assert_eq!(Hex(f32::MAX).to_string(), "+0x1.fffffep+127");
+        assert_eq!(Hex(f64::MAX).to_string(), "+0x1.fffffffffffffp+1023");
 
-        assert_eq!(Hexf(f32::MIN).to_string(), "-0x1.fffffep+127");
-        assert_eq!(Hexf(f64::MIN).to_string(), "-0x1.fffffffffffffp+1023");
+        assert_eq!(Hex(f32::MIN).to_string(), "-0x1.fffffep+127");
+        assert_eq!(Hex(f64::MIN).to_string(), "-0x1.fffffffffffffp+1023");
 
-        assert_eq!(Hexf(f32::ZERO).to_string(), "0x0p+0");
-        assert_eq!(Hexf(f64::ZERO).to_string(), "0x0p+0");
+        assert_eq!(Hex(f32::ZERO).to_string(), "+0x0p+0");
+        assert_eq!(Hex(f64::ZERO).to_string(), "+0x0p+0");
 
-        assert_eq!(Hexf(f32::NEG_ZERO).to_string(), "-0x0p+0");
-        assert_eq!(Hexf(f64::NEG_ZERO).to_string(), "-0x0p+0");
+        assert_eq!(Hex(f32::NEG_ZERO).to_string(), "-0x0p+0");
+        assert_eq!(Hex(f64::NEG_ZERO).to_string(), "-0x0p+0");
 
-        assert_eq!(Hexf(f32::NAN).to_string(), "qNaN");
-        assert_eq!(Hexf(f64::NAN).to_string(), "qNaN");
-        assert_eq!(Hexf(f32::NEG_NAN).to_string(), "-qNaN");
-        assert_eq!(Hexf(f64::NEG_NAN).to_string(), "-qNaN");
-        if !cfg!(x86_no_sse) {
+        assert_eq!(Hex(f32::NAN).to_string(), "+qNaN(0x000000)");
+        assert_eq!(Hex(f64::NAN).to_string(), "+qNaN(0x0000000000000)");
+        assert_eq!(Hex(f32::NEG_NAN).to_string(), "-qNaN(0x000000)");
+        assert_eq!(Hex(f64::NEG_NAN).to_string(), "-qNaN(0x0000000000000)");
+        if !cfg!(x86_no_sse2) {
             // FIXME(rust-lang/rust#115567): calls quiet the sNaN
-            assert_eq!(Hexf(f32::SNAN).to_string(), "sNaN");
-            assert_eq!(Hexf(f64::SNAN).to_string(), "sNaN");
-            assert_eq!(Hexf(f32::NEG_SNAN).to_string(), "-sNaN");
-            assert_eq!(Hexf(f64::NEG_SNAN).to_string(), "-sNaN");
+            assert_eq!(Hex(f32::SNAN).to_string(), "+sNaN(0x200000)");
+            assert_eq!(Hex(f64::SNAN).to_string(), "+sNaN(0x4000000000000)");
+            assert_eq!(Hex(f32::NEG_SNAN).to_string(), "-sNaN(0x200000)");
+            assert_eq!(Hex(f64::NEG_SNAN).to_string(), "-sNaN(0x4000000000000)");
+            assert_eq!(Hex(f32::from_bits(u32::MAX)).to_string(), "-qNaN(0x3fffff)");
+            assert_eq!(
+                Hex(f64::from_bits(u64::MAX)).to_string(),
+                "-qNaN(0x7ffffffffffff)"
+            );
         }
 
-        assert_eq!(Hexf(f32::INFINITY).to_string(), "inf");
-        assert_eq!(Hexf(f64::INFINITY).to_string(), "inf");
+        assert_eq!(Hex(f32::INFINITY).to_string(), "+inf");
+        assert_eq!(Hex(f64::INFINITY).to_string(), "+inf");
 
-        assert_eq!(Hexf(f32::NEG_INFINITY).to_string(), "-inf");
-        assert_eq!(Hexf(f64::NEG_INFINITY).to_string(), "-inf");
+        assert_eq!(Hex(f32::NEG_INFINITY).to_string(), "-inf");
+        assert_eq!(Hex(f64::NEG_INFINITY).to_string(), "-inf");
 
         #[cfg(f16_enabled)]
         {
-            assert_eq!(Hexf(f16::MAX).to_string(), "0x1.ffcp+15");
-            assert_eq!(Hexf(f16::MIN).to_string(), "-0x1.ffcp+15");
-            assert_eq!(Hexf(f16::ZERO).to_string(), "0x0p+0");
-            assert_eq!(Hexf(f16::NEG_ZERO).to_string(), "-0x0p+0");
-            assert_eq!(Hexf(f16::NAN).to_string(), "qNaN");
-            assert_eq!(Hexf(f16::SNAN).to_string(), "sNaN");
-            assert_eq!(Hexf(f16::NEG_NAN).to_string(), "-qNaN");
-            assert_eq!(Hexf(f16::INFINITY).to_string(), "inf");
-            assert_eq!(Hexf(f16::NEG_INFINITY).to_string(), "-inf");
+            assert_eq!(Hex(f16::MAX).to_string(), "+0x1.ffcp+15");
+            assert_eq!(Hex(f16::MIN).to_string(), "-0x1.ffcp+15");
+            assert_eq!(Hex(f16::ZERO).to_string(), "+0x0p+0");
+            assert_eq!(Hex(f16::NEG_ZERO).to_string(), "-0x0p+0");
+
+            assert_eq!(Hex(f16::NAN).to_string(), "+qNaN(0x000)");
+            assert_eq!(Hex(f16::SNAN).to_string(), "+sNaN(0x100)");
+            assert_eq!(Hex(f16::NEG_NAN).to_string(), "-qNaN(0x000)");
+            assert_eq!(Hex(f16::NEG_SNAN).to_string(), "-sNaN(0x100)");
+            assert_eq!(Hex(f16::from_bits(u16::MAX)).to_string(), "-qNaN(0x1ff)");
+
+            assert_eq!(Hex(f16::INFINITY).to_string(), "+inf");
+            assert_eq!(Hex(f16::NEG_INFINITY).to_string(), "-inf");
         }
 
         #[cfg(f128_enabled)]
         {
             assert_eq!(
-                Hexf(f128::MAX).to_string(),
-                "0x1.ffffffffffffffffffffffffffffp+16383"
+                Hex(f128::MAX).to_string(),
+                "+0x1.ffffffffffffffffffffffffffffp+16383"
             );
             assert_eq!(
-                Hexf(f128::MIN).to_string(),
+                Hex(f128::MIN).to_string(),
                 "-0x1.ffffffffffffffffffffffffffffp+16383"
             );
-            assert_eq!(Hexf(f128::ZERO).to_string(), "0x0p+0");
-            assert_eq!(Hexf(f128::NEG_ZERO).to_string(), "-0x0p+0");
-            assert_eq!(Hexf(f128::NAN).to_string(), "qNaN");
-            assert_eq!(Hexf(f128::SNAN).to_string(), "sNaN");
-            assert_eq!(Hexf(f128::NEG_NAN).to_string(), "-qNaN");
-            assert_eq!(Hexf(f128::INFINITY).to_string(), "inf");
-            assert_eq!(Hexf(f128::NEG_INFINITY).to_string(), "-inf");
+            assert_eq!(Hex(f128::ZERO).to_string(), "+0x0p+0");
+            assert_eq!(Hex(f128::NEG_ZERO).to_string(), "-0x0p+0");
+
+            assert_eq!(
+                Hex(f128::NAN).to_string(),
+                "+qNaN(0x0000000000000000000000000000)"
+            );
+            assert_eq!(
+                Hex(f128::SNAN).to_string(),
+                "+sNaN(0x4000000000000000000000000000)"
+            );
+            assert_eq!(
+                Hex(f128::NEG_NAN).to_string(),
+                "-qNaN(0x0000000000000000000000000000)"
+            );
+            assert_eq!(
+                Hex(f128::NEG_SNAN).to_string(),
+                "-sNaN(0x4000000000000000000000000000)"
+            );
+            assert_eq!(
+                Hex(f128::from_bits(u128::MAX)).to_string(),
+                "-qNaN(0x7fffffffffffffffffffffffffff)"
+            );
+
+            assert_eq!(Hex(f128::INFINITY).to_string(), "+inf");
+            assert_eq!(Hex(f128::NEG_INFINITY).to_string(), "-inf");
         }
     }
 }
diff --git a/library/compiler-builtins/libm/src/math/support/int_traits.rs b/library/compiler-builtins/libm/src/math/support/int_traits.rs
index 55b609affd2e6..f113f9d62d143 100644
--- a/library/compiler-builtins/libm/src/math/support/int_traits.rs
+++ b/library/compiler-builtins/libm/src/math/support/int_traits.rs
@@ -3,6 +3,8 @@ use core::{cmp, fmt, ops};
 mod narrowing_div;
 pub use narrowing_div::NarrowingDiv;
 
+use crate::support::DisplayHex;
+
 /// Minimal integer implementations needed on all integer types, including wide integers.
 #[allow(dead_code)] // Some constants are only used with tests
 pub trait MinInt:
@@ -40,6 +42,7 @@ pub trait Int:
     + fmt::Display
     + fmt::Binary
     + fmt::LowerHex
+    + DisplayHex
     + ops::AddAssign
     + ops::SubAssign
     + ops::MulAssign
diff --git a/library/compiler-builtins/libm/src/math/support/macros.rs b/library/compiler-builtins/libm/src/math/support/macros.rs
index 550d2e92eb7c5..f1bfa21eec7f7 100644
--- a/library/compiler-builtins/libm/src/math/support/macros.rs
+++ b/library/compiler-builtins/libm/src/math/support/macros.rs
@@ -1,42 +1,67 @@
 /// `libm` cannot have dependencies, so this is vendored directly from the `cfg-if` crate
 /// (with some comments stripped for compactness).
 macro_rules! cfg_if {
-    // match if/else chains with a final `else`
-    ($(
-        if #[cfg($meta:meta)] { $($tokens:tt)* }
-    ) else * else {
-        $($tokens2:tt)*
-    }) => {
-        cfg_if! { @__items () ; $( ( ($meta) ($($tokens)*) ), )* ( () ($($tokens2)*) ), }
-    };
-
-    // match if/else chains lacking a final `else`
     (
-        if #[cfg($i_met:meta)] { $($i_tokens:tt)* }
-        $( else if #[cfg($e_met:meta)] { $($e_tokens:tt)* } )*
+        if #[cfg( $($i_meta:tt)+ )] { $( $i_tokens:tt )* }
+        $(
+            else if #[cfg( $($ei_meta:tt)+ )] { $( $ei_tokens:tt )* }
+        )*
+        $(
+            else { $( $e_tokens:tt )* }
+        )?
     ) => {
         cfg_if! {
-            @__items
-            () ;
-            ( ($i_met) ($($i_tokens)*) ),
-            $( ( ($e_met) ($($e_tokens)*) ), )*
-            ( () () ),
+            @__items () ;
+            (( $($i_meta)+ ) ( $( $i_tokens )* )),
+            $(
+                (( $($ei_meta)+ ) ( $( $ei_tokens )* )),
+            )*
+            $(
+                (() ( $( $e_tokens )* )),
+            )?
         }
     };
 
     // Internal and recursive macro to emit all the items
     //
-    // Collects all the negated cfgs in a list at the beginning and after the
-    // semicolon is all the remaining items
-    (@__items ($($not:meta,)*) ; ) => {};
-    (@__items ($($not:meta,)*) ; ( ($($m:meta),*) ($($tokens:tt)*) ), $($rest:tt)*) => {
-        #[cfg(all($($m,)* not(any($($not),*))))] cfg_if! { @__identity $($tokens)* }
-        cfg_if! { @__items ($($not,)* $($m,)*) ; $($rest)* }
+    // Collects all the previous cfgs in a list at the beginning, so they can be
+    // negated. After the semicolon are all the remaining items.
+    (@__items ( $( ($($_:tt)*) , )* ) ; ) => {};
+    (
+        @__items ( $( ($($no:tt)+) , )* ) ;
+        (( $( $($yes:tt)+ )? ) ( $( $tokens:tt )* )),
+        $( $rest:tt , )*
+    ) => {
+        // Emit all items within one block, applying an appropriate #[cfg]. The
+        // #[cfg] will require all `$yes` matchers specified and must also negate
+        // all previous matchers.
+        #[cfg(all(
+            $( $($yes)+ , )?
+            not(any( $( $($no)+ ),* ))
+        ))]
+        // Subtle: You might think we could put `$( $tokens )*` here. But if
+        // that contains multiple items then the `#[cfg(all(..))]` above would
+        // only apply to the first one. By wrapping `$( $tokens )*` in this
+        // macro call, we temporarily group the items into a single thing (the
+        // macro call) that will be included/excluded by the `#[cfg(all(..))]`
+        // as appropriate. If the `#[cfg(all(..))]` succeeds, the macro call
+        // will be included, and then evaluated, producing `$( $tokens )*`. See
+        // also the "issue #90" test below.
+        cfg_if! { @__temp_group $( $tokens )* }
+
+        // Recurse to emit all other items in `$rest`, and when we do so add all
+        // our `$yes` matchers to the list of `$no` matchers as future emissions
+        // will have to negate everything we just matched as well.
+        cfg_if! {
+            @__items ( $( ($($no)+) , )* $( ($($yes)+) , )? ) ;
+            $( $rest , )*
+        }
     };
 
-    // Internal macro to make __apply work out right for different match types,
-    // because of how macros matching/expand stuff.
-    (@__identity $($tokens:tt)*) => { $($tokens)* };
+    // See the "Subtle" comment above.
+    (@__temp_group $( $tokens:tt )* ) => {
+        $( $tokens )*
+    };
 }
 
 /// Choose between using an arch-specific implementation and the function body. Returns directly
@@ -46,7 +71,7 @@ macro_rules! cfg_if {
 /// These live in the `math::arch::some_target_arch` module.
 ///
 /// Specify a `use_arch_required` meta field if something architecture-specific must be used
-/// regardless of feature configuration (`force-soft-floats`).
+/// regardless of feature configuration (`arch`).
 ///
 /// The passed meta options do not need to account for the `arch` target feature.
 macro_rules! select_implementation {
@@ -71,8 +96,8 @@ macro_rules! select_implementation {
             }
         }
 
-        // By default, never use arch-specific implementations if we have force-soft-floats
-        #[cfg(arch_enabled)]
+        // By default, never use arch-specific implementations if `arch` is disabled.
+        #[cfg(feature = "arch")]
         select_implementation! {
             @cfg $($use_arch)?;
             // Wrap in `if true` to avoid unused warnings
@@ -146,9 +171,9 @@ macro_rules! assert_biteq {
             "{}\nl: {l:?} ({lb:#0width$x} {lh})\nr: {r:?} ({rb:#0width$x} {rh})",
             format_args!($($tt)*),
             lb = l.to_bits(),
-            lh = $crate::support::Hexf(l),
+            lh = $crate::support::Hex(l),
             rb = r.to_bits(),
-            rh = $crate::support::Hexf(r),
+            rh = $crate::support::Hex(r),
             width = ((bits / 4) + 2) as usize,
 
         );
diff --git a/library/compiler-builtins/libm/src/math/support/mod.rs b/library/compiler-builtins/libm/src/math/support/mod.rs
index 9dc872cdc1506..8bca2e60c7df8 100644
--- a/library/compiler-builtins/libm/src/math/support/mod.rs
+++ b/library/compiler-builtins/libm/src/math/support/mod.rs
@@ -17,10 +17,8 @@ pub use big::{i256, u256};
 pub(crate) use cfg_if;
 pub use env::{FpResult, Round, Status};
 #[allow(unused_imports)]
-pub use float_traits::{DFloat, Float, HFloat, IntTy};
+pub use float_traits::{DFloat, Float, HFloat, HalfRep, IntTy};
 pub(crate) use float_traits::{f32_from_bits, f64_from_bits};
-#[cfg(any(test, feature = "unstable-public-internals"))]
-pub use hex_float::Hexf;
 #[cfg(f16_enabled)]
 #[allow(unused_imports)]
 pub use hex_float::hf16;
@@ -28,7 +26,7 @@ pub use hex_float::hf16;
 #[allow(unused_imports)]
 pub use hex_float::hf128;
 #[allow(unused_imports)]
-pub use hex_float::{hf32, hf64};
+pub use hex_float::{DisplayHex, Hex, hf32, hf64};
 pub use int_traits::{CastFrom, CastInto, DInt, HInt, Int, MinInt, NarrowingDiv};
 pub use modular::linear_mul_reduction;
 
@@ -70,3 +68,10 @@ pub unsafe fn unchecked_div_isize(x: isize, y: isize) -> isize {
         }
     }
 }
+
+// FIXME(msrv): `div_ceil` is stablein 1.73.
+pub fn div_ceil_u32(a: u32, b: u32) -> u32 {
+    let d = a / b;
+    let r = a % b;
+    if r > 0 { d + 1 } else { d }
+}
diff --git a/library/compiler-builtins/libm/src/math/trunc.rs b/library/compiler-builtins/libm/src/math/trunc.rs
index 20d52a111a120..9eb902ea578f8 100644
--- a/library/compiler-builtins/libm/src/math/trunc.rs
+++ b/library/compiler-builtins/libm/src/math/trunc.rs
@@ -1,10 +1,12 @@
+use super::generic;
+
 /// Rounds the number toward 0 to the closest integral value (f16).
 ///
 /// This effectively removes the decimal part of the number, leaving the integral part.
 #[cfg(f16_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn truncf16(x: f16) -> f16 {
-    super::generic::trunc(x)
+    generic::trunc_status(x).val
 }
 
 /// Rounds the number toward 0 to the closest integral value (f32).
@@ -18,7 +20,7 @@ pub fn truncf(x: f32) -> f32 {
         args: x,
     }
 
-    super::generic::trunc(x)
+    generic::trunc_status(x).val
 }
 
 /// Rounds the number toward 0 to the closest integral value (f64).
@@ -32,7 +34,7 @@ pub fn trunc(x: f64) -> f64 {
         args: x,
     }
 
-    super::generic::trunc(x)
+    generic::trunc_status(x).val
 }
 
 /// Rounds the number toward 0 to the closest integral value (f128).
@@ -41,13 +43,105 @@ pub fn trunc(x: f64) -> f64 {
 #[cfg(f128_enabled)]
 #[cfg_attr(assert_no_panic, no_panic::no_panic)]
 pub fn truncf128(x: f128) -> f128 {
-    super::generic::trunc(x)
+    generic::trunc_status(x).val
 }
 
 #[cfg(test)]
 mod tests {
+    use super::*;
+    use crate::support::{Float, FpResult, Hex, Status};
+
+    macro_rules! cases {
+        ($f:ty) => {
+            [
+                // roundtrip
+                (0.0, 0.0, Status::OK),
+                (-0.0, -0.0, Status::OK),
+                (1.0, 1.0, Status::OK),
+                (-1.0, -1.0, Status::OK),
+                (<$f>::INFINITY, <$f>::INFINITY, Status::OK),
+                (<$f>::NEG_INFINITY, <$f>::NEG_INFINITY, Status::OK),
+                // with rounding
+                (0.1, 0.0, Status::INEXACT),
+                (-0.1, -0.0, Status::INEXACT),
+                (0.5, 0.0, Status::INEXACT),
+                (-0.5, -0.0, Status::INEXACT),
+                (0.9, 0.0, Status::INEXACT),
+                (-0.9, -0.0, Status::INEXACT),
+                (1.1, 1.0, Status::INEXACT),
+                (-1.1, -1.0, Status::INEXACT),
+                (1.5, 1.0, Status::INEXACT),
+                (-1.5, -1.0, Status::INEXACT),
+                (1.9, 1.0, Status::INEXACT),
+                (-1.9, -1.0, Status::INEXACT),
+            ]
+        };
+    }
+
+    #[track_caller]
+    fn check<F: Float>(f: fn(F) -> F, cases: &[(F, F, Status)]) {
+        for &(x, exp_res, exp_stat) in cases {
+            let FpResult { val, status } = generic::trunc_status(x);
+            assert_biteq!(val, exp_res, "generic::trunc_status({x:?}) ({})", Hex(x));
+            assert_eq!(
+                status,
+                exp_stat,
+                "{x:?} {} -> {exp_res:?} {}",
+                Hex(x),
+                Hex(exp_res)
+            );
+            let val = f(x);
+            assert_biteq!(val, exp_res, "trunc({x:?}) ({})", Hex(x));
+        }
+    }
+
+    #[test]
+    #[cfg(f16_enabled)]
+    fn check_f16() {
+        check::<f16>(truncf16, &cases!(f16));
+        check::<f16>(
+            truncf16,
+            &[
+                (hf16!("0x1p10"), hf16!("0x1p10"), Status::OK),
+                (hf16!("-0x1p10"), hf16!("-0x1p10"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f32() {
+        check::<f32>(truncf, &cases!(f32));
+        check::<f32>(
+            truncf,
+            &[
+                (hf32!("0x1p23"), hf32!("0x1p23"), Status::OK),
+                (hf32!("-0x1p23"), hf32!("-0x1p23"), Status::OK),
+            ],
+        );
+    }
+
+    #[test]
+    fn check_f64() {
+        check::<f64>(trunc, &cases!(f64));
+        check::<f64>(
+            trunc,
+            &[
+                (hf64!("0x1p52"), hf64!("0x1p52"), Status::OK),
+                (hf64!("-0x1p52"), hf64!("-0x1p52"), Status::OK),
+            ],
+        );
+    }
+
     #[test]
-    fn sanity_check() {
-        assert_eq!(super::truncf(1.1), 1.0);
+    #[cfg(f128_enabled)]
+    fn check_f128() {
+        check::<f128>(truncf128, &cases!(f128));
+        check::<f128>(
+            truncf128,
+            &[
+                (hf128!("0x1p112"), hf128!("0x1p112"), Status::OK),
+                (hf128!("-0x1p112"), hf128!("-0x1p112"), Status::OK),
+            ],
+        );
     }
 }
diff --git a/library/compiler-builtins/rust-version b/library/compiler-builtins/rust-version
index aa3876b14a221..8ce8152f91161 100644
--- a/library/compiler-builtins/rust-version
+++ b/library/compiler-builtins/rust-version
@@ -1 +1 @@
-db3e99bbab28c6ca778b13222becdea54533d908
+d0442e2800d356ae282ddcdbe0eff8798fe648b6