diff --git a/.github/workflows/build_binaries.json b/.github/workflows/build_binaries.json index 38509c9..6b04289 100644 --- a/.github/workflows/build_binaries.json +++ b/.github/workflows/build_binaries.json @@ -13,7 +13,7 @@ "rust": "nightly-2024-07-07", "target": "x86_64-unknown-linux-gnu", "cross": false, - "features": "opencl3" + "features": "opencl" }, { "name": "opencl-linux-arm64", @@ -21,7 +21,7 @@ "rust": "nightly-2024-07-07", "target": "aarch64-unknown-linux-gnu", "cross": true, - "features": "opencl3", + "features": "opencl", "build_enabled": true, "best_effort": true }, @@ -31,7 +31,25 @@ "rust": "nightly-2024-07-07", "target": "riscv64gc-unknown-linux-gnu", "cross": true, - "features": "opencl3", + "features": "opencl", + "build_enabled": true, + "best_effort": true + }, + { + "name": "combined-linux-x86_64", + "runs-on": "ubuntu-20.04", + "rust": "nightly-2024-07-07", + "target": "x86_64-unknown-linux-gnu", + "cross": false, + "features": "nvidia,opencl" + }, + { + "name": "combined-linux-arm64", + "runs-on": "ubuntu-latest", + "rust": "nightly-2024-07-07", + "target": "aarch64-unknown-linux-gnu", + "cross": true, + "features": "nvidia,opencl", "build_enabled": true, "best_effort": true }, @@ -41,15 +59,31 @@ "rust": "stable", "target": "x86_64-apple-darwin", "cross": false, - "features": "opencl3" + "features": "opencl" + }, + { + "name": "metal-macos-arm64", + "runs-on": "macos-14", + "rust": "stable", + "target": "aarch64-apple-darwin", + "cross": false, + "features": "metal" + }, + { + "name": "combined-macos-x86_64", + "runs-on": "macos-13", + "rust": "stable", + "target": "x86_64-apple-darwin", + "cross": false, + "features": "metal,opencl" }, { - "name": "opencl-macos-arm64", + "name": "combined-macos-arm64", "runs-on": "macos-14", "rust": "stable", "target": "aarch64-apple-darwin", "cross": false, - "features": "opencl3" + "features": "metal,opencl" }, { "name": "cuda-windows-x64", @@ -66,7 +100,7 @@ "target": "x86_64-pc-windows-msvc", "rustflags": "-L C:/vcpkg/packages/opencl_x64-windows/lib", "cross": false, - "features": "opencl3" + "features": "opencl" }, { "name": "cuda-windows-arm64", @@ -85,8 +119,28 @@ "target": "aarch64-pc-windows-msvc", "rustflags": "-L C:/vcpkg/packages/opencl_x64-windows/lib", "cross": false, - "features": "opencl3", + "features": "opencl", + "build_enabled": true, + "best_effort": true + }, + { + "name": "combined-windows-x64", + "runs-on": "windows-2019", + "rust": "stable", + "target": "x86_64-pc-windows-msvc", + "rustflags": "-L C:/vcpkg/packages/opencl_x64-windows/lib", + "cross": false, + "features": "opencl,nvidia" + }, + { + "name": "combined-windows-arm64", + "runs-on": "windows-latest", + "rust": "stable", + "target": "aarch64-pc-windows-msvc", + "rustflags": "-L C:/vcpkg/packages/opencl_x64-windows/lib", + "cross": false, + "features": "opencl,nvidia", "build_enabled": true, "best_effort": true } -] +] \ No newline at end of file diff --git a/.github/workflows/build_binaries.yml b/.github/workflows/build_binaries.yml index b26ed67..c04a408 100644 --- a/.github/workflows/build_binaries.yml +++ b/.github/workflows/build_binaries.yml @@ -156,7 +156,7 @@ jobs: - name: Install nVida cuda toolkit uses: jimver/cuda-toolkit@v0.2.16 - if: ${{ matrix.builds.features == 'nvidia' }} + if: contains(matrix.builds.features, 'nvidia') with: method: network sub-packages: '["nvcc", "cudart"]' @@ -170,7 +170,7 @@ jobs: protobuf-compiler - name: Install Linux dependencies - Ubuntu / OpenCL - if: ${{ startsWith(runner.os,'Linux') && ( ! matrix.builds.cross ) && ( matrix.builds.features == 'opencl3' ) }} + if: ${{ startsWith(runner.os,'Linux') && ( ! matrix.builds.cross ) && contains(matrix.builds.features, 'opencl') }} run: | sudo apt-get update sudo apt-get install --no-install-recommends --assume-yes \ diff --git a/.gitignore b/.gitignore index a1df0b4..bd04afe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,11 @@ /target config.json data +xtrgpuminer/log/xtrgpuminer.log +CUDA_gpu_status.json +OpenCL_gpu_status.json +Metal_gpu_status.json +panic.log # Ignore OS files .DS_Store diff --git a/Cargo.lock b/Cargo.lock index 15c152c..dc0ebdd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -178,7 +178,7 @@ checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -200,18 +200,18 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] name = "async-trait" -version = "0.1.85" +version = "0.1.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f934833b4b7233644e5848f235df3f57ed8c80f1528a26c3dfa13d2147fa056" +checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -234,7 +234,7 @@ checksum = "edca88bc138befd0323b20752846e6587272d3b03b0343c8ea28a6f819e6e71f" dependencies = [ "async-trait", "axum-core", - "bytes 1.9.0", + "bytes 1.10.0", "futures-util", "http", "http-body", @@ -267,7 +267,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09f2bd6146b97ae3359fa0cc6d6b376d9539582c7b4220f041a33ec24c226199" dependencies = [ "async-trait", - "bytes 1.9.0", + "bytes 1.10.0", "futures-util", "http", "http-body", @@ -399,6 +399,12 @@ dependencies = [ "digest", ] +[[package]] +name = "block" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" + [[package]] name = "block-buffer" version = "0.10.4" @@ -428,7 +434,7 @@ dependencies = [ "proc-macro-crate 3.2.0", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -478,18 +484,18 @@ checksum = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38" [[package]] name = "bytes" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "325918d6fe32f23b19878fe4b34794ae41fc19ddbe53b10571a4874d44ffd39b" +checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9" dependencies = [ "serde", ] [[package]] name = "cc" -version = "1.2.10" +version = "1.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13208fcbb66eaeffe09b99fffbe1af420f00a7b35aa99ad683dfc1aa76145229" +checksum = "e4730490333d58093109dc02c23174c3f4d490998c3fed3cc8e82d57afedb9cf" dependencies = [ "shlex", ] @@ -608,9 +614,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.27" +version = "4.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "769b0145982b4b48713e01ec42d61614425f27b7058bda7180a3a41f30104796" +checksum = "3e77c3243bd94243c03672cb5154667347c457ca271254724f9f393aee1c05ff" dependencies = [ "clap_builder", "clap_derive", @@ -630,14 +636,14 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.24" +version = "4.5.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b755194d6389280185988721fffba69495eed5ee9feeee9a599b53db80318c" +checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed" dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -665,23 +671,13 @@ dependencies = [ ] [[package]] -name = "const_format" -version = "0.2.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "126f97965c8ad46d6d9163268ff28432e8f6a1196a55578867832e3049df63dd" -dependencies = [ - "const_format_proc_macros", -] - -[[package]] -name = "const_format_proc_macros" -version = "0.2.34" +name = "core-foundation" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d57c2eccfb16dbac1f4e61e206105db5820c9d26c3c472bc17c774259ef7744" +checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" dependencies = [ - "proc-macro2", - "quote", - "unicode-xid", + "core-foundation-sys", + "libc", ] [[package]] @@ -690,6 +686,17 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core-graphics-types" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45390e6114f68f718cc7a830514a96f903cccd70d02a8f6d9f643ac4ba45afaf" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "libc", +] + [[package]] name = "core2" version = "0.4.0" @@ -775,7 +782,7 @@ checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -845,7 +852,7 @@ dependencies = [ "proc-macro2", "quote", "strsim", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -856,7 +863,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -902,9 +909,9 @@ checksum = "3c877555693c14d2f84191cfd3ad8582790fc52b5e2274b40b59cf5f5cea25c7" [[package]] name = "diesel" -version = "2.2.6" +version = "2.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12" +checksum = "04001f23ba8843dc315804fa324000376084dfb1c30794ff68dd279e6e5696d5" dependencies = [ "bigdecimal", "chrono", @@ -928,7 +935,7 @@ dependencies = [ "dsl_auto_type", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -948,7 +955,7 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25" dependencies = [ - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -991,7 +998,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1002,16 +1009,16 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "dsl_auto_type" -version = "0.1.2" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5d9abe6314103864cc2d8901b7ae224e0ab1a103a0a416661b4097b0779b607" +checksum = "139ae9aca7527f85f26dd76483eb38533fd84bd571065da1739656ef71c5ff5b" dependencies = [ "darling", "either", "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1035,7 +1042,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1115,7 +1122,28 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" dependencies = [ - "foreign-types-shared", + "foreign-types-shared 0.1.1", +] + +[[package]] +name = "foreign-types" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d737d9aa519fb7b749cbc3b962edcf310a8dd1f4b67c91c4f83975dbdd17d965" +dependencies = [ + "foreign-types-macros", + "foreign-types-shared 0.3.1", +] + +[[package]] +name = "foreign-types-macros" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.98", ] [[package]] @@ -1124,6 +1152,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" +[[package]] +name = "foreign-types-shared" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa9a19cbb55df58761df49b23516a86d432839add4af60fc256da840f66ed35b" + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -1211,7 +1245,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1333,7 +1367,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ccae279728d634d083c00f6099cb58f01cc99c145b84b8be2f6c74618d79922e" dependencies = [ "atomic-waker", - "bytes 1.9.0", + "bytes 1.10.0", "fnv", "futures-core", "futures-sink", @@ -1498,7 +1532,7 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "fnv", "itoa", ] @@ -1509,7 +1543,7 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "http", ] @@ -1519,7 +1553,7 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "futures-util", "http", "http-body", @@ -1550,7 +1584,7 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "futures-channel", "futures-util", "h2", @@ -1584,7 +1618,7 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "futures-channel", "futures-util", "http", @@ -1735,7 +1769,7 @@ checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -1801,7 +1835,7 @@ checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2056,6 +2090,15 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "malloc_buf" +version = "0.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb907fe88d54d8d9ce32a3cceab4218ed2f6b7d35617cafe9adf84e43919cb" +dependencies = [ + "libc", +] + [[package]] name = "match_cfg" version = "0.1.0" @@ -2086,6 +2129,21 @@ dependencies = [ "zeroize", ] +[[package]] +name = "metal" +version = "0.31.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f569fb946490b5743ad69813cb19629130ce9374034abe31614a36402d18f99e" +dependencies = [ + "bitflags 2.8.0", + "block", + "core-graphics-types", + "foreign-types 0.5.0", + "log", + "objc", + "paste", +] + [[package]] name = "migrations_internals" version = "2.2.0" @@ -2310,7 +2368,7 @@ checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2342,6 +2400,15 @@ dependencies = [ "libm", ] +[[package]] +name = "objc" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "915b1b472bc21c53464d6c8461c9d3af805ba1ef837e1cac254428f4a77177b1" +dependencies = [ + "malloc_buf", +] + [[package]] name = "object" version = "0.36.7" @@ -2388,13 +2455,13 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.69" +version = "0.10.70" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f5e534d133a060a3c19daec1eb3e98ec6f4685978834f2dbadfe2ec215bab64e" +checksum = "61cfb4e166a8bb8c9b55c500bc2308550148ece889be90f609377e58140f42c6" dependencies = [ "bitflags 2.8.0", "cfg-if", - "foreign-types", + "foreign-types 0.3.2", "libc", "once_cell", "openssl-macros", @@ -2409,14 +2476,14 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] name = "openssl-sys" -version = "0.9.104" +version = "0.9.105" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" +checksum = "8b22d5b84be05a8d6947c7cb71f7c849aa0f112acd4bf51c2a7c1c988ac0a9dc" dependencies = [ "cc", "libc", @@ -2435,30 +2502,28 @@ dependencies = [ [[package]] name = "parity-scale-codec" -version = "3.7.2" +version = "3.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b91c2d9a6a6004e205b7e881856fb1a0f5022d382acc2c01b52185f7b6f65997" +checksum = "306800abfa29c7f16596b5970a588435e3d5b3149683d00c12b699cc19f895ee" dependencies = [ "arrayvec", "bitvec", "byte-slice-cast", - "const_format", "impl-trait-for-tuples", "parity-scale-codec-derive", - "rustversion", "serde", ] [[package]] name = "parity-scale-codec-derive" -version = "3.7.2" +version = "3.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77555fd9d578b6470470463fded832619a5fec5ad6cbc551fe4d7507ce50cd3a" +checksum = "d830939c76d294956402033aee57a6da7b438f2294eb94864c37b0569053a42c" dependencies = [ "proc-macro-crate 3.2.0", "proc-macro2", "quote", - "syn 2.0.96", + "syn 1.0.109", ] [[package]] @@ -2495,6 +2560,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "path-clean" version = "0.1.0" @@ -2544,11 +2615,11 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e2ec53ad785f4d35dac0adea7f7dc6f1bb277ad84a680c7afefeae05d1f5916" +checksum = "dfe2e71e1471fe07709406bf725f710b02927c9c54b2b5b2ec0e8087d97c327d" dependencies = [ - "pin-project-internal 1.1.8", + "pin-project-internal 1.1.9", ] [[package]] @@ -2564,13 +2635,13 @@ dependencies = [ [[package]] name = "pin-project-internal" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d56a66c0c55993aa927429d0f8a0abfd74f084e4d9c192cffed01e418d83eefb" +checksum = "f6e859e6e5bd50440ab63c47e3ebabc90f26251f7c73c3d3e837b74a1cc3fa67" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2652,7 +2723,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac" dependencies = [ "proc-macro2", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -2725,7 +2796,7 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "prost-derive 0.11.9", ] @@ -2735,7 +2806,7 @@ version = "0.13.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c0fef6c4230e4ccf618a35c59d7ede15dea37de8427500f50aff708806e42ec" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "prost-derive 0.13.4", ] @@ -2745,7 +2816,7 @@ version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "heck 0.4.1", "itertools 0.10.5", "lazy_static", @@ -2777,7 +2848,7 @@ dependencies = [ "prost 0.13.4", "prost-types 0.13.4", "regex", - "syn 2.0.96", + "syn 2.0.98", "tempfile", ] @@ -2804,7 +2875,7 @@ dependencies = [ "itertools 0.13.0", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3030,9 +3101,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.21" +version = "0.23.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" +checksum = "9fb9263ab4eb695e42321db096e3b8fbd715a59b154d5c88d82db2175b681ba7" dependencies = [ "log", "once_cell", @@ -3105,7 +3176,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3141,7 +3212,7 @@ checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3174,7 +3245,7 @@ checksum = "6c64451ba24fc7a6a2d60fc75dd9c83c90903b19028d4eff35e88fc1e86564e9" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3410,9 +3481,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.96" +version = "2.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5d0adab1ae378d7f53bdebc67a39f1f151407ef230f0ce2883572f5d8985c80" +checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" dependencies = [ "proc-macro2", "quote", @@ -3445,7 +3516,7 @@ checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -3558,7 +3629,7 @@ dependencies = [ "async-trait", "bitflags 2.8.0", "blake2", - "bytes 1.9.0", + "bytes 1.10.0", "chrono", "cidr", "data-encoding", @@ -3570,7 +3641,7 @@ dependencies = [ "multiaddr", "nom", "once_cell", - "pin-project 1.1.8", + "pin-project 1.1.9", "prost 0.13.4", "rand", "serde", @@ -3983,7 +4054,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -4088,7 +4159,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d61fa4ffa3de412bfea335c6ecff681de2b609ba3c77ef3e00e521813a9ed9e" dependencies = [ "backtrace", - "bytes 1.9.0", + "bytes 1.10.0", "libc", "mio", "parking_lot", @@ -4107,7 +4178,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -4138,7 +4209,7 @@ version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "36943ee01a6d67977dd3f84a5a1d2efeb4ada3a1ae771cadfaa535d9d9fc6507" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "futures-core", "futures-io", "futures-sink", @@ -4153,7 +4224,7 @@ version = "0.7.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078" dependencies = [ - "bytes 1.9.0", + "bytes 1.10.0", "futures-core", "futures-sink", "pin-project-lite", @@ -4192,9 +4263,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.22" +version = "0.22.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" +checksum = "02a8b472d1a3d7c18e2d61a489aee3453fd9031c33e4f55bd533f4a7adca1bee" dependencies = [ "indexmap 2.7.1", "serde", @@ -4213,7 +4284,7 @@ dependencies = [ "async-trait", "axum", "base64 0.22.1", - "bytes 1.9.0", + "bytes 1.10.0", "h2", "http", "http-body", @@ -4222,7 +4293,7 @@ dependencies = [ "hyper-timeout", "hyper-util", "percent-encoding", - "pin-project 1.1.8", + "pin-project 1.1.9", "prost 0.13.4", "rustls-pemfile", "socket2", @@ -4246,7 +4317,7 @@ dependencies = [ "prost-build 0.13.4", "prost-types 0.13.4", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -4259,7 +4330,7 @@ dependencies = [ "futures-util", "hdrhistogram", "indexmap 1.9.3", - "pin-project 1.1.8", + "pin-project 1.1.9", "pin-project-lite", "rand", "slab", @@ -4318,7 +4389,7 @@ checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -4538,7 +4609,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "wasm-bindgen-shared", ] @@ -4560,7 +4631,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -4586,9 +4657,9 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.26.7" +version = "0.26.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d642ff16b7e79272ae451b7322067cdc17cadf68c23264be9d94a32319efe7e" +checksum = "2210b291f7ea53617fbafcc4939f10914214ec15aace5ba62293a668f322c5c9" dependencies = [ "rustls-pki-types", ] @@ -4792,9 +4863,9 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "winnow" -version = "0.6.25" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad699df48212c6cc6eb4435f35500ac6fd3b9913324f938aea302022ce19d310" +checksum = "86e376c75f4f43f44db463cf729e0d3acbf954d13e22c51e26e4c264b4ab545f" dependencies = [ "memchr", ] @@ -4845,11 +4916,12 @@ version = "0.2.14" dependencies = [ "anyhow", "axum", - "clap 4.5.27", + "clap 4.5.28", "cust", "libsqlite3-sys", "log", "log4rs", + "metal", "minotari_app_grpc", "num-format", "opencl-sys", @@ -4883,7 +4955,7 @@ dependencies = [ "log", "nohash-hasher", "parking_lot", - "pin-project 1.1.8", + "pin-project 1.1.9", "rand", "static_assertions", "web-time", @@ -4918,7 +4990,7 @@ checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "synstructure 0.13.1", ] @@ -4940,7 +5012,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -4960,7 +5032,7 @@ checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", "synstructure 0.13.1", ] @@ -4981,7 +5053,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] [[package]] @@ -5003,5 +5075,5 @@ checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.96", + "syn 2.0.98", ] diff --git a/Cargo.toml b/Cargo.toml index 869a286..726838e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,7 +4,6 @@ name = "xtrgpuminer" version = "0.2.14" [dependencies] -anyhow = "*" clap = { version = "4.5.0", features = ["derive"] } minotari_app_grpc = { git = "http://github.com/tari-project/tari" } num-format = "0.4.4" @@ -36,7 +35,11 @@ log4rs = "1.3.0" opencl-sys = "*" opencl3 = { version = "0.9.5", optional = true } thiserror = "1.0.63" +metal = { version = "0.31.0", optional = true } +anyhow = "1.0.95" [features] default = [] -nvidia = ["cust"] +nvidia = ["dep:cust"] +metal = ["dep:metal"] +opencl = ["dep:opencl3"] diff --git a/gpu_status.json b/gpu_status.json deleted file mode 100644 index 8380585..0000000 --- a/gpu_status.json +++ /dev/null @@ -1,12 +0,0 @@ -{ - "gpu_devices": [ - { - "device_index": 0, - "device_name": "NVIDIA GeForce GTX 1070", - "is_available": true, - "grid_size": 256, - "max_grid_size": 1024, - "block_size": 1000 - } - ] -} \ No newline at end of file diff --git a/src/cuda_engine.rs b/src/cuda_engine.rs index cf078d1..48c491a 100644 --- a/src/cuda_engine.rs +++ b/src/cuda_engine.rs @@ -8,9 +8,15 @@ use cust::{ module::{ModuleJitOption, ModuleJitOption::DetermineTargetFromContext}, prelude::{Module, *}, }; -use log::{debug, error, info, warn}; - -use crate::{context_impl::ContextImpl, gpu_status_file::GpuStatus, EngineImpl, FunctionImpl}; +use log::{debug, error, info}; + +use crate::{ + context_impl::ContextImpl, + engine_impl::EngineImpl, + function_impl::FunctionImpl, + gpu_status_file::{GpuDevice, GpuSettings, GpuStatus}, + multi_engine_wrapper::EngineType, +}; const LOG_TARGET: &str = "tari::gpuminer::cuda"; #[derive(Clone)] pub struct CudaEngine {} @@ -36,20 +42,27 @@ impl EngineImpl for CudaEngine { Ok(num_devices) } - fn detect_devices(&self) -> Result, anyhow::Error> { + fn get_engine_type(&self) -> EngineType { + EngineType::Cuda + } + + fn detect_devices(&self) -> Result, anyhow::Error> { + info!(target: LOG_TARGET, "Detect CUDA devices"); let num_devices = Device::num_devices()?; let mut total_devices = 0; let mut devices = Vec::with_capacity(num_devices as usize); for i in 0..num_devices { let device = Device::get_device(i)?; let name = device.name()?; - let mut gpu = GpuStatus { + let mut gpu = GpuDevice { device_name: name.clone(), - is_available: true, - block_size: 0, device_index: i, - grid_size: 0, - max_grid_size: device.get_attribute(DeviceAttribute::MaxGridDimX).unwrap_or_default() as u32, + settings: GpuSettings::default(), + status: GpuStatus { + recommended_block_size: 0, + recommended_grid_size: 0, + max_grid_size: device.get_attribute(DeviceAttribute::MaxGridDimX).unwrap_or_default() as u32, + }, }; if let Ok(context) = self .create_context(u32::try_from(i).unwrap()) @@ -60,8 +73,8 @@ impl EngineImpl for CudaEngine { .inspect_err(|e| error!(target: LOG_TARGET, "Could not create function {:?}", e)) { if let Ok((grid, block)) = func.suggested_launch_configuration(&(i as usize)) { - gpu.grid_size = grid; - gpu.block_size = block; + gpu.status.recommended_grid_size = grid; + gpu.status.recommended_block_size = block; } devices.push(gpu); total_devices += 1; diff --git a/src/engine_impl.rs b/src/engine_impl.rs index 61f9ce2..6c150dc 100644 --- a/src/engine_impl.rs +++ b/src/engine_impl.rs @@ -1,13 +1,23 @@ -use crate::{context_impl::ContextImpl, function_impl::FunctionImpl, gpu_status_file::GpuStatus}; +use std::any::Any; + +use crate::{ + context_impl::ContextImpl, + function_impl::FunctionImpl, + gpu_status_file::{GpuDevice, GpuStatus}, + multi_engine_wrapper::EngineType, +}; pub trait EngineImpl { - type Context: ContextImpl; - type Function: FunctionImpl; + type Context: Any; + type Function: Any; + + fn get_engine_type(&self) -> EngineType; + fn init(&mut self) -> Result<(), anyhow::Error>; fn num_devices(&self) -> Result; - fn detect_devices(&self) -> Result, anyhow::Error>; + fn detect_devices(&self) -> Result, anyhow::Error>; fn create_context(&self, device_index: u32) -> Result; diff --git a/src/gpu_engine.rs b/src/gpu_engine.rs index a81b679..bf005a9 100644 --- a/src/gpu_engine.rs +++ b/src/gpu_engine.rs @@ -1,4 +1,7 @@ -use crate::{engine_impl::EngineImpl, gpu_status_file::GpuStatus}; +use crate::{ + engine_impl::EngineImpl, + gpu_status_file::{GpuDevice, GpuStatus}, +}; #[derive(Clone)] pub struct GpuEngine { @@ -18,7 +21,7 @@ impl GpuEngine { self.inner.num_devices() } - pub fn detect_devices(&self) -> Result, anyhow::Error> { + pub fn detect_devices(&self) -> Result, anyhow::Error> { self.inner.detect_devices() } diff --git a/src/gpu_status_file.rs b/src/gpu_status_file.rs index 712d3a5..cae03d9 100644 --- a/src/gpu_status_file.rs +++ b/src/gpu_status_file.rs @@ -1,35 +1,62 @@ use std::{ + collections::HashMap, fs::File, io::BufReader, path::{Path, PathBuf}, }; -use anyhow; +use log::warn; -#[derive(serde::Deserialize, serde::Serialize, Debug)] +#[derive(serde::Deserialize, serde::Serialize, Debug, Clone)] pub struct GpuStatus { - pub device_index: u32, - pub device_name: String, - pub is_available: bool, - pub grid_size: u32, + pub recommended_grid_size: u32, + pub recommended_block_size: u32, pub max_grid_size: u32, - pub block_size: u32, } -#[derive(serde::Deserialize, serde::Serialize, Debug)] +#[derive(serde::Deserialize, serde::Serialize, Debug, Clone)] +pub struct GpuSettings { + pub is_excluded: bool, + pub is_available: bool, +} + +impl Default for GpuSettings { + fn default() -> Self { + Self { + is_excluded: false, + is_available: true, + } + } +} + +#[derive(serde::Deserialize, serde::Serialize, Debug, Clone)] +pub struct GpuDevice { + pub device_name: String, + pub device_index: u32, + pub status: GpuStatus, + pub settings: GpuSettings, +} + +#[derive(serde::Deserialize, serde::Serialize, Debug, Clone)] pub struct GpuStatusFile { - pub gpu_devices: Vec, + pub gpu_devices: HashMap, } impl Default for GpuStatusFile { fn default() -> Self { - Self { gpu_devices: vec![] } + Self { + gpu_devices: HashMap::new(), + } } } impl GpuStatusFile { - pub fn new(gpu_devices: Vec) -> Self { - Self { gpu_devices } + pub fn new(gpu_devices: Vec, file_path: &PathBuf) -> Self { + let resolved_gpu_file_content = Self::resolve_settings_for_detected_devices(gpu_devices, file_path); + + Self { + gpu_devices: resolved_gpu_file_content, + } } pub fn load(path: &PathBuf) -> Result { @@ -44,4 +71,35 @@ impl GpuStatusFile { serde_json::to_writer_pretty(file, self)?; Ok(()) } + + fn resolve_settings_for_detected_devices( + gpu_devices: Vec, + file_path: &PathBuf, + ) -> HashMap { + match Self::load(file_path) { + Ok(file) => { + let mut resolved_gpu_devices = HashMap::new(); + for device in gpu_devices { + let device_name = device.device_name.clone(); + let resolved_device = match file.gpu_devices.get(&device_name) { + Some(existing_device) => { + let mut resolved_device = device.clone(); + resolved_device.settings = existing_device.settings.clone(); + resolved_device + }, + None => device, + }; + resolved_gpu_devices.insert(device_name, resolved_device); + } + resolved_gpu_devices + }, + Err(e) => { + warn!("Could not load GPU status file: {}. Using detected devices", e); + gpu_devices + .into_iter() + .map(|device| (device.device_name.clone(), device)) + .collect() + }, + } + } } diff --git a/src/main.rs b/src/main.rs index 9932dc4..9cfad4c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,4 @@ use std::{ - any, - cmp, convert::TryInto, env::current_dir, fs::{self, File}, @@ -18,22 +16,23 @@ use std::{ time::{Duration, Instant}, }; -use anyhow::{anyhow, Context as AnyContext}; +use anyhow::anyhow; use clap::Parser; #[cfg(feature = "nvidia")] use cust::{ memory::{AsyncCopyDestination, DeviceCopy}, prelude::*, }; +use gpu_status_file::GpuStatus; use http::stats_collector::{self, HashrateSample}; use log::{debug, error, info, warn}; use minotari_app_grpc::{ conversions::block, tari_rpc::{Block, BlockHeader as grpc_header, NewBlockTemplate, TransactionOutput as GrpcTransactionOutput}, }; +use multi_engine_wrapper::{EngineType, MultiEngineWrapper}; use num_format::{Locale, ToFormattedString}; -use sha3::Digest; -use tari_common::{configuration::Network, initialize_logging}; +use tari_common::configuration::Network; use tari_common_types::{tari_address::TariAddress, types::FixedHash}; use tari_core::{ blocks::BlockHeader, @@ -52,24 +51,20 @@ use tokio::{ time::sleep, }; -#[cfg(feature = "nvidia")] -use crate::cuda_engine::CudaEngine; -#[cfg(feature = "opencl3")] -use crate::opencl_engine::OpenClEngine; use crate::{ config_file::ConfigFile, engine_impl::EngineImpl, - function_impl::FunctionImpl, gpu_engine::GpuEngine, gpu_status_file::GpuStatusFile, http::{config::Config, server::HttpServer}, - node_client::{ClientType, NodeClient}, - stats_store::StatsStore, + node_client::ClientType, tari_coinbase::generate_coinbase, }; mod config_file; mod context_impl; +mod multi_engine_wrapper; + #[cfg(feature = "nvidia")] mod cuda_engine; mod engine_impl; @@ -78,12 +73,16 @@ mod gpu_engine; mod gpu_status_file; mod http; mod node_client; -#[cfg(feature = "opencl3")] + +#[cfg(feature = "opencl")] mod opencl_engine; mod p2pool_client; mod stats_store; mod tari_coinbase; +#[cfg(feature = "metal")] +mod metal_engine; + const LOG_TARGET: &str = "tari::gpuminer"; static block_template_cache: OnceLock>> = OnceLock::new(); @@ -203,14 +202,6 @@ struct Cli { #[arg(short = 'd', long, alias = "detect")] detect: Option, - /// (Optional) use only specific devices - #[arg(long, alias = "use-devices", num_args=0.., value_delimiter=',')] - use_devices: Option>, - - /// (Optional) exclude specific devices from use - #[arg(long, alias = "exclude-devices", num_args=0.., value_delimiter=',')] - exclude_devices: Option>, - /// Gpu status file path #[arg(short, long, value_name = "gpu-status")] gpu_status_file: Option, @@ -220,6 +211,9 @@ struct Cli { #[arg(long)] max_template_failures: Option, + + #[arg(long)] + engine: Option, } async fn main_inner() -> Result<(), anyhow::Error> { @@ -238,351 +232,312 @@ async fn main_inner() -> Result<(), anyhow::Error> { let submit = true; - #[cfg(not(any(feature = "nvidia", feature = "opencl3")))] + #[cfg(not(any(feature = "nvidia", feature = "opencl", feature = "metal")))] { eprintln!("No GPU engine available"); process::exit(1); } - #[cfg(feature = "nvidia")] - let mut gpu_engine = GpuEngine::new(CudaEngine::new()); + let selected_cli_engine: EngineType = match cli.engine { + Some(ref engine) => EngineType::from_string(engine), + None => { + eprintln!("No engine specified"); + process::exit(105); + }, + }; - #[cfg(feature = "opencl3")] - let mut gpu_engine = GpuEngine::new(OpenClEngine::new()); + let mut multi_engine_wrapper = MultiEngineWrapper::new(selected_cli_engine.clone()); + multi_engine_wrapper.init().expect("Could not init engine"); - #[cfg(any(feature = "nvidia", feature = "opencl3"))] - { - gpu_engine.init().unwrap(); + // http server + let mut shutdown = Shutdown::new(); - // http server - let mut shutdown = Shutdown::new(); + // just create the context to test if it can run + if let Some(_detect) = cli.detect { + let default_path = { + let path = current_dir().expect("no current directory"); + path + }; - let num_devices = gpu_engine.num_devices()?; + let mut engines_that_detected_any_device: Vec = + multi_engine_wrapper.create_status_files_for_each_engine(cli.gpu_status_file.unwrap_or(default_path)); - // just create the context to test if it can run - if let Some(_detect) = cli.detect { - let gpu = gpu_engine.clone(); + if engines_that_detected_any_device.is_empty() { + eprintln!("No GPU devices detected"); + process::exit(1); + } - let gpu_devices = match gpu.detect_devices() { - Ok(gpu_stats) => gpu_stats, - Err(error) => { - warn!(target: LOG_TARGET, "No gpu device detected"); - return Err(anyhow::anyhow!("Gpu detect error: {:?}", error)); - }, - }; + return Ok(()); + } - let any_gpu_available = gpu_devices.iter().any(|g| g.is_available); - let status_file = GpuStatusFile::new(gpu_devices); - let default_path = { + let mut config = match ConfigFile::load(&cli.config.as_ref().cloned().unwrap_or_else(|| { + let mut path = current_dir().expect("no current directory"); + path.push("config.json"); + path + })) { + Ok(config) => { + info!(target: LOG_TARGET, "Config file loaded successfully"); + config + }, + Err(err) => { + eprintln!("Error loading config file: {}. Creating new one", err); + let default = ConfigFile::default(); + let path = cli.config.unwrap_or_else(|| { let mut path = current_dir().expect("no current directory"); - path.push("gpu_status.json"); + path.push("config.json"); path - }; - let path = cli.gpu_status_file.unwrap_or_else(|| default_path.clone()); - - let _ = match GpuStatusFile::load(&path) { - Ok(_) => { - if let Err(err) = status_file.save(&path) { - warn!(target: LOG_TARGET,"Error saving gpu status: {}", err); - } - status_file - }, - Err(err) => { - if let Err(err) = fs::create_dir_all(path.parent().expect("no parent")) { - warn!(target: LOG_TARGET, "Error creating directory: {}", err); - } - if let Err(err) = status_file.save(&path) { - warn!(target: LOG_TARGET,"Error saving gpu status: {}", err); - } - status_file - }, - }; - - if any_gpu_available { - return Ok(()); - } - return Err(anyhow::anyhow!("No available gpu device detected")); - } - - let mut config = match ConfigFile::load(&cli.config.as_ref().cloned().unwrap_or_else(|| { - let mut path = current_dir().expect("no current directory"); - path.push("config.json"); - path - })) { - Ok(config) => { - info!(target: LOG_TARGET, "Config file loaded successfully"); - config - }, - Err(err) => { - eprintln!("Error loading config file: {}. Creating new one", err); - let default = ConfigFile::default(); - let path = cli.config.unwrap_or_else(|| { - let mut path = current_dir().expect("no current directory"); - path.push("config.json"); - path - }); - dbg!(&path); - fs::create_dir_all(path.parent().expect("no parent"))?; - default.save(&path).expect("Could not save default config"); - default - }, - }; - - if let Some(ref addr) = cli.tari_address { - config.tari_address = addr.clone(); - } - if let Some(ref url) = cli.tari_node_url { - config.tari_node_url = url.clone(); - } - if cli.p2pool_enabled { - config.p2pool_enabled = true; - } - if let Some(enabled) = cli.http_server_enabled { - config.http_server_enabled = enabled; - } - if let Some(port) = cli.http_server_port { - config.http_server_port = port; - } - if let Some(block_size) = cli.block_size { - config.block_size = block_size; - } - if let Some(grid_size) = cli.grid_size { - let sizes: Vec = grid_size.split(',').map(|s| s.parse::().unwrap()).collect(); - if sizes.len() == 1 { - config.single_grid_size = sizes[0]; - } else { - config.per_device_grid_sizes = sizes; - } - } - if let Some(coinbase_extra) = cli.coinbase_extra { - config.coinbase_extra = coinbase_extra; - } + }); + dbg!(&path); + fs::create_dir_all(path.parent().expect("no parent"))?; + default.save(&path).expect("Could not save default config"); + default + }, + }; - if let Some(template_timeout) = cli.template_timeout_secs { - config.template_timeout_secs = template_timeout; + let default_gpu_status_path = { + let path = current_dir().expect("no current directory"); + path + }; + let mut gpu_status_path = cli.gpu_status_file.clone().unwrap_or(default_gpu_status_path); + let gpu_status_file_name = format!("{}_gpu_status.json", selected_cli_engine.to_string()); + gpu_status_path.push(gpu_status_file_name); + + let gpu_status_file = GpuStatusFile::load(&gpu_status_path).unwrap_or_else(|_| { + let default = GpuStatusFile::default(); + default + .save(&gpu_status_path) + .expect("Could not save default gpu status"); + default + }); + + if let Some(ref addr) = cli.tari_address { + config.tari_address = addr.clone(); + } + if let Some(ref url) = cli.tari_node_url { + config.tari_node_url = url.clone(); + } + if cli.p2pool_enabled { + config.p2pool_enabled = true; + } + if let Some(enabled) = cli.http_server_enabled { + config.http_server_enabled = enabled; + } + if let Some(port) = cli.http_server_port { + config.http_server_port = port; + } + if let Some(block_size) = cli.block_size { + config.block_size = block_size; + } + if let Some(grid_size) = cli.grid_size { + let sizes: Vec = grid_size.split(',').map(|s| s.parse::().unwrap()).collect(); + if sizes.len() == 1 { + config.single_grid_size = sizes[0]; + } else { + config.per_device_grid_sizes = sizes; } - // create a list of devices (by index) to use - let devices_to_use: Vec = (0..num_devices) - .filter(|x| { - if let Some(use_devices) = &cli.use_devices { - use_devices.contains(x) - } else { - true - } - }) - .filter(|x| { - if let Some(excluded_devices) = &cli.exclude_devices { - !excluded_devices.contains(x) - } else { - true - } - }) - .collect(); - - info!(target: LOG_TARGET, "Device indexes to use: {:?} from the total number of devices: {:?}", devices_to_use, num_devices); + } + if let Some(coinbase_extra) = cli.coinbase_extra { + config.coinbase_extra = coinbase_extra; + } - println!( - "Device indexes to use: {:?} from the total number of devices: {:?}", - devices_to_use, num_devices - ); + if let Some(template_timeout) = cli.template_timeout_secs { + config.template_timeout_secs = template_timeout; + } - if let Some(max_template_failures) = cli.max_template_failures { - config.max_template_failures = max_template_failures as u64; - } - // create a list of devices (by index) to use - let devices_to_use: Vec = (0..num_devices) - .filter(|x| { - if let Some(use_devices) = &cli.use_devices { - use_devices.contains(x) - } else { - true - } - }) - .filter(|x| { - if let Some(excluded_devices) = &cli.exclude_devices { - !excluded_devices.contains(x) - } else { - true - } - }) - .collect(); + if let Some(max_template_failures) = cli.max_template_failures { + config.max_template_failures = max_template_failures as u64; + } - info!(target: LOG_TARGET, "Device indexes to use: {:?} from the total number of devices: {:?}", devices_to_use, num_devices); + let gpu_devices = gpu_status_file.gpu_devices.clone(); + gpu_devices.iter().for_each(|(device_name, gpu_device)| { println!( - "Device indexes to use: {:?} from the total number of devices: {:?}", - devices_to_use, num_devices + "Device: {} is available: {} is excluded {}", + device_name, gpu_device.settings.is_available, gpu_device.settings.is_excluded ); - - if cli.find_optimal { - let mut best_hashrate = 0; - let mut best_grid_size = 1; - let mut current_grid_size = 32; - let mut is_doubling_stage = true; - let mut last_grid_size_increase = 0; - let mut prev_hashrate = 0; - - while true { - dbg!("here"); - let mut config = config.clone(); - config.single_grid_size = current_grid_size; - // config.block_size = ; - let mut threads = vec![]; - let (tx, rx) = tokio::sync::broadcast::channel(100); - for i in 0..num_devices { - if !devices_to_use.contains(&i) { - continue; - } - let c = config.clone(); - let gpu = gpu_engine.clone(); - let x = tx.clone(); - threads.push(thread::spawn(move || { - run_thread(gpu, num_devices as u64, i as u32, c, true, x) - })); + }); + + let num_devices = multi_engine_wrapper.num_devices()?; + let devices_to_use: Vec = gpu_devices + .into_values() + .filter(|d| d.settings.is_available && !d.settings.is_excluded) + .map(|d| d.device_index) + .collect(); + + println!( + "Device indexes to use: {:?} from the total number of devices: {:?}", + devices_to_use.len(), + num_devices + ); + + if cli.find_optimal { + let mut best_hashrate = 0; + let mut best_grid_size = 1; + let mut current_grid_size = 32; + let mut is_doubling_stage = true; + let mut last_grid_size_increase = 0; + let mut prev_hashrate = 0; + + while true { + dbg!("here"); + let mut config = config.clone(); + config.single_grid_size = current_grid_size; + // config.block_size = ; + let mut threads = vec![]; + let (tx, rx) = tokio::sync::broadcast::channel(100); + for i in 0..num_devices { + if !devices_to_use.contains(&i) { + continue; } - let thread_len = threads.len(); - let mut thread_hashrate = Vec::with_capacity(thread_len); - for t in threads { - match t.join() { - Ok(res) => match res { - Ok(hashrate) => { - info!(target: LOG_TARGET, "Thread join succeeded: {}", hashrate.to_formatted_string(&Locale::en)); - thread_hashrate.push(hashrate); - }, - Err(err) => { - eprintln!("Thread join succeeded but result failed: {:?}", err); - error!(target: LOG_TARGET, "Thread join succeeded but result failed: {:?}", err); - }, + let c = config.clone(); + let gpu = multi_engine_wrapper.clone(); + let x = tx.clone(); + threads.push(thread::spawn(move || { + run_thread(gpu, num_devices as u64, i as u32, c, true, x) + })); + } + let thread_len = threads.len(); + let mut thread_hashrate = Vec::with_capacity(thread_len); + for t in threads { + match t.join() { + Ok(res) => match res { + Ok(hashrate) => { + info!(target: LOG_TARGET, "Thread join succeeded: {}", hashrate.to_formatted_string(&Locale::en)); + thread_hashrate.push(hashrate); }, Err(err) => { - eprintln!("Thread join failed: {:?}", err); - error!(target: LOG_TARGET, "Thread join failed: {:?}", err); + eprintln!("Thread join succeeded but result failed: {:?}", err); + error!(target: LOG_TARGET, "Thread join succeeded but result failed: {:?}", err); }, - } + }, + Err(err) => { + eprintln!("Thread join failed: {:?}", err); + error!(target: LOG_TARGET, "Thread join failed: {:?}", err); + }, } - let total_hashrate: u64 = thread_hashrate.iter().sum(); - if total_hashrate > best_hashrate { - best_hashrate = total_hashrate; - best_grid_size = current_grid_size; - // best_grid_size = config.single_grid_size; - // best_block_size = config.block_size; - println!( - "Best hashrate: {} grid_size: {}, current_grid: {} block_size: {} Prev Hash {}", - best_hashrate, best_grid_size, current_grid_size, config.block_size, prev_hashrate - ); + } + let total_hashrate: u64 = thread_hashrate.iter().sum(); + if total_hashrate > best_hashrate { + best_hashrate = total_hashrate; + best_grid_size = current_grid_size; + // best_grid_size = config.single_grid_size; + // best_block_size = config.block_size; + println!( + "Best hashrate: {} grid_size: {}, current_grid: {} block_size: {} Prev Hash {}", + best_hashrate, best_grid_size, current_grid_size, config.block_size, prev_hashrate + ); + } + // if total_hashrate < prev_hashrate { + // println!("total decreased, breaking"); + // break; + // } + if is_doubling_stage { + if total_hashrate > prev_hashrate { + last_grid_size_increase = current_grid_size; + current_grid_size = current_grid_size * 2; + } else { + is_doubling_stage = false; + last_grid_size_increase = last_grid_size_increase / 2; + current_grid_size = current_grid_size.saturating_sub(last_grid_size_increase); + } + } else { + // Bisecting stage + if last_grid_size_increase < 2 { + break; } - // if total_hashrate < prev_hashrate { - // println!("total decreased, breaking"); - // break; - // } - if is_doubling_stage { - if total_hashrate > prev_hashrate { - last_grid_size_increase = current_grid_size; - current_grid_size = current_grid_size * 2; - } else { - is_doubling_stage = false; - last_grid_size_increase = last_grid_size_increase / 2; - current_grid_size = current_grid_size.saturating_sub(last_grid_size_increase); - } + if total_hashrate > prev_hashrate { + last_grid_size_increase = last_grid_size_increase / 2; + current_grid_size += last_grid_size_increase; } else { - // Bisecting stage - if last_grid_size_increase < 2 { - break; - } - if total_hashrate > prev_hashrate { - last_grid_size_increase = last_grid_size_increase / 2; - current_grid_size += last_grid_size_increase; - } else { - last_grid_size_increase = last_grid_size_increase / 2; - current_grid_size = current_grid_size.saturating_sub(last_grid_size_increase); - } + last_grid_size_increase = last_grid_size_increase / 2; + current_grid_size = current_grid_size.saturating_sub(last_grid_size_increase); } - prev_hashrate = total_hashrate; } - return Ok(()); + prev_hashrate = total_hashrate; } + return Ok(()); + } - let (stats_tx, stats_rx) = tokio::sync::broadcast::channel(100); - if config.http_server_enabled { - let mut stats_collector = stats_collector::StatsCollector::new(shutdown.to_signal(), stats_rx); - let stats_client = stats_collector.create_client(); - info!(target: LOG_TARGET, "Stats collector started"); - tokio::spawn(async move { - stats_collector.run().await; - info!(target: LOG_TARGET, "Stats collector shutdown"); - }); - let http_server_config = Config::new(config.http_server_port); - info!(target: LOG_TARGET, "HTTP server runs on port: {}", &http_server_config.port); - let http_server = HttpServer::new(shutdown.to_signal(), http_server_config, stats_client); - info!(target: LOG_TARGET, "HTTP server enabled"); - tokio::spawn(async move { - if let Err(error) = http_server.start().await { - println!("Failed to start HTTP server: {error:?}"); - error!(target: LOG_TARGET, "Failed to start HTTP server: {:?}", error); - } else { - info!(target: LOG_TARGET, "Success to start HTTP server"); - } - }); - } + let (stats_tx, stats_rx) = tokio::sync::broadcast::channel(100); + if config.http_server_enabled { + let mut stats_collector = stats_collector::StatsCollector::new(shutdown.to_signal(), stats_rx); + let stats_client = stats_collector.create_client(); + info!(target: LOG_TARGET, "Stats collector started"); + tokio::spawn(async move { + stats_collector.run().await; + info!(target: LOG_TARGET, "Stats collector shutdown"); + }); + let http_server_config = Config::new(config.http_server_port); + info!(target: LOG_TARGET, "HTTP server runs on port: {}", &http_server_config.port); + let http_server = HttpServer::new(shutdown.to_signal(), http_server_config, stats_client); + info!(target: LOG_TARGET, "HTTP server enabled"); + tokio::spawn(async move { + if let Err(error) = http_server.start().await { + println!("Failed to start HTTP server: {error:?}"); + error!(target: LOG_TARGET, "Failed to start HTTP server: {:?}", error); + } else { + info!(target: LOG_TARGET, "Success to start HTTP server"); + } + }); + } + + let mut threads = vec![]; + + info!(target: LOG_TARGET, "Starting template height watcher"); + + if num_devices > 0 && !benchmark { + let c = config.clone(); + let s = shutdown.to_signal(); + threads.push(thread::spawn(move || { + let runtime = Runtime::new().unwrap(); + runtime.block_on(async { run_template_height_watcher(c, s).await }) + })); + } - let mut threads = vec![]; + info!(target: LOG_TARGET, "Starting mining threads: {}", devices_to_use.len()); - if num_devices > 0 && !benchmark { + for i in 0..num_devices { + println!("Device index: {}", i); + if devices_to_use.contains(&i) { + println!("Starting thread for device index: {}", i); let c = config.clone(); - let s = shutdown.to_signal(); + let gpu = multi_engine_wrapper.clone(); + let curr_stats_tx = stats_tx.clone(); threads.push(thread::spawn(move || { - let runtime = Runtime::new().unwrap(); - runtime.block_on(async { run_template_height_watcher(c, s).await }) + run_thread(gpu, num_devices as u64, i as u32, c, benchmark, curr_stats_tx) })); } + } - for i in 0..num_devices { - println!("Device index: {}", i); - if devices_to_use.contains(&i) { - println!("Starting thread for device index: {}", i); - let c = config.clone(); - let gpu = gpu_engine.clone(); - let curr_stats_tx = stats_tx.clone(); - threads.push(thread::spawn(move || { - run_thread(gpu, num_devices as u64, i as u32, c, benchmark, curr_stats_tx) - })); - } - } - - // for t in threads { - // t.join().unwrap()?; - // } - // let mut res = Ok(()); - let thread_len = threads.len(); - let mut thread_hashrate = Vec::with_capacity(thread_len); - for t in threads { - match t.join() { - Ok(res) => match res { - Ok(hashrate) => { - info!(target: LOG_TARGET, "Thread join succeeded: {}", hashrate.to_formatted_string(&Locale::en)); - thread_hashrate.push(hashrate); - }, - Err(err) => { - error!(target: LOG_TARGET, "Thread join succeeded but result failed: {:?}", err); - }, + let thread_len = threads.len(); + let mut thread_hashrate = Vec::with_capacity(thread_len); + for t in threads { + match t.join() { + Ok(res) => match res { + Ok(hashrate) => { + info!(target: LOG_TARGET, "Thread join succeeded: {}", hashrate.to_formatted_string(&Locale::en)); + thread_hashrate.push(hashrate); }, Err(err) => { - error!(target: LOG_TARGET, "Thread join failed: {:?}", err); + error!(target: LOG_TARGET, "Thread join succeeded but result failed: {:?}", err); }, - } - } - - // kill other threads - shutdown.trigger(); - if thread_hashrate.len() == thread_len { - let total_hashrate: u64 = thread_hashrate.iter().sum(); - warn!(target: LOG_TARGET, "Total hashrate: {}", total_hashrate.to_formatted_string(&Locale::en)); - } else { - error!(target: LOG_TARGET, "Not all threads finished successfully"); + }, + Err(err) => { + error!(target: LOG_TARGET, "Thread join failed: {:?}", err); + }, } + } - Ok(()) + // kill other threads + shutdown.trigger(); + if thread_hashrate.len() == thread_len { + let total_hashrate: u64 = thread_hashrate.iter().sum(); + warn!(target: LOG_TARGET, "Total hashrate: {}", total_hashrate.to_formatted_string(&Locale::en)); + } else { + error!(target: LOG_TARGET, "Not all threads finished successfully"); } + + Ok(()) } async fn run_template_height_watcher(config: ConfigFile, shutdown: ShutdownSignal) -> Result { @@ -691,8 +646,8 @@ async fn run_template_height_watcher(config: ConfigFile, shutdown: ShutdownSigna Ok(0) } -fn run_thread( - gpu_engine: GpuEngine, +fn run_thread( + gpu_engine: MultiEngineWrapper, num_threads: u64, thread_index: u32, config: ConfigFile, @@ -718,7 +673,7 @@ fn run_thread( let context = gpu_engine.create_context(thread_index)?; - let gpu_function = gpu_engine.get_main_function(&context)?; + let gpu_function = gpu_engine.create_main_function(&context)?; // let (mut grid_size, block_size) = gpu_function // .suggested_launch_configuration() @@ -1021,7 +976,6 @@ async fn get_template_from_client( mining_hash, }) } - fn copy_u8_to_u64(input: Vec) -> Vec { let mut output: Vec = Vec::with_capacity(input.len() / 8); diff --git a/src/metal_engine.rs b/src/metal_engine.rs new file mode 100644 index 0000000..d315b93 --- /dev/null +++ b/src/metal_engine.rs @@ -0,0 +1,326 @@ +use std::mem::{size_of, transmute}; + +use log::{debug, error}; +use metal::{ + objc::rc::autoreleasepool, + Buffer, + CompileOptions, + ComputePipelineDescriptor, + ComputePipelineState, + Device, + Function, + Library, + MTLResourceOptions, + MTLResourceUsage, + MTLSize, +}; + +use crate::{ + context_impl::ContextImpl, + engine_impl::EngineImpl, + function_impl::FunctionImpl, + gpu_status_file::{GpuDevice, GpuStatus}, + multi_engine_wrapper::EngineType, +}; + +const LOG_TARGET: &str = "tari::gpuminer::metal"; +static LIBRARY_SRC: &str = include_str!("./metal_sha3.metal"); + +pub struct MetalContext { + context: Device, +} +impl MetalContext { + pub fn new(context: Device) -> Self { + MetalContext { context } + } +} +impl ContextImpl for MetalContext {} + +pub struct MetalFunction { + program: Library, +} +impl FunctionImpl for MetalFunction { + type Device = Device; + + fn suggested_launch_configuration(&self, device: &Self::Device) -> Result<(u32, u32), anyhow::Error> { + let kernel = self.program.get_function("sha3", None).unwrap_or_else(|error| { + panic!("Failed to get function sum: {:?}", error); + }); + + let pipeline_state = create_pipeline_state(device, &kernel)?; + + let grid_size = pipeline_state.max_total_threads_per_threadgroup() as u32; + let block_size = pipeline_state.thread_execution_width() as u32; + + // 896 is default block size from config.json + let block_size = block_size.max(896); + + Ok((block_size, grid_size)) + } +} + +#[derive(Clone)] +pub struct MetalEngine {} +impl MetalEngine { + pub fn new() -> Self { + MetalEngine {} + } +} + +impl EngineImpl for MetalEngine { + type Context = MetalContext; + type Function = MetalFunction; + + fn init(&mut self) -> Result<(), anyhow::Error> { + debug!(target: LOG_TARGET,"MetalEngine: Initializing"); + Ok(()) + } + + fn get_engine_type(&self) -> EngineType { + EngineType::Metal + } + + fn num_devices(&self) -> Result { + Ok(Device::all().len() as u32) + } + + fn detect_devices(&self) -> Result, anyhow::Error> { + let mut total_devices = 0; + let mut gpu_devices: Vec = vec![]; + + let all_devices = Device::all(); + + for (id, device) in all_devices.into_iter().enumerate() { + let mut gpu_device = GpuDevice { + device_name: device.name().to_string(), + device_index: id as u32, + settings: Default::default(), + status: GpuStatus { + recommended_block_size: 0, + recommended_grid_size: 0, + max_grid_size: 0, + }, + }; + + if let Ok(context) = self.create_context(gpu_device.device_index).inspect_err(|error| { + error!(target: LOG_TARGET,"Failed to create context: {:?}", error); + }) { + if let Ok(function) = self.create_main_function(&context).inspect_err(|error| { + error!(target: LOG_TARGET,"Failed to create main function: {:?}", error); + }) { + if let Ok((block_size, grid_size)) = function + .suggested_launch_configuration(&context.context) + .inspect_err(|error| { + error!(target: LOG_TARGET,"Failed to get suggested launch configuration: {:?}", error); + }) + { + gpu_device.status.recommended_block_size = block_size; + gpu_device.status.recommended_grid_size = grid_size; + gpu_device.status.max_grid_size = grid_size; + } + gpu_devices.push(gpu_device); + total_devices += 1; + } + } + } + + if total_devices > 0 { + return Ok(gpu_devices); + } + + return Err(anyhow::anyhow!("No devices found")); + } + + fn create_context(&self, device_index: u32) -> Result { + let all_devices = Device::all(); + let device = all_devices + .get(device_index as usize) + .ok_or(anyhow::anyhow!("create_context: Device not found"))?; + + Ok(MetalContext::new(device.clone())) + } + + fn create_main_function(&self, context: &Self::Context) -> Result { + let function = create_program_from_source(&context.context)?; + Ok(MetalFunction { program: function }) + } + + fn mine( + &self, + function: &Self::Function, + context: &Self::Context, + data: &[u64], + min_difficulty: u64, + nonce_start: u64, + num_iterations: u32, + block_size: u32, + grid_size: u32, + ) -> Result<(Option, u32, u64), anyhow::Error> { + autoreleasepool(|| { + let command_queue = context.context.new_command_queue(); + + let (buffer, min_difficulty_buffer, nonce_start_buffer, num_iterations_buffer, output) = + create_buffers(&context.context, data, min_difficulty, nonce_start, num_iterations); + + let command_buffer = command_queue.new_command_buffer(); + let encoder = command_buffer.new_compute_command_encoder(); + + let kernel = function + .program + .get_function("sha3", None) + .map_err(|error| anyhow::anyhow!("Failed to get function sha3: {:?}", error))?; + + let pipeline_state = create_pipeline_state(&context.context, &kernel)?; + encoder.set_compute_pipeline_state(&pipeline_state); + + debug!(target: LOG_TARGET,"Setting buffers for arguments"); + encoder.set_buffer(0, Some(&buffer), 0); + encoder.set_buffer(1, Some(&output), 0); + encoder.set_buffer(2, Some(&nonce_start_buffer), 0); + encoder.set_buffer(3, Some(&min_difficulty_buffer), 0); + encoder.set_buffer(4, Some(&num_iterations_buffer), 0); + + debug!(target: LOG_TARGET,"Describing resources"); + encoder.use_resource(&buffer, MTLResourceUsage::Read); + encoder.use_resource(&min_difficulty_buffer, MTLResourceUsage::Read); + encoder.use_resource(&nonce_start_buffer, MTLResourceUsage::Read); + encoder.use_resource(&num_iterations_buffer, MTLResourceUsage::Read); + encoder.use_resource(&output, MTLResourceUsage::Write); + encoder.memory_barrier_with_resources(&[&output]); + + let threadgroups_per_grid = MTLSize { + width: block_size as u64, + height: 1, + depth: 1, + }; + + let threads_per_thread_group = MTLSize { + width: grid_size.min(pipeline_state.max_total_threads_per_threadgroup() as u32) as u64, + height: 1, + depth: 1, + }; + + let thread_per_thread_group_combined = + threads_per_thread_group.width * threads_per_thread_group.height * threads_per_thread_group.depth; + + let threadgroups_per_grid_combined = + threadgroups_per_grid.width * threadgroups_per_grid.height * threadgroups_per_grid.depth; + + let hash_base = + (thread_per_thread_group_combined * threadgroups_per_grid_combined * num_iterations as u64) as u32; + + debug!(target: LOG_TARGET,"Threads per thread group: {:?}", threads_per_thread_group); + debug!(target: LOG_TARGET,"Threads per grid: {:?}", threadgroups_per_grid); + + encoder.dispatch_threads(threadgroups_per_grid, threads_per_thread_group); + encoder.end_encoding(); + + command_buffer.commit(); + command_buffer.wait_until_completed(); + + let ptr = output.contents() as *mut [u64; 2]; + unsafe { + let result = *ptr; + if result[0] > 0 { + return Ok((Some(result[0]), hash_base, u64::MAX / result[1])); + } else { + return Ok((None, hash_base as u32, u64::MAX / result[1])); + } + } + }) + } +} + +fn create_buffers( + context: &Device, + data: &[u64], + min_difficulty: u64, + nonce_start: u64, + num_iterations: u32, +) -> (Buffer, Buffer, Buffer, Buffer, Buffer) { + debug!(target: LOG_TARGET,"MetalEngine: Creating buffers"); + + debug!(target: LOG_TARGET,"Creating data buffer with data: {:?}", data); + let data_buffer = context.new_buffer_with_data( + unsafe { transmute(data.as_ptr()) }, + (data.len() * size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ); + + debug!(target: LOG_TARGET,"Creating min_difficulty buffer with data: {:?}", min_difficulty); + let min_difficulty_buffer = { + let min_difficulty_data = [min_difficulty]; + context.new_buffer_with_data( + unsafe { transmute(min_difficulty_data.as_ptr()) }, + (min_difficulty_data.len() * size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ) + }; + + debug!(target: LOG_TARGET,"Creating nonce_start buffer with data: {:?}", nonce_start); + let nonce_start_buffer = { + let nonce_start_data = [nonce_start]; + context.new_buffer_with_data( + unsafe { transmute(nonce_start_data.as_ptr()) }, + (nonce_start_data.len() * size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ) + }; + + debug!(target: LOG_TARGET,"Creating num_iterations buffer with data: {:?}", num_iterations); + let num_iterations_buffer = { + let num_iterations_data = [num_iterations]; + context.new_buffer_with_data( + unsafe { transmute(num_iterations_data.as_ptr()) }, + (num_iterations_data.len() * size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ) + }; + + debug!(target: LOG_TARGET,"Creating output buffer"); + let output = { + let output_data = vec![0u64, 0u64]; + context.new_buffer_with_data( + unsafe { transmute(output_data.as_ptr()) }, + (output_data.len() * size_of::()) as u64, + MTLResourceOptions::CPUCacheModeDefaultCache, + ) + }; + + ( + data_buffer, + min_difficulty_buffer, + nonce_start_buffer, + num_iterations_buffer, + output, + ) +} + +fn create_pipeline_state(device: &Device, kernel: &Function) -> Result { + debug!(target: LOG_TARGET,"MetalEngine: Creating pipeline state"); + let pipeline_state_descriptor = ComputePipelineDescriptor::new(); + pipeline_state_descriptor.set_compute_function(Some(&kernel)); + + let compute_function = pipeline_state_descriptor + .compute_function() + .ok_or_else(|| anyhow::anyhow!("Failed to get compute function from pipeline state descriptor"))?; + + let pipeline_state = device + .new_compute_pipeline_state_with_function(compute_function) + .map_err(|error| anyhow::anyhow!("Failed to create compute pipeline state: {:?}", error))?; + + Ok(pipeline_state) +} + +fn create_program_from_source(context: &Device) -> Result { + debug!(target: LOG_TARGET,"MetalEngine: Creating program from source"); + let options = CompileOptions::new(); + + let library = context + .new_library_with_source(LIBRARY_SRC, &options) + .unwrap_or_else(|error| { + panic!("Failed to create library from source: {:?}", error); + }); + + Ok(library) +} diff --git a/src/metal_sha3.metal b/src/metal_sha3.metal new file mode 100644 index 0000000..ac595a9 --- /dev/null +++ b/src/metal_sha3.metal @@ -0,0 +1,161 @@ +#include +#include + +using namespace metal; + +constant static const ulong rot[24] = {1, 3, 6, 10, 15, 21, 28, 36, + 45, 55, 2, 14, 27, 41, 56, 8, + 25, 43, 62, 18, 39, 61, 20, 44}; + +constant static const int pos[24] = {10, 7, 11, 17, 18, 3, 5, 16, + 8, 21, 24, 4, 15, 23, 19, 13, + 12, 2, 20, 14, 22, 9, 6, 1}; + +constant static const ulong RC[] = { + 0x0000000000000001ul, 0x0000000000008082ul, 0x800000000000808aul, + 0x8000000080008000ul, 0x000000000000808bul, 0x0000000080000001ul, + 0x8000000080008081ul, 0x8000000000008009ul, 0x000000000000008aul, + 0x0000000000000088ul, 0x0000000080008009ul, 0x000000008000000aul, + 0x000000008000808bul, 0x800000000000008bul, 0x8000000000008089ul, + 0x8000000000008003ul, 0x8000000000008002ul, 0x8000000000000080ul, + 0x000000000000800aul, 0x800000008000000aul, 0x8000000080008081ul, + 0x8000000000008080ul, 0x0000000080000001ul, 0x8000000080008008ul, +}; + +kernel void sha3(device ulong *buffer [[ buffer(0) ]], + device ulong *output_1 [[ buffer(1) ]], + device ulong& nonce_start [[ buffer(2) ]], + device ulong& difficulty [[ buffer(3) ]], + device uint& num_rounds [[ buffer(4) ]], + uint gid [[ thread_position_in_grid ]], + uint threads_per_simdgroup [[ threads_per_grid ]] + ) { + + ulong state[25]; + for (uint i = 0; i < num_rounds; i++) { + for (uint j = 0; j < 25; j++) { + state[j] = 0; + } + state[0] = nonce_start + gid + i * threads_per_simdgroup; + state[1] = buffer[1]; + state[2] = buffer[2]; + state[3] = buffer[3]; + state[4] = buffer[4]; + state[5] = buffer[5]; + + state[16] ^= 0x8000000000000000ull; + + uint r, x, y, t; + ulong tmp, current, C[5]; + for (r = 0; r < 24; ++r) { + for (x = 0; x < 5; ++x) { + C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ state[x + 20]; + } + for (x = 0; x < 5; ++x) { + tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ull); + for (y = 0; y < 5; ++y) { + state[x + y * 5] ^= tmp; + } + } + current = state[1]; + for (t = 0; t < 24; ++t) { + tmp = state[pos[t]]; + state[pos[t]] = rotate(current, rot[t]); + current = tmp; + } + for (y = 0; y < 25; y += 5) { + for (x = 0; x < 5; ++x) { + C[x] = state[y + x]; + } + for (x = 0; x < 5; ++x) { + state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); + } + } + state[0] ^= RC[r]; + } + + // Re-initialize state for rounds 2 and 3 + for (uint j = 4; j < 25; j++) { + state[j] = 0; + } + state[4] = 0x06; + state[16] = 0x8000000000000000ull; + + for (r = 0; r < 24; ++r) { + for (x = 0; x < 5; ++x) { + C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ state[x + 20]; + } + for (x = 0; x < 5; ++x) { + tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ull); + for (y = 0; y < 5; ++y) { + state[x + y * 5] ^= tmp; + } + } + current = state[1]; + for (t = 0; t < 24; ++t) { + tmp = state[pos[t]]; + state[pos[t]] = rotate(current, rot[t]); + current = tmp; + } + for (y = 0; y < 25; y += 5) { + for (x = 0; x < 5; ++x) { + C[x] = state[y + x]; + } + for (x = 0; x < 5; ++x) { + state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); + } + } + state[0] ^= RC[r]; + } + + // Re-initialize state for round 3 + for (uint j = 4; j < 25; j++) { + state[j] = 0; + } + state[4] = 0x06; + state[16] = 0x8000000000000000ull; + + // Round 3 + for (r = 0; r < 24; ++r) { + for (x = 0; x < 5; ++x) { + C[x] = state[x] ^ state[x + 5] ^ state[x + 10] ^ state[x + 15] ^ state[x + 20]; + } + for (x = 0; x < 5; ++x) { + tmp = C[(x + 4) % 5] ^ rotate(C[(x + 1) % 5], 1ull); + for (y = 0; y < 5; ++y) { + state[x + y * 5] ^= tmp; + } + } + current = state[1]; + for (t = 0; t < 24; ++t) { + tmp = state[pos[t]]; + state[pos[t]] = rotate(current, rot[t]); + current = tmp; + } + for (y = 0; y < 25; y += 5) { + for (x = 0; x < 5; ++x) { + C[x] = state[y + x]; + } + for (x = 0; x < 5; ++x) { + state[x + y] = C[x] ^ (~C[(x + 1) % 5] & C[(x + 2) % 5]); + } + } + state[0] ^= RC[r]; + } + + // Check difficulty + ulong swap = reverse_bits(state[0]); + __atomic_thread_fence(memory_order::memory_order_relaxed); + threadgroup_barrier(mem_flags::mem_threadgroup); + if (swap < difficulty) { + if (output_1[1] == 0 || output_1[1] > swap) { + output_1[0] = nonce_start + gid + i * threads_per_simdgroup; + output_1[1] = swap; + } + } else { + if (output_1[1] == 0 || output_1[1] > swap) { + output_1[1] = swap; + } + } + } +} diff --git a/src/multi_engine_wrapper.rs b/src/multi_engine_wrapper.rs new file mode 100644 index 0000000..f8a7ac3 --- /dev/null +++ b/src/multi_engine_wrapper.rs @@ -0,0 +1,292 @@ +use std::{any::Any, fs::create_dir_all, path::PathBuf}; + +use log::warn; + +#[cfg(feature = "nvidia")] +use crate::cuda_engine::CudaEngine; +#[cfg(feature = "metal")] +use crate::metal_engine::MetalEngine; +#[cfg(feature = "opencl")] +use crate::opencl_engine::OpenClEngine; +use crate::{ + engine_impl::EngineImpl, + gpu_engine::GpuEngine, + gpu_status_file::{GpuDevice, GpuStatus, GpuStatusFile}, +}; + +const LOG_TARGET: &str = "tari::gpuminer::multi_engine_wrapper"; + +#[derive(Debug, PartialEq, Clone)] +pub enum EngineType { + Cuda, + OpenCL, + Metal, +} + +impl EngineType { + pub fn to_string(&self) -> String { + match self { + EngineType::Cuda => "CUDA".to_string(), + EngineType::OpenCL => "OpenCL".to_string(), + EngineType::Metal => "Metal".to_string(), + } + } + + pub fn from_string(engine_type: &str) -> Self { + match engine_type { + "CUDA" => EngineType::Cuda, + "OpenCL" => EngineType::OpenCL, + "Metal" => EngineType::Metal, + _ => panic!("Unknown engine type"), + } + } +} + +#[derive(Clone)] +pub struct MultiEngineWrapper { + selected_engine: EngineType, + #[cfg(feature = "nvidia")] + cuda_engine: GpuEngine, + #[cfg(feature = "opencl")] + opencl_engine: GpuEngine, + #[cfg(feature = "metal")] + metal_engine: GpuEngine, +} + +impl MultiEngineWrapper { + pub fn new(selected_engine: EngineType) -> Self { + Self { + selected_engine, + #[cfg(feature = "nvidia")] + cuda_engine: GpuEngine::new(CudaEngine::new()), + #[cfg(feature = "opencl")] + opencl_engine: GpuEngine::new(OpenClEngine::new()), + #[cfg(feature = "metal")] + metal_engine: GpuEngine::new(MetalEngine::new()), + } + } + + pub fn create_status_file( + &self, + destination_folder: &PathBuf, + engine_type: EngineType, + gpu_devices: Vec, + ) -> Result<(), anyhow::Error> { + let file_name = format!("{}_gpu_status.json", engine_type.to_string()); + let status_file_path = destination_folder.join(file_name); + + let status_file = GpuStatusFile::new(gpu_devices, &status_file_path); + + let _ = match GpuStatusFile::load(&status_file_path) { + Ok(_) => { + if let Err(err) = status_file.save(&status_file_path) { + warn!(target: LOG_TARGET,"Error saving gpu status: {}", err); + } + status_file + }, + Err(_) => { + if let Err(err) = create_dir_all(&status_file_path.parent().expect("no parent")) { + warn!(target: LOG_TARGET, "Error creating directory: {}", err); + } + if let Err(err) = status_file.save(&status_file_path) { + warn!(target: LOG_TARGET,"Error saving gpu status: {}", err); + } + status_file + }, + }; + + return Ok(()); + } + + pub fn create_status_files_for_each_engine(&mut self, destination_folder: PathBuf) -> Vec { + let mut engines_with_created_status_files: Vec = Vec::new(); + #[cfg(feature = "opencl")] + { + if let Err(err) = self.opencl_engine.init() { + warn!(target: LOG_TARGET, "Error initializing OpenCL engine: {}", err); + } + match self.opencl_engine.detect_devices() { + Ok(gpu_devices) => { + if let Ok(_) = self.create_status_file(&destination_folder, EngineType::OpenCL, gpu_devices) { + engines_with_created_status_files.push(EngineType::OpenCL); + } + }, + Err(err) => { + warn!(target: LOG_TARGET, "Error detecting OpenCL devices: {}", err); + }, + }; + } + + #[cfg(feature = "nvidia")] + { + if let Err(err) = self.cuda_engine.init() { + warn!(target: LOG_TARGET, "Error initializing CUDA engine: {}", err); + } + match self.cuda_engine.detect_devices() { + Ok(gpu_devices) => { + if let Ok(_) = self.create_status_file(&destination_folder, EngineType::Cuda, gpu_devices) { + engines_with_created_status_files.push(EngineType::Cuda); + } + }, + Err(err) => { + warn!(target: LOG_TARGET, "Error detecting CUDA devices: {}", err); + }, + }; + } + + #[cfg(feature = "metal")] + { + if let Err(err) = self.metal_engine.init() { + warn!(target: LOG_TARGET, "Error initializing Metal engine: {}", err); + } + match self.metal_engine.detect_devices() { + Ok(gpu_devices) => { + if let Ok(_) = self.create_status_file(&destination_folder, EngineType::Metal, gpu_devices) { + engines_with_created_status_files.push(EngineType::Metal); + } + }, + Err(err) => { + warn!(target: LOG_TARGET, "Error detecting Metal devices: {}", err); + }, + }; + } + + return engines_with_created_status_files; + } +} + +impl EngineImpl for MultiEngineWrapper { + type Context = Box; + type Function = Box; + + fn get_engine_type(&self) -> EngineType { + self.selected_engine.clone() + } + + fn init(&mut self) -> Result<(), anyhow::Error> { + match self.selected_engine { + #[cfg(feature = "nvidia")] + EngineType::Cuda => self.cuda_engine.init(), + #[cfg(feature = "opencl")] + EngineType::OpenCL => self.opencl_engine.init(), + #[cfg(feature = "metal")] + EngineType::Metal => self.metal_engine.init(), + _ => panic!("Unknown engine type"), + } + } + + fn num_devices(&self) -> Result { + match self.selected_engine { + #[cfg(feature = "nvidia")] + EngineType::Cuda => self.cuda_engine.num_devices(), + #[cfg(feature = "opencl")] + EngineType::OpenCL => self.opencl_engine.num_devices(), + #[cfg(feature = "metal")] + EngineType::Metal => self.metal_engine.num_devices(), + _ => panic!("Unknown engine type"), + } + } + + fn detect_devices(&self) -> Result, anyhow::Error> { + match self.selected_engine { + #[cfg(feature = "nvidia")] + EngineType::Cuda => self.cuda_engine.detect_devices(), + #[cfg(feature = "opencl")] + EngineType::OpenCL => self.opencl_engine.detect_devices(), + #[cfg(feature = "metal")] + EngineType::Metal => self.metal_engine.detect_devices(), + _ => panic!("Unknown engine type"), + } + } + + fn create_context(&self, device_index: u32) -> Result { + match self.selected_engine { + #[cfg(feature = "nvidia")] + EngineType::Cuda => self + .cuda_engine + .create_context(device_index) + .map(|f| Box::new(f) as Box), + #[cfg(feature = "opencl")] + EngineType::OpenCL => self + .opencl_engine + .create_context(device_index) + .map(|f| Box::new(f) as Box), + #[cfg(feature = "metal")] + EngineType::Metal => self + .metal_engine + .create_context(device_index) + .map(|f| Box::new(f) as Box), + _ => panic!("Unknown engine type"), + } + } + + fn create_main_function(&self, context: &Self::Context) -> Result { + match self.selected_engine { + #[cfg(feature = "nvidia")] + EngineType::Cuda => self + .cuda_engine + .get_main_function(context.downcast_ref().unwrap()) + .map(|f| Box::new(f) as Box), + #[cfg(feature = "opencl")] + EngineType::OpenCL => self + .opencl_engine + .get_main_function(context.downcast_ref().unwrap()) + .map(|f| Box::new(f) as Box), + #[cfg(feature = "metal")] + EngineType::Metal => self + .metal_engine + .get_main_function(context.downcast_ref().unwrap()) + .map(|f| Box::new(f) as Box), + _ => panic!("Unknown engine type"), + } + } + + fn mine( + &self, + function: &Self::Function, + context: &Self::Context, + data: &[u64], + min_difficulty: u64, + nonce_start: u64, + num_iterations: u32, + block_size: u32, + grid_size: u32, + ) -> Result<(Option, u32, u64), anyhow::Error> { + match self.selected_engine { + #[cfg(feature = "nvidia")] + EngineType::Cuda => self.cuda_engine.mine( + function.downcast_ref().unwrap(), + context.downcast_ref().unwrap(), + data, + min_difficulty, + nonce_start, + num_iterations, + block_size, + grid_size, + ), + #[cfg(feature = "opencl")] + EngineType::OpenCL => self.opencl_engine.mine( + function.downcast_ref().unwrap(), + context.downcast_ref().unwrap(), + data, + min_difficulty, + nonce_start, + num_iterations, + block_size, + grid_size, + ), + #[cfg(feature = "metal")] + EngineType::Metal => self.metal_engine.mine( + function.downcast_ref().unwrap(), + context.downcast_ref().unwrap(), + data, + min_difficulty, + nonce_start, + num_iterations, + block_size, + grid_size, + ), + _ => panic!("Unknown engine type"), + } + } +} diff --git a/src/node_client.rs b/src/node_client.rs index ecea3de..782428d 100644 --- a/src/node_client.rs +++ b/src/node_client.rs @@ -5,7 +5,6 @@ use log::{error, info, warn}; use minotari_app_grpc::tari_rpc::{ base_node_client::BaseNodeClient, pow_algo::PowAlgos, - sha_p2_pool_client::ShaP2PoolClient, Block, Empty, GetNewBlockResult, diff --git a/src/opencl_engine.rs b/src/opencl_engine.rs index a8f3a3a..5bd2439 100644 --- a/src/opencl_engine.rs +++ b/src/opencl_engine.rs @@ -22,11 +22,13 @@ use crate::{ context_impl::ContextImpl, engine_impl::EngineImpl, function_impl::FunctionImpl, - gpu_status_file::GpuStatus, + gpu_status_file::{GpuDevice, GpuSettings, GpuStatus}, + multi_engine_wrapper::EngineType, }; const LOG_TARGET: &str = "tari::gpuminer::opencl"; +#[derive(Clone)] pub struct OpenClEngineInner { platforms: Vec, } @@ -56,6 +58,10 @@ impl EngineImpl for OpenClEngine { Ok(()) } + fn get_engine_type(&self) -> EngineType { + EngineType::OpenCL + } + fn num_devices(&self) -> Result { let mut total_devices = 0; let lock = self.inner.read().unwrap(); @@ -67,9 +73,9 @@ impl EngineImpl for OpenClEngine { Ok(total_devices as u32) } - fn detect_devices(&self) -> Result, anyhow::Error> { + fn detect_devices(&self) -> Result, anyhow::Error> { let mut total_devices = 0; - let mut gpu_devices: Vec = vec![]; + let mut gpu_devices: Vec = vec![]; let lock = self.inner.read().unwrap(); let platforms = lock.platforms.clone(); drop(lock); @@ -84,13 +90,15 @@ impl EngineImpl for OpenClEngine { let name = dev.name().unwrap_or_default() as String; debug!(target: LOG_TARGET, "Device index {:?}: {}", total_devices, &name); println!("device: {}", &name); - let mut gpu = GpuStatus { + let mut gpu = GpuDevice { device_name: name, - device_index: id as u32, - is_available: true, - max_grid_size: dev.max_work_group_size().unwrap_or_default() as u32, - grid_size: 0, - block_size: 0, + device_index: total_devices as u32, + settings: GpuSettings::default(), + status: GpuStatus { + max_grid_size: dev.max_work_group_size().unwrap_or_default() as u32, + recommended_grid_size: 0, + recommended_block_size: 0, + }, }; if let Ok(context) = self .create_context(u32::try_from(id).unwrap()) @@ -101,8 +109,8 @@ impl EngineImpl for OpenClEngine { .inspect_err(|e| error!(target: LOG_TARGET, "Could not create function {:?}", e)) { if let Ok((grid, block)) = func.suggested_launch_configuration(&dev) { - gpu.grid_size = grid; - gpu.block_size = block; + gpu.status.recommended_grid_size = grid; + gpu.status.recommended_block_size = block; } gpu_devices.push(gpu); total_devices += 1; diff --git a/src/tari_coinbase.rs b/src/tari_coinbase.rs index f7588bc..3d28e38 100644 --- a/src/tari_coinbase.rs +++ b/src/tari_coinbase.rs @@ -1,6 +1,7 @@ use rand::rngs::OsRng; use tari_common_types::tari_address::TariAddress; use tari_core::{ + self, consensus::ConsensusConstants, one_sided::{ diffie_hellman_stealth_domain_hasher, @@ -23,8 +24,6 @@ use tari_core::{ CoinbaseBuilder, }, }; -use tari_crypto::keys::PublicKey as PK; -use tari_key_manager::key_manager_service::KeyManagerInterface; pub async fn generate_coinbase( fee: MicroMinotari,