31aa85733e
Brings AMD cards to parity with NVIDIA for monitoring/control surface, which was NVML-only. New src/amd_smi.rs is a gpu_tune::GpuTuner backed by Linux amdgpu sysfs (power1_average, temp1_input edge, freq1_input sclk, pp_dpm_sclk/mclk), matched to the device by PCI bus id from OpenCL cl_khr_pci_bus_info. gpu_tune is un-gated to compile under the gpu feature; open() probes NVML then amd_smi. GpuSolver carries the tuner and Backend::Gpu dispatches power/temp/clocks, so the TUI and --benchmark now show power, temperature, clocks and Sol/W for AMD. Telemetry-only — setters are Unsupported (amdgpu control nodes are root-only). --target-temp <C> adds an opt-in software governor (miner::govern_cadence) that paces solve cadence to hold edge temperature, no hardware writes/root. With small thermal throttle it won't beat flat-out on raw Sol/s; it's a temp/efficiency lever. Unit-tested controller; flag/plumbing verified live. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1231 lines
48 KiB
Rust
1231 lines
48 KiB
Rust
//! jackpotminer — a GPU-accelerated Equihash 192,7 miner for ZClassic and other
|
||
//! coins using the same proof-of-work.
|
||
|
||
mod blake;
|
||
mod control;
|
||
mod controls;
|
||
mod cpu_groups;
|
||
mod equihash;
|
||
mod miner;
|
||
mod params;
|
||
mod stratum;
|
||
mod tui;
|
||
|
||
#[cfg(feature = "gpu")]
|
||
mod gpu;
|
||
|
||
// AMD-tuned OpenCL kernel driver (selected by GpuSolver for AMD-vendor devices).
|
||
#[cfg(feature = "gpu")]
|
||
mod gpu_amd;
|
||
|
||
// AMD GPU telemetry via Linux amdgpu sysfs (a `gpu_tune::GpuTuner` backend).
|
||
#[cfg(feature = "gpu")]
|
||
mod amd_smi;
|
||
|
||
// Runtime dynamic-library loader (dlopen) for the CUDA driver + NVML.
|
||
#[cfg(feature = "cuda")]
|
||
mod dylib;
|
||
|
||
#[cfg(feature = "cuda")]
|
||
mod cuda;
|
||
|
||
#[cfg(feature = "cuda")]
|
||
mod nvml;
|
||
|
||
// Platform-agnostic GPU tuning/telemetry surface. The trait + policy compile for
|
||
// either GPU backend; NVML (cuda) and amd_smi (gpu) are the implementations.
|
||
#[cfg(any(feature = "cuda", feature = "gpu"))]
|
||
mod gpu_tune;
|
||
|
||
use std::io::IsTerminal;
|
||
use std::sync::atomic::{AtomicBool, Ordering};
|
||
use std::sync::Arc;
|
||
|
||
use anyhow::{anyhow, Context, Result};
|
||
use clap::{CommandFactory, FromArgMatches, Parser};
|
||
use log::{info, warn};
|
||
|
||
use crate::miner::BackendSpec;
|
||
use crate::stratum::StratumClient;
|
||
|
||
/// Fallback stratum endpoint: the pool that is mined when no URL is supplied
/// through `--url` or a config file.
const DEFAULT_POOL_URL: &str = "stratum+tcp://zcl.jackpot.tools:3333";
|
||
|
||
/// Command-line options.
|
||
#[derive(Parser, Debug)]
|
||
#[command(name = "jackpotminer", version, about = "equihash 192,7 miner")]
|
||
struct Args {
|
||
/// Load options from a TOML config file. Values in the file are applied
|
||
/// unless the same option is also given on the command line (CLI overrides
|
||
/// the file overrides defaults). Keys mirror the long flag names without
|
||
/// "--"; see mine.example.toml.
|
||
#[arg(long, value_name = "FILE")]
|
||
config: Option<String>,
|
||
|
||
/// Pool URL, e.g. stratum+tcp://zcl.pool.example:3032. Defaults to
|
||
/// stratum+tcp://zcl.jackpot.tools:3333 when unset (here and in the config).
|
||
#[arg(long)]
|
||
url: Option<String>,
|
||
|
||
/// Pool port, used when --url has no ":port" (e.g. --url pool.example --port 3032).
|
||
#[arg(long)]
|
||
port: Option<u16>,
|
||
|
||
/// Worker / wallet login (e.g. address.worker).
|
||
#[arg(short = 'u', long = "user", default_value = "")]
|
||
user: String,
|
||
|
||
/// Worker password / pool mode selector. Set to "no-jackpot" for PPLNS;
|
||
/// any other value (the default) mines the jackpot. (--solo and --jackpot
|
||
/// take precedence.)
|
||
#[arg(short, long, default_value = "jackpot")]
|
||
pass: String,
|
||
|
||
/// Use PPLNS: set the pool password to "no-jackpot" (overrides --pass;
|
||
/// opts out of jackpot mining).
|
||
#[arg(long)]
|
||
no_jackpot: bool,
|
||
|
||
/// Set the pool password to "solo" (overrides --pass; for solo mining on
|
||
/// pools that use this convention).
|
||
#[arg(long, conflicts_with = "no_jackpot")]
|
||
solo: bool,
|
||
|
||
/// Jackpot participation as a whole percent, 3 (3%) to 100 (100%); sets the
|
||
/// pool password to "jackpot.<percent>" (e.g. jackpot.50), overriding --pass.
|
||
#[arg(long, value_name = "PERCENT", value_parser = clap::value_parser!(u32).range(3..=100), conflicts_with_all = ["no_jackpot", "solo"])]
|
||
jackpot: Option<u32>,
|
||
|
||
/// Pause mining if no new job arrives within this many seconds (stale work
|
||
/// guard); resumes automatically when fresh work arrives. Default 600 (10
|
||
/// minutes). 0 disables.
|
||
#[arg(long, value_name = "SECS", default_value_t = 600)]
|
||
job_timeout: u64,
|
||
|
||
/// Open a local control server on 127.0.0.1:<PORT> so the GUI config tool can
|
||
/// retrieve and adjust live settings (device enable, clocks/power, CPU group
|
||
/// size/rows) on the fly. Off by default; localhost-only, no auth.
|
||
#[arg(long, value_name = "PORT")]
|
||
control_port: Option<u16>,
|
||
|
||
/// CPU threads for the solver (defaults to all cores).
|
||
#[arg(short, long)]
|
||
threads: Option<usize>,
|
||
|
||
/// Force the CPU hashing backend even when GPU support is compiled in.
|
||
#[arg(long)]
|
||
cpu: bool,
|
||
|
||
/// Enable CPU mining at startup: the CPU mining rows (below the device table)
|
||
/// begin enabled and mine alongside the selected backend. Off by default;
|
||
/// rows can also be toggled live in the dashboard with Backspace. Note: each
|
||
/// enabled row runs one full solve (~4 GB RAM) across its cores.
|
||
#[arg(long)]
|
||
cpu_mining: bool,
|
||
|
||
/// Which logical CPU cores to use for CPU mining, e.g. "0-7", "0,2,4,6", or
|
||
/// "0-3,8-11" (default: "all"). Cores are grouped into toggleable rows of
|
||
/// --cpu-group-size (each row runs one solve with its threads pinned to its
|
||
/// cores). Combine with --cpu-mining to start immediately (e.g. headless:
|
||
/// --cpu-mining --cpu-cores 0-7).
|
||
#[arg(long, value_name = "SPEC")]
|
||
cpu_cores: Option<String>,
|
||
|
||
/// Cores per CPU mining row. Each row runs one shared solve across its
|
||
/// cores; larger groups cut memory sharply: total RAM is ~4 GB × (enabled
|
||
/// cores / this size). Rows align to core-index blocks of this size. Capped
|
||
/// by core count so the row count stays manageable — ≤4 cores toggle
|
||
/// individually (1), 5-8 cores in groups of ≤2, more than 8 in groups of ≤4
|
||
/// — and the default is that cap. Cycle it live (within the cap) with 'g'.
|
||
#[arg(long, value_name = "N", default_value_t = 4)]
|
||
cpu_group_size: usize,
|
||
|
||
/// CPU solver bucket clamp: cap each exact-collision group at N entries.
|
||
/// This bounds the naive Wagner algorithm's degenerate-collision blow-up
|
||
/// (the same bound the GPU enforces via fixed bucket slots) and is required
|
||
/// for the CPU solver to terminate on dense headers. Default 32 finds the
|
||
/// ~2 real solutions per nonce; lower is faster but may drop solutions
|
||
/// (≤8 drops real ones). `--cpu-clamp 0` runs the exact, unclamped solver
|
||
/// (WARNING: can consume tens of GB and OOM on dense headers).
|
||
#[arg(long, value_name = "N", default_value_t = 32)]
|
||
cpu_clamp: usize,
|
||
|
||
/// OpenCL device index for single-device modes (benchmark, gpu-debug).
|
||
#[arg(short, long, default_value_t = 0)]
|
||
device: usize,
|
||
|
||
/// GPU devices to mine on: comma-separated indices (e.g. "0,1") or "all".
|
||
/// Defaults to all detected devices.
|
||
#[arg(long, default_value = "all")]
|
||
devices: String,
|
||
|
||
/// GPU backend: "mixed" (default — each card on its native backend: NVIDIA
|
||
/// on CUDA, AMD/Intel on OpenCL), "opencl" (every card via OpenCL), or
|
||
/// "cuda" (NVIDIA only). In mixed mode `--devices` indexes the combined list
|
||
/// shown by --list-devices.
|
||
#[arg(long, default_value = "mixed")]
|
||
backend: String,
|
||
|
||
/// Force the OpenCL backend, disabling CUDA (overrides --backend).
|
||
#[arg(long)]
|
||
force_opencl: bool,
|
||
|
||
/// List available OpenCL devices and exit.
|
||
#[arg(long)]
|
||
list_devices: bool,
|
||
|
||
/// Print detected OpenCL/CUDA devices as JSON and exit. Used by the GUI
|
||
/// config tool to populate card-specific options.
|
||
#[arg(long)]
|
||
devices_json: bool,
|
||
|
||
/// Run internal correctness self-tests and exit.
|
||
#[arg(long)]
|
||
selftest: bool,
|
||
|
||
/// Run only the GPU solver on a fixed header with diagnostics, and exit.
|
||
#[arg(long)]
|
||
gpu_debug: bool,
|
||
|
||
/// Benchmark the selected backend over N solves (no pool) and exit.
|
||
#[arg(long, value_name = "N")]
|
||
benchmark: Option<usize>,
|
||
|
||
/// Disable the live dashboard and use periodic log lines instead. (The
|
||
/// dashboard is on by default when mining in a terminal.)
|
||
#[arg(long)]
|
||
no_tui: bool,
|
||
|
||
/// Don't put GPUs into maximum-performance mode (skip CUDA clock/power tuning).
|
||
#[arg(long)]
|
||
no_gpu_tune: bool,
|
||
|
||
/// Allow the TUI's live hardware-control keys (core/mem offset, TDP) to
|
||
/// change clocks/power. Locked by default so stray key presses can't retune.
|
||
#[arg(long)]
|
||
unlock_controls: bool,
|
||
|
||
/// Auto-tune each GPU at startup: sweep the core clock offset up to find the
|
||
/// fastest stable solve rate (overclock-for-speed; needs root). Takes ~30 s.
|
||
#[arg(long)]
|
||
auto_tune: bool,
|
||
|
||
/// Sustained-Sol/s governor: hold each GPU at/below this edge temperature (°C)
|
||
/// by pacing the solve cadence (no hardware writes, no root). Trades a little
|
||
/// throughput for lower temp/power; off by default (runs flat-out). Needs a
|
||
/// backend that reports temperature (AMD amdgpu / NVIDIA).
|
||
#[arg(long, value_name = "CELSIUS")]
|
||
target_temp: Option<u32>,
|
||
|
||
/// Efficiency: cap each GPU's power limit in watts (default: card max).
|
||
/// Lower power trades a little hashrate for much better Sol/W.
|
||
#[arg(long, value_name = "WATTS")]
|
||
power_limit: Option<u32>,
|
||
|
||
/// Efficiency: lock each GPU's core/SM clock in MHz (default: card max).
|
||
#[arg(long, value_name = "MHZ")]
|
||
gpu_clock: Option<u32>,
|
||
|
||
/// Efficiency: lock each GPU's memory clock in MHz (default: card max).
|
||
#[arg(long, value_name = "MHZ")]
|
||
mem_clock: Option<u32>,
|
||
|
||
/// Core clock V/F offset in MHz (LACT-style, e.g. 200 or -150). Combine with
|
||
/// --power-limit for undervolt-style efficiency. Signed.
|
||
#[arg(long, value_name = "MHZ", allow_hyphen_values = true)]
|
||
gpu_clock_offset: Option<i32>,
|
||
|
||
/// Memory clock V/F offset in MHz (LACT-style, signed).
|
||
#[arg(long, value_name = "MHZ", allow_hyphen_values = true)]
|
||
mem_clock_offset: Option<i32>,
|
||
}
|
||
|
||
/// The options loadable from a `--config` TOML file. Every field is optional; a
|
||
/// present value is applied to [`Args`] unless that option was also given on the
|
||
/// command line. Keys are the kebab-case long flag names (e.g. `cpu-group-size`).
|
||
#[derive(Default, serde::Deserialize)]
|
||
#[serde(default, deny_unknown_fields, rename_all = "kebab-case")]
|
||
struct FileConfig {
|
||
url: Option<String>,
|
||
port: Option<u16>,
|
||
user: Option<String>,
|
||
pass: Option<String>,
|
||
no_jackpot: Option<bool>,
|
||
solo: Option<bool>,
|
||
jackpot: Option<u32>,
|
||
job_timeout: Option<u64>,
|
||
control_port: Option<u16>,
|
||
threads: Option<usize>,
|
||
cpu: Option<bool>,
|
||
cpu_mining: Option<bool>,
|
||
cpu_cores: Option<String>,
|
||
cpu_group_size: Option<usize>,
|
||
cpu_clamp: Option<usize>,
|
||
device: Option<usize>,
|
||
devices: Option<String>,
|
||
backend: Option<String>,
|
||
force_opencl: Option<bool>,
|
||
no_tui: Option<bool>,
|
||
no_gpu_tune: Option<bool>,
|
||
unlock_controls: Option<bool>,
|
||
auto_tune: Option<bool>,
|
||
power_limit: Option<u32>,
|
||
gpu_clock: Option<u32>,
|
||
mem_clock: Option<u32>,
|
||
gpu_clock_offset: Option<i32>,
|
||
mem_clock_offset: Option<i32>,
|
||
/// Per-device GPU tuning overrides (`[[gpu]]` tables); config-file only.
|
||
#[serde(default)]
|
||
gpu: Vec<GpuDeviceCfg>,
|
||
}
|
||
|
||
/// One `[[gpu]]` config table: per-device backend selection plus tuning that
|
||
/// overrides the global tuning flags for that device index (tuning is CUDA/NVML
|
||
/// only). `backend` ("cuda" or "opencl") lets individual cards run on a
|
||
/// different backend than the global `--backend`; when unset the card uses the
|
||
/// global default.
|
||
#[derive(Default, serde::Deserialize)]
|
||
#[serde(default, deny_unknown_fields, rename_all = "kebab-case")]
|
||
struct GpuDeviceCfg {
|
||
index: usize,
|
||
backend: Option<String>,
|
||
power_limit: Option<u32>,
|
||
gpu_clock: Option<u32>,
|
||
mem_clock: Option<u32>,
|
||
gpu_clock_offset: Option<i32>,
|
||
mem_clock_offset: Option<i32>,
|
||
}
|
||
|
||
/// Read `--config` (if given) and fold its values into `args`: a file value is
|
||
/// applied only when that option was *not* passed explicitly on the command line
|
||
/// (so the CLI always wins). `matches` is used to tell explicit flags from
|
||
/// defaults.
|
||
fn apply_config(args: &mut Args, matches: &clap::ArgMatches) -> Result<Vec<GpuDeviceCfg>> {
|
||
let Some(path) = args.config.clone() else {
|
||
return Ok(Vec::new());
|
||
};
|
||
let text = std::fs::read_to_string(&path).with_context(|| format!("reading config file '{path}'"))?;
|
||
let file: FileConfig = toml::from_str(&text).with_context(|| format!("parsing config file '{path}'"))?;
|
||
|
||
let explicit = |name: &str| matches.value_source(name) == Some(clap::parser::ValueSource::CommandLine);
|
||
// Scalar/bool options: take the file value (unwrapped) when not on the CLI.
|
||
macro_rules! merge {
|
||
($($f:ident),* $(,)?) => {$(
|
||
if !explicit(stringify!($f)) {
|
||
if let Some(v) = file.$f { args.$f = v; }
|
||
}
|
||
)*};
|
||
}
|
||
// Optional options: copy the file's Option directly when not on the CLI.
|
||
macro_rules! merge_opt {
|
||
($($f:ident),* $(,)?) => {$(
|
||
if !explicit(stringify!($f)) && file.$f.is_some() {
|
||
args.$f = file.$f;
|
||
}
|
||
)*};
|
||
}
|
||
|
||
merge!(
|
||
user, pass, no_jackpot, solo, job_timeout, cpu, cpu_mining, cpu_group_size, cpu_clamp,
|
||
device, devices, backend, force_opencl, no_tui, no_gpu_tune, unlock_controls, auto_tune,
|
||
);
|
||
merge_opt!(url, port, jackpot, control_port, threads, cpu_cores, power_limit, gpu_clock, mem_clock, gpu_clock_offset, mem_clock_offset);
|
||
Ok(file.gpu)
|
||
}
|
||
|
||
/// Locate an implicit `mine.toml` so a double-clicked binary with a config
/// beside it just works.
///
/// The directory containing the running executable is checked first, then the
/// current working directory. Returns the first candidate that exists, as a
/// (lossily converted) string, or `None` when neither is present.
fn find_default_config() -> Option<String> {
    // Candidate next to the binary; absent when the executable path (or its
    // parent directory) cannot be determined.
    let beside_exe = std::env::current_exe()
        .ok()
        .and_then(|exe| exe.parent().map(|dir| dir.join("mine.toml")));
    // Candidate relative to the working directory.
    let in_cwd = std::path::PathBuf::from("mine.toml");

    beside_exe
        .into_iter()
        .chain(std::iter::once(in_cwd))
        .find(|candidate| candidate.exists())
        .map(|found| found.to_string_lossy().into_owned())
}
|
||
|
||
/// When launched from a GUI (no controlling terminal) for an interactive mining
|
||
/// run, relaunch ourselves inside a terminal emulator so the dashboard is
|
||
/// visible. Returns true if a window was launched (the caller should then exit).
|
||
/// Falls through (returns false) on headless hosts / when no terminal is found,
|
||
/// and is disabled by `JACKPOTMINER_NO_WINDOW=1`.
|
||
fn relaunch_in_terminal(args: &Args) -> bool {
|
||
use std::io::IsTerminal;
|
||
// Skip when already interactive, told to go headless, already relaunched, or
|
||
// explicitly disabled. Require *no* tty on either std stream (the GUI case),
|
||
// so piping output from a real shell doesn't spawn a window.
|
||
if std::io::stdout().is_terminal()
|
||
|| std::io::stdin().is_terminal()
|
||
|| args.no_tui
|
||
|| std::env::var_os("JACKPOTMINER_IN_TERMINAL").is_some()
|
||
|| std::env::var_os("JACKPOTMINER_NO_WINDOW").is_some()
|
||
{
|
||
return false;
|
||
}
|
||
// One-shot/diagnostic modes don't need a window, and there's nothing to mine
|
||
// without a pool.
|
||
if args.list_devices || args.devices_json || args.selftest || args.gpu_debug || args.benchmark.is_some() || args.url.is_none() {
|
||
return false;
|
||
}
|
||
let exe = match std::env::current_exe() {
|
||
Ok(e) => e,
|
||
Err(_) => return false,
|
||
};
|
||
let fwd: Vec<String> = std::env::args().skip(1).collect();
|
||
|
||
// (program, args before the command). The command (exe + forwarded args) is
|
||
// appended. Try $TERMINAL first, then common emulators.
|
||
let mut tries: Vec<(String, Vec<&str>)> = Vec::new();
|
||
let term_env = std::env::var("TERMINAL").ok();
|
||
if let Some(t) = &term_env {
|
||
tries.push((t.clone(), vec!["-e"]));
|
||
}
|
||
for (p, pre) in [
|
||
("x-terminal-emulator", vec!["-e"]),
|
||
("gnome-terminal", vec!["--"]),
|
||
("konsole", vec!["-e"]),
|
||
("xfce4-terminal", vec!["-x"]),
|
||
("alacritty", vec!["-e"]),
|
||
("wezterm", vec!["start", "--"]),
|
||
("kitty", vec![]),
|
||
("foot", vec![]),
|
||
("ghostty", vec!["-e"]),
|
||
("xterm", vec!["-e"]),
|
||
] {
|
||
tries.push((p.to_string(), pre));
|
||
}
|
||
|
||
for (prog, pre) in tries {
|
||
let ok = std::process::Command::new(&prog)
|
||
.args(&pre)
|
||
.arg(&exe)
|
||
.args(&fwd)
|
||
.env("JACKPOTMINER_IN_TERMINAL", "1")
|
||
.spawn()
|
||
.is_ok();
|
||
if ok {
|
||
return true;
|
||
}
|
||
}
|
||
false
|
||
}
|
||
|
||
fn main() -> Result<()> {
|
||
// Parse the CLI, but keep the matches so a `--config` file can fill in any
|
||
// option that wasn't passed explicitly.
|
||
let matches = Args::command().get_matches();
|
||
let mut args = Args::from_arg_matches(&matches).expect("clap matches convert to Args");
|
||
// Double-click convenience: with nothing specified, pick up a mine.toml.
|
||
if args.config.is_none()
|
||
&& args.url.is_none()
|
||
&& !args.selftest
|
||
&& !args.list_devices
|
||
&& !args.devices_json
|
||
&& !args.gpu_debug
|
||
&& args.benchmark.is_none()
|
||
{
|
||
args.config = find_default_config();
|
||
}
|
||
let gpu_devices = apply_config(&mut args, &matches)?;
|
||
// If started from a GUI, reopen in a terminal window so the dashboard shows.
|
||
if relaunch_in_terminal(&args) {
|
||
return Ok(());
|
||
}
|
||
|
||
// The dashboard is on by default, but only for the actual mining run (not for
|
||
// one-shot modes like --selftest/--benchmark) and only on a real terminal.
|
||
// Anything else falls back to ordinary log output.
|
||
let mining_mode =
|
||
!args.list_devices && !args.selftest && !args.gpu_debug && args.benchmark.is_none();
|
||
let tui = !args.no_tui && mining_mode && std::io::stdout().is_terminal();
|
||
if tui {
|
||
// Capture logs into the dashboard's pane instead of the screen.
|
||
tui::install_logger();
|
||
} else {
|
||
env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("info")).init();
|
||
if !args.no_tui && mining_mode {
|
||
info!("no terminal detected — using log output (the dashboard needs a TTY)");
|
||
}
|
||
}
|
||
|
||
// Install the GPU tuning policy before any solver is built. Each CUDA solver
|
||
// applies it to its own card and restores defaults when dropped (covers the
|
||
// Ctrl-C shutdown path, since workers drop their solvers on exit).
|
||
#[cfg(feature = "cuda")]
|
||
{
|
||
gpu_tune::configure(gpu_tune::TuneConfig {
|
||
enabled: !args.no_gpu_tune,
|
||
power_limit_w: args.power_limit,
|
||
gpu_clock_mhz: args.gpu_clock,
|
||
mem_clock_mhz: args.mem_clock,
|
||
gpu_offset_mhz: args.gpu_clock_offset,
|
||
mem_offset_mhz: args.mem_clock_offset,
|
||
auto_tune: args.auto_tune,
|
||
unlock_controls: args.unlock_controls,
|
||
});
|
||
// Per-device overrides from `[[gpu]]` config tables.
|
||
gpu_tune::configure_devices(
|
||
gpu_devices
|
||
.iter()
|
||
.map(|g| {
|
||
(
|
||
g.index,
|
||
gpu_tune::DeviceTune {
|
||
power_limit_w: g.power_limit,
|
||
gpu_clock_mhz: g.gpu_clock,
|
||
mem_clock_mhz: g.mem_clock,
|
||
gpu_offset_mhz: g.gpu_clock_offset,
|
||
mem_offset_mhz: g.mem_clock_offset,
|
||
},
|
||
)
|
||
})
|
||
.collect(),
|
||
);
|
||
}
|
||
|
||
if args.list_devices {
|
||
list_devices();
|
||
return Ok(());
|
||
}
|
||
|
||
if args.devices_json {
|
||
println!("{}", devices_json());
|
||
return Ok(());
|
||
}
|
||
|
||
if let Some(n) = args.threads {
|
||
rayon::ThreadPoolBuilder::new()
|
||
.num_threads(n)
|
||
.build_global()
|
||
.ok();
|
||
}
|
||
|
||
if args.selftest {
|
||
return selftest(args.device);
|
||
}
|
||
|
||
#[cfg(feature = "cuda")]
|
||
if args.gpu_debug && args.backend.eq_ignore_ascii_case("cuda") {
|
||
let solver = cuda::CudaSolver::new(args.device)?;
|
||
let mut warm = vec![0x42u8; params::HEADER_LEN];
|
||
cuda_compatible(&mut warm); // CUDA needs header[128..135] == 0
|
||
solver.solve(&warm)?; // warm up
|
||
info!("CUDA per-stage timing (warm):");
|
||
solver.profile(&warm)?;
|
||
let runs = 16u32;
|
||
let start = std::time::Instant::now();
|
||
let mut valid = 0usize;
|
||
for nonce in 0..runs {
|
||
let mut header = vec![0x42u8; params::HEADER_LEN];
|
||
header[108..112].copy_from_slice(&nonce.to_le_bytes());
|
||
cuda_compatible(&mut header);
|
||
valid += solver.solve(&header)?.len();
|
||
}
|
||
let dt = start.elapsed().as_secs_f64();
|
||
info!("CUDA: {:.0} ms/solve, {:.2} valid/solve", dt * 1000.0 / runs as f64, valid as f64 / runs as f64);
|
||
return Ok(());
|
||
}
|
||
|
||
#[cfg(feature = "gpu")]
|
||
if args.gpu_debug {
|
||
let solver = gpu::GpuSolver::new(args.device)?;
|
||
info!("per-stage timing:");
|
||
solver.profile(&vec![0x42u8; params::HEADER_LEN])?;
|
||
let runs = 32u32;
|
||
let mut max_raw = 0usize;
|
||
let mut total = std::time::Duration::ZERO;
|
||
for nonce in 0..runs {
|
||
// Vary the nonce region of an otherwise fixed header.
|
||
let mut header = vec![0x42u8; params::HEADER_LEN];
|
||
header[108..112].copy_from_slice(&nonce.to_le_bytes());
|
||
let t = std::time::Instant::now();
|
||
let (raw, sols) = solver.solve_with_stats(&header)?;
|
||
let dt = t.elapsed();
|
||
total += dt;
|
||
max_raw = max_raw.max(raw);
|
||
info!(
|
||
"nonce {nonce:2}: raw_candidates={raw:6}, valid={}, {:.0} ms",
|
||
sols.len(),
|
||
dt.as_secs_f64() * 1000.0
|
||
);
|
||
}
|
||
info!(
|
||
"summary: {runs} solves, avg {:.0} ms/solve, max raw candidates={max_raw} (MAX_SOLS cap is plenty)",
|
||
total.as_secs_f64() * 1000.0 / runs as f64
|
||
);
|
||
return Ok(());
|
||
}
|
||
|
||
if let Some(runs) = args.benchmark {
|
||
let specs = backend_specs(&args, &gpu_devices)?;
|
||
return benchmark(specs, runs.max(1));
|
||
}
|
||
|
||
// Pool URL defaults to the jackpot.tools ZCL pool when not given on the CLI
|
||
// or in a config file.
|
||
let url = args.url.as_deref().unwrap_or(DEFAULT_POOL_URL);
|
||
let (host, port) = parse_url(url, args.port)?;
|
||
|
||
// Password = pool mode. --solo / --jackpot <pct> take precedence; otherwise
|
||
// PPLNS only when explicitly requested (--no-jackpot or `-p no-jackpot`),
|
||
// and jackpot for anything else.
|
||
let pass = if args.solo {
|
||
"solo".to_string()
|
||
} else if let Some(pct) = args.jackpot {
|
||
format!("jackpot.{pct}")
|
||
} else if args.no_jackpot || args.pass == "no-jackpot" {
|
||
"no-jackpot".to_string()
|
||
} else {
|
||
"jackpot".to_string()
|
||
};
|
||
info!("connecting to {host}:{port} as '{}'", args.user);
|
||
let client = Arc::new(StratumClient::connect(&host, port, &args.user, &pass)?);
|
||
|
||
#[allow(unused_mut)]
|
||
let mut specs = backend_specs(&args, &gpu_devices)?;
|
||
// CPU mining via OpenCL: if requested and an OpenCL CPU device (e.g. PoCL) is
|
||
// available, run the CPU solve through the OpenCL backend on that device
|
||
// (one worker) instead of the native AVX2 per-core groups.
|
||
#[cfg(feature = "gpu")]
|
||
let cpu_opencl = args.cpu_mining.then(gpu::cpu_device_index).flatten();
|
||
#[cfg(not(feature = "gpu"))]
|
||
let cpu_opencl: Option<usize> = None;
|
||
#[cfg(feature = "gpu")]
|
||
if let Some(idx) = cpu_opencl {
|
||
info!("CPU mining via OpenCL device {idx} (CPU); native AVX2 CPU groups stay off");
|
||
specs.push(BackendSpec::Gpu(idx));
|
||
}
|
||
info!("launching {} worker(s)", specs.len());
|
||
|
||
let running = Arc::new(AtomicBool::new(true));
|
||
{
|
||
let r = running.clone();
|
||
ctrlc::set_handler(move || {
|
||
info!("interrupt received, shutting down...");
|
||
r.store(false, Ordering::Relaxed);
|
||
})
|
||
.context("failed to install Ctrl-C handler")?;
|
||
}
|
||
|
||
let job_timeout = (args.job_timeout > 0).then(|| std::time::Duration::from_secs(args.job_timeout));
|
||
// CPU mining as toggleable rows of --cpu-group-size cores (over the cores
|
||
// selected by --cpu-cores, default all), shown below the device table and
|
||
// mined alongside the backend. Starts enabled only with --cpu-mining.
|
||
let logical_cpus = num_cpus::get();
|
||
let cpu_cores = match args.cpu_cores.as_deref() {
|
||
Some(spec) => parse_core_spec(spec, logical_cpus)?,
|
||
None => (0..logical_cpus).collect(),
|
||
};
|
||
// AVX2 per-core groups start enabled only when CPU mining is on AND we're not
|
||
// already running CPU work through OpenCL.
|
||
let cpu_mining = cpu_groups::CpuMining::new(cpu_cores, args.cpu_group_size, args.cpu_mining && cpu_opencl.is_none());
|
||
// 0 selects the exact (unclamped) CPU solver; any other value clamps.
|
||
let cpu_clamp = (args.cpu_clamp != 0).then_some(args.cpu_clamp);
|
||
// Shared per-GPU live hardware controls (adjusted from the TUI, applied by
|
||
// each device's worker); the selection also spans the CPU-group rows.
|
||
let controls = controls::Controls::new(
|
||
specs.len(),
|
||
cpu_mining.groups().len(),
|
||
args.gpu_clock_offset.unwrap_or(0),
|
||
args.mem_clock_offset.unwrap_or(0),
|
||
args.power_limit.unwrap_or(0),
|
||
args.unlock_controls,
|
||
);
|
||
// Software temp governor target (paces solve cadence; no hardware writes).
|
||
miner::set_target_temp(args.target_temp);
|
||
miner::run(client, specs, running, job_timeout, tui, format!("{host}:{port}"), controls, cpu_mining, cpu_clamp, args.control_port)
|
||
}
|
||
|
||
/// The solver backend resolved from the command line and the compiled features.
///
/// Variants for a GPU backend only exist when that backend is compiled in, so a
/// `match` over this enum must carry the same `cfg` gates.
enum BackendKind {
    /// CPU solver only.
    Cpu,
    /// Each physical card on its native backend (NVIDIA→CUDA, others→OpenCL).
    #[cfg(any(feature = "gpu", feature = "cuda"))]
    Mixed,
    /// Every selected card through OpenCL.
    #[cfg(feature = "gpu")]
    OpenCl,
    /// Every selected card through CUDA.
    #[cfg(feature = "cuda")]
    Cuda,
}
|
||
|
||
/// Resolve the backend kind from `--cpu` / `--backend` and compiled features.
|
||
fn backend_kind(args: &Args) -> Result<BackendKind> {
|
||
if args.cpu {
|
||
return Ok(BackendKind::Cpu);
|
||
}
|
||
// --force-opencl disables CUDA regardless of --backend.
|
||
if args.force_opencl {
|
||
#[cfg(feature = "gpu")]
|
||
{
|
||
return Ok(BackendKind::OpenCl);
|
||
}
|
||
#[cfg(not(feature = "gpu"))]
|
||
{
|
||
return Err(anyhow!("--force-opencl needs the OpenCL backend compiled in (build with --features gpu)"));
|
||
}
|
||
}
|
||
match args.backend.to_ascii_lowercase().as_str() {
|
||
"mixed" => {
|
||
// Each card on its native backend; falls back to whatever single GPU
|
||
// backend is compiled, or to CPU when none is.
|
||
#[cfg(any(feature = "gpu", feature = "cuda"))]
|
||
{
|
||
Ok(BackendKind::Mixed)
|
||
}
|
||
#[cfg(not(any(feature = "gpu", feature = "cuda")))]
|
||
Ok(BackendKind::Cpu)
|
||
}
|
||
"cuda" => {
|
||
#[cfg(feature = "cuda")]
|
||
{
|
||
Ok(BackendKind::Cuda)
|
||
}
|
||
#[cfg(not(feature = "cuda"))]
|
||
Err(anyhow!("CUDA backend not compiled in (build with --features cuda)"))
|
||
}
|
||
"opencl" | "" => {
|
||
#[cfg(feature = "gpu")]
|
||
{
|
||
Ok(BackendKind::OpenCl)
|
||
}
|
||
#[cfg(not(feature = "gpu"))]
|
||
Ok(BackendKind::Cpu)
|
||
}
|
||
other => Err(anyhow!("unknown --backend '{other}' (expected mixed, opencl, or cuda)")),
|
||
}
|
||
}
|
||
|
||
/// Parse a `--cpu-cores` spec into a sorted, de-duplicated list of logical core
|
||
/// indices. Accepts "all", single indices, and inclusive ranges, comma-joined:
|
||
/// "0-7", "0,2,4,6", "0-3,8,10-11". Every index must be `< available`.
|
||
fn parse_core_spec(spec: &str, available: usize) -> Result<Vec<usize>> {
|
||
let spec = spec.trim();
|
||
if spec.eq_ignore_ascii_case("all") {
|
||
return Ok((0..available).collect());
|
||
}
|
||
let mut set = std::collections::BTreeSet::new();
|
||
for part in spec.split(',') {
|
||
let part = part.trim();
|
||
if part.is_empty() {
|
||
continue;
|
||
}
|
||
match part.split_once('-') {
|
||
Some((a, b)) => {
|
||
let a: usize = a.trim().parse().map_err(|_| anyhow!("bad core range '{part}'"))?;
|
||
let b: usize = b.trim().parse().map_err(|_| anyhow!("bad core range '{part}'"))?;
|
||
if a > b {
|
||
return Err(anyhow!("core range '{part}' is reversed"));
|
||
}
|
||
set.extend(a..=b);
|
||
}
|
||
None => {
|
||
set.insert(part.parse::<usize>().map_err(|_| anyhow!("bad core index '{part}'"))?);
|
||
}
|
||
}
|
||
}
|
||
let max = *set.iter().next_back().ok_or_else(|| anyhow!("--cpu-cores selected no cores"))?;
|
||
if max >= available {
|
||
return Err(anyhow!(
|
||
"--cpu-cores includes core {max}, but only {available} logical CPUs are available (0..={})",
|
||
available - 1
|
||
));
|
||
}
|
||
Ok(set.into_iter().collect())
|
||
}
|
||
|
||
/// Determine the solver workers to launch from the CLI flags. `gpu_devices` are
|
||
/// the `[[gpu]]` config tables; a card whose table sets `backend` runs on that
|
||
/// backend instead of the global `--backend` default, so a single run can mix
|
||
/// CUDA and OpenCL cards.
|
||
#[allow(unused_variables)]
|
||
fn backend_specs(args: &Args, gpu_devices: &[GpuDeviceCfg]) -> Result<Vec<BackendSpec>> {
|
||
let default = backend_kind(args)?;
|
||
#[cfg(any(feature = "gpu", feature = "cuda"))]
|
||
{
|
||
// The default backend fixes the device enumeration `--devices` indexes
|
||
// into; per-card overrides then flip individual cards.
|
||
let (available, default_cuda) = match default {
|
||
BackendKind::Cpu => {
|
||
let clamp = (args.cpu_clamp != 0).then_some(args.cpu_clamp);
|
||
return Ok(vec![BackendSpec::Cpu(clamp)]);
|
||
}
|
||
// Mixed builds its own unified list (each card on its native backend).
|
||
BackendKind::Mixed => return mixed_specs(args),
|
||
#[cfg(feature = "cuda")]
|
||
BackendKind::Cuda => (cuda::device_count()?, true),
|
||
#[cfg(feature = "gpu")]
|
||
BackendKind::OpenCl => (gpu::list_devices()?.len(), false),
|
||
};
|
||
let devices = parse_devices(&args.devices, available)?;
|
||
let mut specs = Vec::with_capacity(devices.len());
|
||
for idx in devices {
|
||
let cuda = match gpu_devices.iter().find(|g| g.index == idx).and_then(|g| g.backend.as_deref()) {
|
||
Some(b) if b.eq_ignore_ascii_case("cuda") => true,
|
||
Some(b) if b.eq_ignore_ascii_case("opencl") => false,
|
||
Some(other) => {
|
||
return Err(anyhow!("device {idx}: unknown backend '{other}' (expected cuda or opencl)"))
|
||
}
|
||
None => default_cuda,
|
||
};
|
||
specs.push(gpu_spec(idx, cuda)?);
|
||
}
|
||
Ok(specs)
|
||
}
|
||
#[cfg(not(any(feature = "gpu", feature = "cuda")))]
|
||
{
|
||
// 0 selects the exact (unclamped) solver; any other value clamps.
|
||
let clamp = (args.cpu_clamp != 0).then_some(args.cpu_clamp);
|
||
Ok(vec![BackendSpec::Cpu(clamp)])
|
||
}
|
||
}
|
||
|
||
/// Assemble the unified `(label, spec)` device list used by the `mixed`
/// backend, where every physical GPU appears once on its native backend.
///
/// CUDA devices come first. OpenCL devices follow, except those flagged as
/// NVIDIA when CUDA already listed at least one device (so no card is mined
/// twice); when CUDA found nothing, NVIDIA cards stay on OpenCL. `--devices`
/// indexes into the returned list. Enumeration failures on either backend are
/// treated as "no devices" rather than errors.
#[cfg(any(feature = "gpu", feature = "cuda"))]
fn mixed_plan() -> Vec<(String, BackendSpec)> {
    /// Remove a leading `"[<n>] "` index from a backend's own device label so
    /// the mixed list does not show two indices. Labels without that prefix
    /// are returned unchanged.
    fn strip_index(label: &str) -> &str {
        match label.strip_prefix('[').and_then(|tail| tail.split_once("] ")) {
            Some((_, name)) => name,
            None => label,
        }
    }

    #[allow(unused_mut)]
    let mut plan: Vec<(String, BackendSpec)> = Vec::new();

    // NVIDIA cards via CUDA, when the backend is compiled and the driver loads.
    #[cfg(feature = "cuda")]
    let cuda_has_nvidia = {
        let cuda_names = cuda::list_devices().unwrap_or_default();
        plan.extend(
            cuda_names
                .iter()
                .enumerate()
                .map(|(i, label)| (format!("{} (CUDA)", strip_index(label)), BackendSpec::Cuda(i))),
        );
        !cuda_names.is_empty()
    };
    #[cfg(not(feature = "cuda"))]
    let cuda_has_nvidia = false;

    // Remaining OpenCL cards via OpenCL; skip NVIDIA ones already on CUDA.
    #[cfg(feature = "gpu")]
    {
        let opencl_names = gpu::list_devices().unwrap_or_default();
        let is_nvidia = gpu::device_is_nvidia();
        plan.extend(
            opencl_names
                .iter()
                .enumerate()
                // A missing flag counts as "not NVIDIA", so the card is kept.
                .filter(|(j, _)| !(cuda_has_nvidia && is_nvidia.get(*j).copied().unwrap_or(false)))
                .map(|(j, label)| (format!("{} (OpenCL)", strip_index(label)), BackendSpec::Gpu(j))),
        );
    }
    // `cuda_has_nvidia` is only consumed by the OpenCL branch above.
    #[cfg(not(feature = "gpu"))]
    let _ = cuda_has_nvidia;

    plan
}
|
||
|
||
/// Build the worker list for `--backend mixed`: each card on its native backend.
/// `--devices` selects into [`mixed_plan`]'s unified list.
///
/// # Errors
///
/// Fails when no device is detected on any compiled backend, when `--devices`
/// cannot be parsed, or when a selected index does not exist in the unified
/// list (previously such an index panicked on the slice access).
#[cfg(any(feature = "gpu", feature = "cuda"))]
fn mixed_specs(args: &Args) -> Result<Vec<BackendSpec>> {
    let plan = mixed_plan();
    if plan.is_empty() {
        return Err(anyhow!(
            "no GPUs found for the mixed backend — none detected via CUDA or OpenCL"
        ));
    }

    // `parse_devices` does not range-check explicit indices, so look each one
    // up with `get` and report a bad index instead of indexing out of bounds.
    // `plan` is non-empty here, so `plan.len() - 1` cannot underflow.
    parse_devices(&args.devices, plan.len())?
        .into_iter()
        .map(|i| {
            plan.get(i).map(|entry| entry.1).ok_or_else(|| {
                anyhow!(
                    "device index {i} is out of range: the mixed backend lists {} device(s), so valid indices are 0 to {}",
                    plan.len(),
                    plan.len() - 1
                )
            })
        })
        .collect()
}
|
||
|
||
/// Produce the worker spec for device `idx` on the requested GPU backend:
/// CUDA when `cuda` is true, OpenCL otherwise.
///
/// # Errors
///
/// Returns an error naming the missing cargo feature when the requested
/// backend was not compiled into this binary.
#[cfg(any(feature = "gpu", feature = "cuda"))]
fn gpu_spec(idx: usize, cuda: bool) -> Result<BackendSpec> {
    // OpenCL request: resolve it first and leave early.
    if !cuda {
        #[cfg(feature = "gpu")]
        return Ok(BackendSpec::Gpu(idx));
        #[cfg(not(feature = "gpu"))]
        return Err(anyhow!("device {idx} requests the OpenCL backend, but it isn't compiled in (build with --features gpu)"));
    }

    // CUDA request.
    #[cfg(feature = "cuda")]
    {
        Ok(BackendSpec::Cuda(idx))
    }
    #[cfg(not(feature = "cuda"))]
    {
        Err(anyhow!("device {idx} requests the CUDA backend, but it isn't compiled in (build with --features cuda)"))
    }
}
|
||
|
||
/// Interpret a `--devices` value.
///
/// `"all"` (case-insensitive, surrounding whitespace ignored) expands to every
/// index in `0..available`. Anything else is read as a comma-separated list of
/// indices, returned in the order given. Explicit indices are not checked
/// against `available` and duplicates are kept.
///
/// # Errors
///
/// Fails when any comma-separated entry is not a non-negative integer.
#[cfg(any(feature = "gpu", feature = "cuda"))]
fn parse_devices(spec: &str, available: usize) -> Result<Vec<usize>> {
    if spec.trim().eq_ignore_ascii_case("all") {
        return Ok((0..available).collect());
    }

    let mut devices: Vec<usize> = Vec::new();
    for entry in spec.split(',') {
        // The error quotes the entry as written (untrimmed).
        let index = entry
            .trim()
            .parse::<usize>()
            .map_err(|_| anyhow!("bad device index '{entry}'"))?;
        devices.push(index);
    }

    if devices.is_empty() {
        return Err(anyhow!("no devices selected"));
    }
    Ok(devices)
}
|
||
|
||
|
||
/// Build a JSON description of the detected devices for the GUI config tool:
|
||
/// `{"opencl":["<name>",...],"cuda":["<name>",...]}`. A backend not compiled in,
|
||
/// or with no runtime/devices, yields an empty list (so the tool can warn).
|
||
fn devices_json() -> String {
|
||
#[cfg(feature = "gpu")]
|
||
let opencl: Vec<String> = gpu::list_devices().unwrap_or_default();
|
||
#[cfg(not(feature = "gpu"))]
|
||
let opencl: Vec<String> = Vec::new();
|
||
#[cfg(feature = "cuda")]
|
||
let cuda: Vec<String> = cuda::list_devices().unwrap_or_default();
|
||
#[cfg(not(feature = "cuda"))]
|
||
let cuda: Vec<String> = Vec::new();
|
||
// Flat OpenCL index of a CPU device (e.g. PoCL), used for CPU-via-OpenCL.
|
||
#[cfg(feature = "gpu")]
|
||
let opencl_cpu: Option<usize> = gpu::cpu_device_index();
|
||
#[cfg(not(feature = "gpu"))]
|
||
let opencl_cpu: Option<usize> = None;
|
||
serde_json::json!({ "opencl": opencl, "cuda": cuda, "opencl_cpu_index": opencl_cpu }).to_string()
|
||
}
|
||
|
||
/// Print, to stdout, the devices each compiled GPU backend can see, followed by
/// the unified list the `mixed` backend would mine. A build without any GPU
/// feature prints a rebuild hint instead.
fn list_devices() {
    #[cfg(feature = "gpu")]
    match gpu::list_devices() {
        Err(e) => println!("error listing OpenCL devices: {e}"),
        Ok(devs) if devs.is_empty() => println!("no OpenCL devices found"),
        Ok(devs) => {
            println!("OpenCL devices (--backend opencl):");
            devs.iter().for_each(|d| println!("  {d}"));
        }
    }

    #[cfg(feature = "cuda")]
    match cuda::list_devices() {
        Err(e) => println!("error listing CUDA devices: {e}"),
        Ok(devs) if devs.is_empty() => println!("no CUDA devices found"),
        Ok(devs) => {
            println!("CUDA devices (--backend cuda):");
            devs.iter().for_each(|d| println!("  {d}"));
        }
    }

    // The unified list is what the default `mixed` backend mines, and what
    // `--devices` indexes in that mode; it is omitted entirely when empty.
    #[cfg(any(feature = "gpu", feature = "cuda"))]
    {
        let labels: Vec<String> = mixed_plan().into_iter().map(|(label, _)| label).collect();
        if !labels.is_empty() {
            println!("\nMixed backend (--backend mixed, the default) — `--devices` indexes this list:");
            for (i, label) in labels.iter().enumerate() {
                println!("  [{i}] {label}");
            }
        }
    }

    #[cfg(not(any(feature = "gpu", feature = "cuda")))]
    println!("built without GPU support (rebuild with the `gpu` or `cuda` feature)");
}
|
||
|
||
/// Parse `stratum+tcp://host:port`, `tcp://host:port`, or `host:port`. When the
|
||
/// URL omits `:port`, fall back to `default_port` (from `--port`).
|
||
fn parse_url(url: &str, default_port: Option<u16>) -> Result<(String, u16)> {
|
||
let trimmed = url
|
||
.strip_prefix("stratum+tcp://")
|
||
.or_else(|| url.strip_prefix("tcp://"))
|
||
.unwrap_or(url);
|
||
match trimmed.rsplit_once(':') {
|
||
Some((host, port)) => {
|
||
let port: u16 = port.parse().with_context(|| format!("bad port in {url}"))?;
|
||
Ok((host.to_string(), port))
|
||
}
|
||
None => {
|
||
let port = default_port.ok_or_else(|| {
|
||
anyhow!("URL '{url}' has no port; include one (host:port) or pass --port")
|
||
})?;
|
||
Ok((trimmed.to_string(), port))
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Run quick correctness checks: encode/decode round-trip and an end-to-end
|
||
/// solve-then-verify against a fixed header.
|
||
fn selftest(gpu_device: usize) -> Result<()> {
|
||
info!("running self-tests (this performs one full solve and may take a while)...");
|
||
|
||
// A deterministic header full of a fixed byte pattern, with the nonce tail
|
||
// zeroed so the CUDA backend (which assumes header[128..135] == 0) is
|
||
// actually exercised by the GPU/CUDA comparison below.
|
||
let mut header = vec![0x42u8; params::HEADER_LEN];
|
||
cuda_compatible(&mut header);
|
||
let base = blake::base_state(&header);
|
||
|
||
// Use the clamped solver (clamp 32, matching the GPU's fixed bucket slots):
|
||
// the unclamped path explodes on dense 192,7 headers, and the GPU it is
|
||
// compared against also bounds its buckets.
|
||
let solutions = equihash::solve_with(&header, Some(32));
|
||
info!("CPU found {} solution(s) for the test header", solutions.len());
|
||
|
||
for (i, sol) in solutions.iter().enumerate() {
|
||
let ok = equihash::is_valid_solution(&base, sol);
|
||
let packed = equihash::indices_to_solution(sol);
|
||
let back = equihash::solution_to_indices(&packed);
|
||
let roundtrip = &back == sol;
|
||
info!(
|
||
" CPU solution {i}: valid={ok}, encode_roundtrip={roundtrip}, bytes={}",
|
||
packed.len()
|
||
);
|
||
if !ok || !roundtrip {
|
||
return Err(anyhow!("self-test failed on solution {i}"));
|
||
}
|
||
}
|
||
|
||
// Validate the GPU solver against the CPU: it must produce only valid
|
||
// solutions, all of which are a subset of the CPU's exhaustive set.
|
||
#[cfg(feature = "gpu")]
|
||
{
|
||
info!("initialising GPU solver for comparison (OpenCL device {gpu_device})...");
|
||
let solver = gpu::GpuSolver::new(gpu_device)
|
||
.with_context(|| format!("init OpenCL device {gpu_device}"))?;
|
||
|
||
// Spot-check the BLAKE2b kernel against the CPU reference. The AMD kernel
|
||
// buckets its round-0 output instead of exposing per-index digests, so
|
||
// the probe is skipped there (the solve-vs-CPU check below still runs).
|
||
if solver.supports_blake_probe() {
|
||
let outputs = solver.hash_all(&header)?;
|
||
let step = params::BLAKE_CALLS / 64;
|
||
for k in 0..64 {
|
||
let g = (k * step) as u32;
|
||
let cpu = blake::generate_hash(&base, g);
|
||
let off = g as usize * params::HASH_OUTPUT;
|
||
if cpu != outputs[off..off + params::HASH_OUTPUT] {
|
||
return Err(anyhow!("GPU BLAKE2b mismatch at g={g}"));
|
||
}
|
||
}
|
||
info!("GPU BLAKE2b kernel matches CPU");
|
||
} else {
|
||
info!("skipping BLAKE2b kernel probe (AMD kernel buckets round-0 output)");
|
||
}
|
||
|
||
let gpu_solutions = solver.solve(&header)?;
|
||
info!("GPU found {} valid solution(s)", gpu_solutions.len());
|
||
|
||
let cpu_set: std::collections::HashSet<Vec<u32>> =
|
||
solutions.iter().map(|s| sorted(s)).collect();
|
||
for sol in &gpu_solutions {
|
||
if !equihash::is_valid_solution(&base, sol) {
|
||
return Err(anyhow!("GPU returned an invalid solution"));
|
||
}
|
||
if !cpu_set.contains(&sorted(sol)) {
|
||
return Err(anyhow!("GPU solution not found by the CPU reference"));
|
||
}
|
||
}
|
||
info!(
|
||
"GPU solver verified: {}/{} of the CPU solutions recovered",
|
||
gpu_solutions.len(),
|
||
solutions.len()
|
||
);
|
||
}
|
||
|
||
// Validate the CUDA solver the same way (subset of the CPU's solutions).
|
||
#[cfg(feature = "cuda")]
|
||
{
|
||
info!("initialising CUDA solver for comparison...");
|
||
let solver = cuda::CudaSolver::new(0).context("init CUDA device 0")?;
|
||
let cuda_solutions = solver.solve(&header)?;
|
||
info!("CUDA found {} valid solution(s)", cuda_solutions.len());
|
||
|
||
let cpu_set: std::collections::HashSet<Vec<u32>> =
|
||
solutions.iter().map(|s| sorted(s)).collect();
|
||
for sol in &cuda_solutions {
|
||
if !equihash::is_valid_solution(&base, sol) {
|
||
return Err(anyhow!("CUDA returned an invalid solution"));
|
||
}
|
||
if !cpu_set.contains(&sorted(sol)) {
|
||
return Err(anyhow!("CUDA solution not found by the CPU reference"));
|
||
}
|
||
}
|
||
info!(
|
||
"CUDA solver verified: {}/{} of the CPU solutions recovered",
|
||
cuda_solutions.len(),
|
||
solutions.len()
|
||
);
|
||
}
|
||
|
||
info!("self-tests passed");
|
||
Ok(())
|
||
}
|
||
|
||
/// Return an ascending-sorted copy of `v`, leaving the input untouched. Used to
/// compare index lists as sets regardless of their original order.
#[cfg(any(feature = "gpu", feature = "cuda"))]
fn sorted(v: &[u32]) -> Vec<u32> {
    let mut ordered = Vec::with_capacity(v.len());
    ordered.extend_from_slice(v);
    ordered.sort_unstable();
    ordered
}
|
||
|
||
/// Benchmark the configured backends concurrently (one thread each), reporting
|
||
/// per-worker and aggregate throughput. With multiple GPUs this measures real
|
||
/// concurrent multi-device performance.
|
||
fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
|
||
use std::time::Instant;
|
||
info!("benchmarking {runs} solve(s) per worker across {} worker(s)", specs.len());
|
||
|
||
/// Per-worker benchmark result, including a steady-state telemetry snapshot
|
||
/// (sampled right after the timed loop, while the card is warm).
|
||
struct WorkerResult {
|
||
sols: usize,
|
||
dt: f64,
|
||
watts: Option<f64>,
|
||
temp_c: Option<u32>,
|
||
core_mhz: Option<u32>,
|
||
mem_mhz: Option<u32>,
|
||
}
|
||
|
||
let start = Instant::now();
|
||
let mut handles = Vec::new();
|
||
for (id, spec) in specs.into_iter().enumerate() {
|
||
handles.push(std::thread::spawn(move || -> Result<WorkerResult> {
|
||
let backend = spec.build()?;
|
||
backend.solve(&pseudo_header(id as u64))?; // warm up (excluded)
|
||
let t = Instant::now();
|
||
let mut sols = 0usize;
|
||
for i in 0..runs {
|
||
// Distinct nonce space per worker.
|
||
let seed = ((id as u64) << 40) | (i as u64 + 1);
|
||
sols += backend.solve(&pseudo_header(seed))?.len();
|
||
}
|
||
let dt = t.elapsed().as_secs_f64();
|
||
// Snapshot telemetry while the card is still under load.
|
||
let (core_mhz, mem_mhz) = backend.current_clocks_mhz();
|
||
Ok(WorkerResult {
|
||
sols,
|
||
dt,
|
||
watts: backend.power_watts(),
|
||
temp_c: backend.temperature_c(),
|
||
core_mhz,
|
||
mem_mhz,
|
||
})
|
||
}));
|
||
}
|
||
|
||
// Aggregate by summing per-worker steady-state rates (excludes warm-up).
|
||
let mut agg_sols = 0.0;
|
||
let mut workers = 0usize;
|
||
for h in handles {
|
||
match h.join().unwrap() {
|
||
Ok(r) => {
|
||
let sol_s = r.sols as f64 / r.dt;
|
||
// Optional telemetry tail: " | 142 W, 41.7 Sol/W, 68°C, 2700/2500 MHz".
|
||
let mut tail = String::new();
|
||
if let Some(w) = r.watts {
|
||
tail.push_str(&format!(" | {w:.0} W"));
|
||
if w > 0.0 {
|
||
tail.push_str(&format!(", {:.2} Sol/W", sol_s / w));
|
||
}
|
||
}
|
||
if let Some(t) = r.temp_c {
|
||
tail.push_str(&format!(", {t}°C"));
|
||
}
|
||
if let (Some(c), m) = (r.core_mhz, r.mem_mhz) {
|
||
tail.push_str(&format!(", {c}/{} MHz", m.map(|m| m.to_string()).unwrap_or_else(|| "?".into())));
|
||
}
|
||
info!(
|
||
" worker {workers}: {sol_s:.2} Sol/s ({:.0} ms/solve), {} solutions{tail}",
|
||
1000.0 * r.dt / runs as f64,
|
||
r.sols
|
||
);
|
||
agg_sols += sol_s;
|
||
workers += 1;
|
||
}
|
||
Err(e) => warn!(" worker failed: {e}"),
|
||
}
|
||
}
|
||
|
||
let _ = start;
|
||
info!("aggregate: {agg_sols:.1} Sol/s across {workers} worker(s)");
|
||
Ok(())
|
||
}
|
||
|
||
/// A deterministic pseudo-random 140-byte header for benchmarking.
|
||
fn pseudo_header(seed: u64) -> Vec<u8> {
|
||
let mut header = vec![0u8; params::HEADER_LEN];
|
||
let mut x = seed.wrapping_mul(0x9E3779B97F4A7C15).wrapping_add(1);
|
||
for b in header.iter_mut() {
|
||
// xorshift64*
|
||
x ^= x >> 12;
|
||
x ^= x << 25;
|
||
x ^= x >> 27;
|
||
*b = (x.wrapping_mul(0x2545F4914F6CDD1D) >> 33) as u8;
|
||
}
|
||
cuda_compatible(&mut header);
|
||
header
|
||
}
|
||
|
||
/// Zero header bytes [128..135] (nonce[20..27]), in place. The CUDA fatbin
/// replay injects only the midstate over header[0..128] and the 4 tail bytes
/// [136..139]; it hard-codes [128..135] = 0, so a header with those bytes set
/// makes the CUDA backend find nothing. Real pool nonces keep them zero
/// (nonce = nonce1 || counter || zeros); synthetic test headers must do the
/// same to exercise CUDA.
///
/// # Panics
///
/// Panics if `header` is shorter than 136 bytes.
fn cuda_compatible(header: &mut [u8]) {
    // Half-open range: covers bytes 128 through 135 inclusive.
    header[128..136].fill(0);
}
|
||
|
||
#[cfg(test)]
mod tests {
    use super::{parse_core_spec, parse_url};

    /// `parse_core_spec` accepts "all", ranges, lists and mixes of them, and
    /// rejects anything out of range or malformed.
    #[test]
    fn core_spec_parsing() {
        // (spec, core count, expected cores). Results come back sorted and
        // de-duplicated; "all" expands to every core.
        let accepted = [
            ("all", 4, vec![0, 1, 2, 3]),
            ("0-3", 8, vec![0, 1, 2, 3]),
            ("0,2,4,6", 8, vec![0, 2, 4, 6]),
            ("0-2,8,10-11", 12, vec![0, 1, 2, 8, 10, 11]),
            ("3, 3 , 1-2", 8, vec![1, 2, 3]),
        ];
        for (spec, cores, expected) in accepted {
            assert_eq!(parse_core_spec(spec, cores).unwrap(), expected);
        }

        // Out-of-range (core 8 >= 8), reversed, empty, and garbage all error.
        for spec in ["0-8", "5-1", "", "x"] {
            assert!(parse_core_spec(spec, 8).is_err());
        }
    }

    /// `parse_url` strips the scheme, prefers a port written in the URL, and
    /// falls back to `--port` only when the URL has none.
    #[test]
    fn url_port_parsing() {
        // (url, --port, expected host, expected port)
        let accepted = [
            // Explicit port in the URL is used as-is (scheme stripped).
            ("stratum+tcp://pool.example:3032", None, "pool.example", 3032),
            // URL port wins over --port when both are present.
            ("tcp://1.2.3.4:1234", Some(9999), "1.2.3.4", 1234),
            // No port in URL -> fall back to --port.
            ("pool.example", Some(3032), "pool.example", 3032),
        ];
        for (url, fallback, host, port) in accepted {
            assert_eq!(parse_url(url, fallback).unwrap(), (host.to_string(), port));
        }

        // No port and no --port -> error.
        assert!(parse_url("pool.example", None).is_err());
        // A colon with a non-numeric suffix is still an error.
        assert!(parse_url("host:notaport", Some(3032)).is_err());
    }
}
|