AMD GPU telemetry + --target-temp governor

Brings AMD cards to parity with NVIDIA for the monitoring/control surface, which was previously NVML-only. New src/amd_smi.rs is a gpu_tune::GpuTuner backed by Linux amdgpu sysfs (power1_average, temp1_input edge, freq1_input sclk, pp_dpm_sclk/mclk), matched to the device by PCI bus ID from OpenCL cl_khr_pci_bus_info. gpu_tune is un-gated so that it also compiles under the gpu feature; open() probes NVML first, then amd_smi. GpuSolver carries the tuner and Backend::Gpu dispatches power/temp/clocks, so the TUI and --benchmark now show power, temperature, clocks and Sol/W for AMD. The AMD backend is telemetry-only — setters are Unsupported (amdgpu control nodes are root-only). --target-temp <C> adds an opt-in software governor (miner::govern_cadence) that paces the solve cadence to hold edge temperature, with no hardware writes and no root required. When thermal throttling is small, the governor won't beat running flat-out on raw Sol/s; it's a temperature/efficiency lever. The controller is unit-tested; the flag and plumbing were verified live. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-06 20:17:59 -04:00
parent 0002e90451
commit 31aa85733e
5 changed files with 440 additions and 19 deletions
@@ -18,6 +18,10 @@ mod gpu;
 #[cfg(feature = "gpu")]
 mod gpu_amd;

+// AMD GPU telemetry via Linux amdgpu sysfs (a `gpu_tune::GpuTuner` backend).
+#[cfg(feature = "gpu")]
+mod amd_smi;
+
 // Runtime dynamic-library loader (dlopen) for the CUDA driver + NVML.
 #[cfg(feature = "cuda")]
 mod dylib;
@@ -28,7 +32,9 @@ mod cuda;
 #[cfg(feature = "cuda")]
 mod nvml;

-#[cfg(feature = "cuda")]
+// Platform-agnostic GPU tuning/telemetry surface. The trait + policy compile for
+// either GPU backend; NVML (cuda) and amd_smi (gpu) are the implementations.
+#[cfg(any(feature = "cuda", feature = "gpu"))]
 mod gpu_tune;

 use std::io::IsTerminal;
@@ -204,6 +210,13 @@ struct Args {
    #[arg(long)]
    auto_tune: bool,

+    /// Sustained-Sol/s governor: hold each GPU at/below this edge temperature (°C)
+    /// by pacing the solve cadence (no hardware writes, no root). Trades a little
+    /// throughput for lower temp/power; off by default (runs flat-out). Needs a
+    /// backend that reports temperature (AMD amdgpu / NVIDIA).
+    #[arg(long, value_name = "CELSIUS")]
+    target_temp: Option<u32>,
+
    /// Efficiency: cap each GPU's power limit in watts (default: card max).
    /// Lower power trades a little hashrate for much better Sol/W.
    #[arg(long, value_name = "WATTS")]
@@ -620,6 +633,8 @@ fn main() -> Result<()> {
        args.power_limit.unwrap_or(0),
        args.unlock_controls,
    );
+    // Software temp governor target (paces solve cadence; no hardware writes).
+    miner::set_target_temp(args.target_temp);
    miner::run(client, specs, running, job_timeout, tui, format!("{host}:{port}"), controls, cpu_mining, cpu_clamp, args.control_port)
 }

@@ -1077,10 +1092,21 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
    use std::time::Instant;
    info!("benchmarking {runs} solve(s) per worker across {} worker(s)", specs.len());

+    /// Per-worker benchmark result, including a steady-state telemetry snapshot
+    /// (sampled right after the timed loop, while the card is warm).
+    struct WorkerResult {
+        sols: usize,
+        dt: f64,
+        watts: Option<f64>,
+        temp_c: Option<u32>,
+        core_mhz: Option<u32>,
+        mem_mhz: Option<u32>,
+    }
+
    let start = Instant::now();
    let mut handles = Vec::new();
    for (id, spec) in specs.into_iter().enumerate() {
-        handles.push(std::thread::spawn(move || -> Result<(usize, f64)> {
+        handles.push(std::thread::spawn(move || -> Result<WorkerResult> {
            let backend = spec.build()?;
            backend.solve(&pseudo_header(id as u64))?; // warm up (excluded)
            let t = Instant::now();
@@ -1090,7 +1116,17 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
                let seed = ((id as u64) << 40) | (i as u64 + 1);
                sols += backend.solve(&pseudo_header(seed))?.len();
            }
-            Ok((sols, t.elapsed().as_secs_f64()))
+            let dt = t.elapsed().as_secs_f64();
+            // Snapshot telemetry while the card is still under load.
+            let (core_mhz, mem_mhz) = backend.current_clocks_mhz();
+            Ok(WorkerResult {
+                sols,
+                dt,
+                watts: backend.power_watts(),
+                temp_c: backend.temperature_c(),
+                core_mhz,
+                mem_mhz,
+            })
        }));
    }

@@ -1099,11 +1135,26 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
    let mut workers = 0usize;
    for h in handles {
        match h.join().unwrap() {
-            Ok((sols, dt)) => {
-                let sol_s = sols as f64 / dt;
+            Ok(r) => {
+                let sol_s = r.sols as f64 / r.dt;
+                // Optional telemetry tail: " | 142 W, 41.7 Sol/W, 68°C, 2700/2500 MHz".
+                let mut tail = String::new();
+                if let Some(w) = r.watts {
+                    tail.push_str(&format!(" | {w:.0} W"));
+                    if w > 0.0 {
+                        tail.push_str(&format!(", {:.2} Sol/W", sol_s / w));
+                    }
+                }
+                if let Some(t) = r.temp_c {
+                    tail.push_str(&format!(", {t}°C"));
+                }
+                if let (Some(c), m) = (r.core_mhz, r.mem_mhz) {
+                    tail.push_str(&format!(", {c}/{} MHz", m.map(|m| m.to_string()).unwrap_or_else(|| "?".into())));
+                }
                info!(
-                    "  worker {workers}: {sol_s:.2} Sol/s ({:.0} ms/solve), {sols} solutions",
-                    1000.0 * dt / runs as f64
+                    "  worker {workers}: {sol_s:.2} Sol/s ({:.0} ms/solve), {} solutions{tail}",
+                    1000.0 * r.dt / runs as f64,
+                    r.sols
                );
                agg_sols += sol_s;
                workers += 1;