AMD GPU telemetry + --target-temp governor

Brings AMD cards to parity with NVIDIA on the monitoring/control surface, which
was previously NVML-only. New src/amd_smi.rs is a gpu_tune::GpuTuner backed by Linux amdgpu
sysfs (power1_average, temp1_input edge, freq1_input sclk, pp_dpm_sclk/mclk),
matched to the device by PCI bus id from OpenCL cl_khr_pci_bus_info. gpu_tune is
un-gated to compile under the gpu feature; open() probes NVML then amd_smi.
GpuSolver carries the tuner and Backend::Gpu dispatches power/temp/clocks, so the
TUI and --benchmark now show power, temperature, clocks and Sol/W for AMD.
Telemetry-only — setters are Unsupported (amdgpu control nodes are root-only).

--target-temp <C> adds an opt-in software governor (miner::govern_cadence) that
paces the solve cadence to hold edge temperature, with no hardware writes and no
root required. When the card barely thermal-throttles, pacing won't beat running
flat-out on raw Sol/s; it is a temperature/efficiency lever. The controller is
unit-tested; the flag and plumbing were verified live.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
jackpotincorporated
2026-06-06 20:17:59 -04:00
parent 0002e90451
commit 31aa85733e
5 changed files with 440 additions and 19 deletions
+204
View File
@@ -0,0 +1,204 @@
//! AMD GPU telemetry for [`crate::gpu_tune::GpuTuner`] via the Linux amdgpu
//! sysfs interface (`/sys/class/drm/cardN/device/...`).
//!
//! Telemetry only: board power, edge temperature, and core/memory clocks come
//! from the world-readable hwmon + DPM nodes, so it works unprivileged. The
//! control nodes (`pp_od_clk_voltage`, power cap, performance level) are
//! root-write-only and carry GPU-hang risk, so every setter returns
//! [`SetOutcome::Unsupported`] — this backend never writes. The handle is matched
//! to the physical card by PCI bus id (e.g. from OpenCL's `cl_khr_pci_bus_info`),
//! so it lines up with whichever device the solver actually opened.
use std::path::{Path, PathBuf};
use crate::gpu_tune::{GpuTuner, SetOutcome};
/// One amdgpu card's sysfs telemetry handle.
///
/// Holds only resolved paths and a display name; every reading re-reads the
/// corresponding sysfs node on demand.
pub struct AmdTuner {
/// Canonical `/sys/.../<PCI BDF>` device directory (holds `pp_dpm_*`).
device_dir: PathBuf,
/// `device_dir/hwmon/hwmonM` (the index M is not stable — resolved by scanning
/// the `hwmon` directory for the first `hwmon*` entry).
hwmon_dir: PathBuf,
/// Display name: the device's `product_name` node when present and non-empty,
/// otherwise the generic label "AMD GPU".
name: String,
}
// Only `PathBuf`/`String` — `AmdTuner` is `Send` automatically; no `unsafe impl`.
/// Build a telemetry handle for the amdgpu card whose PCI address is
/// `pci_bus_id` (for example "0000:03:00.0").
///
/// The lookup walks `/sys/class/drm`, keeps only the plain `cardN` nodes, and
/// compares each node's PCI `bus:device.function` tail with the requested one
/// (the domain is ignored on both sides). The first card that also has a hwmon
/// directory exposing `temp1_input` or `power1_average` wins.
///
/// Returns `None` on non-Linux targets, when the bus id cannot be parsed, when
/// `/sys/class/drm` is unreadable, or when no matching card with telemetry
/// nodes is found.
pub fn open(pci_bus_id: &str) -> Option<Box<dyn GpuTuner>> {
    #[cfg(not(target_os = "linux"))]
    {
        let _ = pci_bus_id;
        None
    }
    #[cfg(target_os = "linux")]
    {
        let wanted_tail = bdf_tail(pci_bus_id)?;
        let drm_nodes = std::fs::read_dir("/sys/class/drm").ok()?;
        for node in drm_nodes.flatten() {
            let node_name = node.file_name();
            let node_name = node_name.to_string_lossy();
            // Only the GPU nodes ("card0", "card1", …) qualify; per-connector
            // dirs ("card1-DP-1") and render nodes are ignored.
            let is_card_node = node_name.starts_with("card") && !node_name.contains('-');
            if !is_card_node {
                continue;
            }
            // `cardN/device` is a symlink to the PCI device dir `…/<BDF>`.
            let Ok(device_dir) = std::fs::canonicalize(node.path().join("device")) else {
                continue;
            };
            let card_tail = device_dir
                .file_name()
                .and_then(|leaf| leaf.to_str())
                .and_then(bdf_tail);
            if card_tail.as_deref() != Some(wanted_tail.as_str()) {
                continue;
            }
            let Some(hwmon_dir) = find_hwmon(&device_dir) else {
                continue;
            };
            // Insist on at least one sensor node so we never attach to a card
            // without telemetry (e.g. some virtual/headless devices).
            let has_sensor = hwmon_dir.join("temp1_input").exists()
                || hwmon_dir.join("power1_average").exists();
            if !has_sensor {
                continue;
            }
            let name = read_name(&device_dir);
            let tuner = AmdTuner {
                device_dir,
                hwmon_dir,
                name,
            };
            return Some(Box::new(tuner));
        }
        None
    }
}
/// Locate the hwmon directory of a device: the first entry under
/// `<device_dir>/hwmon` whose name starts with `hwmon`.
///
/// The numeric suffix is assigned by the kernel and is not stable, so it is
/// discovered by scanning rather than assumed. Returns `None` when the `hwmon`
/// directory cannot be read or holds no such entry.
fn find_hwmon(device_dir: &Path) -> Option<PathBuf> {
    let entries = std::fs::read_dir(device_dir.join("hwmon")).ok()?;
    entries
        .flatten()
        .find(|entry| entry.file_name().to_string_lossy().starts_with("hwmon"))
        .map(|entry| entry.path())
}
/// Human-readable label for the card.
///
/// Uses the trimmed contents of the device's `product_name` node when it can be
/// read and is non-empty; otherwise returns the generic "AMD GPU" (amdgpu sysfs
/// rarely exposes a marketing name).
fn read_name(device_dir: &Path) -> String {
    match read_trim(&device_dir.join("product_name")) {
        Some(label) if !label.is_empty() => label,
        _ => "AMD GPU".to_string(),
    }
}
/// Reduce a PCI address to its lowercase `bus:device.function` tail.
///
/// Surrounding whitespace is trimmed and the text is ASCII-lowercased. An
/// address with three colon-separated fields has its leading domain dropped; an
/// address with two fields is returned as-is. That way "0000:03:00.0" and
/// "03:00.0" compare equal. Any other field count yields `None`.
fn bdf_tail(bdf: &str) -> Option<String> {
    let normalized = bdf.trim().to_ascii_lowercase();
    // Walk the fields right-to-left: devfunc, bus, then an optional domain.
    let mut fields = normalized.rsplit(':');
    let devfunc = fields.next()?;
    let bus = fields.next()?;
    let _domain = fields.next();
    if fields.next().is_some() {
        // More than three fields is not a PCI address.
        return None;
    }
    Some(format!("{bus}:{devfunc}"))
}
/// Read the file at `p` as UTF-8 text and return it with leading and trailing
/// whitespace removed. Any read failure (missing file, permission, invalid
/// UTF-8) yields `None`.
fn read_trim(p: &Path) -> Option<String> {
    let raw = std::fs::read_to_string(p).ok()?;
    Some(raw.trim().to_owned())
}
/// Read the file at `p` and parse its trimmed contents as a decimal `u64`.
/// Returns `None` when the file cannot be read or is not a valid unsigned
/// integer (which includes negative values).
fn read_u64(p: &Path) -> Option<u64> {
    let text = read_trim(p)?;
    text.parse::<u64>().ok()
}
/// Extract the frequency in MHz from one `pp_dpm_*` line such as
/// `"2: 2700Mhz *"`.
///
/// The value is the leading run of ASCII digits in the first whitespace-
/// separated word after the first colon. Returns `None` when the line has no
/// colon, nothing follows it, or that word does not start with a number that
/// fits in `u32`.
fn parse_mhz(line: &str) -> Option<u32> {
    let field = line.split(':').nth(1)?;
    let word = field.split_whitespace().next()?;
    // Cut the word at the first non-digit (the "Mhz" unit suffix).
    let digits_end = word
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(word.len());
    word[..digits_end].parse().ok()
}
impl AmdTuner {
/// MHz of the currently active DPM level (the line marked `*`) in `file`.
fn dpm_active_mhz(&self, file: &str) -> Option<u32> {
let s = read_trim(&self.device_dir.join(file))?;
s.lines().find(|l| l.contains('*')).and_then(parse_mhz)
}
/// Highest DPM level (MHz) listed in `file`.
fn dpm_max_mhz(&self, file: &str) -> Option<u32> {
let s = read_trim(&self.device_dir.join(file))?;
s.lines().filter_map(parse_mhz).max()
}
}
/// Read-only [`GpuTuner`] implementation: every getter re-reads a sysfs node
/// under the card's hwmon/device directory, and every setter reports
/// [`SetOutcome::Unsupported`] without touching the hardware.
impl GpuTuner for AmdTuner {
    fn name(&self) -> String {
        self.name.to_owned()
    }

    /// Board power in watts, from `power1_average` (µW) or, failing that, the
    /// instantaneous `power1_input`.
    fn watts(&self) -> Option<f64> {
        let microwatts = match read_u64(&self.hwmon_dir.join("power1_average")) {
            Some(average) => average,
            None => read_u64(&self.hwmon_dir.join("power1_input"))?,
        };
        Some(microwatts as f64 / 1_000_000.0)
    }

    /// Edge temperature in °C: `temp1_input` (m°C) rounded to the nearest degree.
    fn temperature_c(&self) -> Option<u32> {
        read_u64(&self.hwmon_dir.join("temp1_input"))
            .map(|millideg| ((millideg + 500) / 1000) as u32)
    }

    /// Current power cap in whole watts from `power1_cap` (µW, truncated).
    fn current_power_limit_w(&self) -> Option<u32> {
        // Absent on Navi 44 (RX 9060 XT); best-effort for cards that expose it.
        let microwatts = read_u64(&self.hwmon_dir.join("power1_cap"))?;
        Some((microwatts / 1_000_000) as u32)
    }

    /// Live core clock in MHz: `freq1_input` (Hz) when it reads non-zero,
    /// otherwise the active `pp_dpm_sclk` level.
    fn core_clock_mhz(&self) -> Option<u32> {
        match read_u64(&self.hwmon_dir.join("freq1_input")) {
            Some(hz) if hz > 0 => Some((hz / 1_000_000) as u32),
            _ => self.dpm_active_mhz("pp_dpm_sclk"),
        }
    }

    /// Active memory clock level (MHz) from `pp_dpm_mclk`.
    fn mem_clock_mhz(&self) -> Option<u32> {
        self.dpm_active_mhz("pp_dpm_mclk")
    }

    /// Highest core clock level (MHz) listed in `pp_dpm_sclk`.
    fn max_core_clock_mhz(&self) -> Option<u32> {
        self.dpm_max_mhz("pp_dpm_sclk")
    }

    /// Highest memory clock level (MHz) listed in `pp_dpm_mclk`.
    fn max_mem_clock_mhz(&self) -> Option<u32> {
        self.dpm_max_mhz("pp_dpm_mclk")
    }

    /// `(min, max)` power cap in whole watts from `power1_cap_min` /
    /// `power1_cap_max` (µW, truncated); `None` unless both are readable.
    fn power_limit_range_w(&self) -> Option<(u32, u32)> {
        let to_watts = |microwatts: u64| (microwatts / 1_000_000) as u32;
        read_u64(&self.hwmon_dir.join("power1_cap_min")).and_then(|floor| {
            read_u64(&self.hwmon_dir.join("power1_cap_max"))
                .map(|ceiling| (to_watts(floor), to_watts(ceiling)))
        })
    }

    // Everything below is a deliberate no-op: this backend is telemetry-only
    // and never writes the root-only control nodes.

    fn set_persistence(&self, _on: bool) -> SetOutcome {
        SetOutcome::Unsupported
    }

    fn lock_core_clock_mhz(&self, _mhz: u32) -> SetOutcome {
        SetOutcome::Unsupported
    }

    fn lock_mem_clock_mhz(&self, _mhz: u32) -> SetOutcome {
        SetOutcome::Unsupported
    }

    fn set_power_limit_w(&self, _watts: u32) -> SetOutcome {
        SetOutcome::Unsupported
    }

    fn set_core_offset_mhz(&self, _mhz: i32) -> SetOutcome {
        SetOutcome::Unsupported
    }

    fn set_mem_offset_mhz(&self, _mhz: i32) -> SetOutcome {
        SetOutcome::Unsupported
    }

    /// Nothing was ever changed, so there is nothing to restore.
    fn reset(&self) {}
}
+59 -4
View File
@@ -411,6 +411,9 @@ impl LegacySolver {
/// (`equihash.cl`) everywhere else. Forceable with `ZCL_OPENCL_KERNEL=amd|legacy`.
pub struct GpuSolver {
/// The kernel-specific solver this wrapper dispatches to (the `Legacy` or
/// `Amd` variant of `SolverInner`), chosen when the solver is constructed.
inner: SolverInner,
/// Per-card telemetry handle (AMD amdgpu sysfs / NVML), matched to the device
/// by PCI bus id. `None` when no telemetry backend matches.
tuner: Option<Box<dyn crate::gpu_tune::GpuTuner>>,
}
enum SolverInner {
@@ -423,13 +426,16 @@ impl GpuSolver {
/// device vendor (AMD → `equihash192_7.cl`).
pub fn new(device_index: usize) -> Result<Self> {
let (platform, device) = pick_device(device_index)?;
// Resolve a telemetry handle (AMD sysfs / NVML) from the device's PCI bus
// before `device` is consumed by the inner solver.
let tuner = device_pci_bus_id(&device).and_then(|bus| crate::gpu_tune::open(&bus));
let inner = if use_amd_kernel(&device) {
log::info!("OpenCL: AMD device — using the equihash192_7 kernel");
SolverInner::Amd(crate::gpu_amd::AmdSolver::new(platform, device)?)
} else {
SolverInner::Legacy(LegacySolver::new(platform, device)?)
};
Ok(Self { inner })
Ok(Self { inner, tuner })
}
/// This device's product name, if available.
@@ -440,6 +446,34 @@ impl GpuSolver {
}
}
/// Board power draw in watts as reported by the telemetry handle; `None` when
/// there is no handle or it has no power reading.
pub fn power_watts(&self) -> Option<f64> {
    self.tuner.as_deref()?.watts()
}
/// GPU temperature in °C as reported by the telemetry handle; `None` when
/// there is no handle or it has no temperature reading.
pub fn temperature_c(&self) -> Option<u32> {
    self.tuner.as_deref()?.temperature_c()
}
/// Power limit currently in force, in watts; `None` when there is no telemetry
/// handle or it does not report one.
pub fn current_power_limit_w(&self) -> Option<u32> {
    self.tuner.as_deref()?.current_power_limit_w()
}
/// Settable power-limit range as `(min, max)` watts; `None` when there is no
/// telemetry handle or it does not report a range.
pub fn power_limit_range_w(&self) -> Option<(u32, u32)> {
    self.tuner.as_deref()?.power_limit_range_w()
}
/// Live `(core, memory)` clocks in MHz. Each element is independently `None`
/// when unavailable; both are `None` when there is no telemetry handle.
pub fn current_clocks_mhz(&self) -> (Option<u32>, Option<u32>) {
    let Some(tuner) = self.tuner.as_deref() else {
        return (None, None);
    };
    (tuner.core_clock_mhz(), tuner.mem_clock_mhz())
}
/// Solve the puzzle for `header` (140 bytes).
pub fn solve(&self, header: &[u8]) -> Result<Vec<Vec<u32>>> {
match &self.inner {
@@ -475,11 +509,32 @@ impl GpuSolver {
pub fn hash_all(&self, header: &[u8]) -> Result<Vec<u8>> {
match &self.inner {
SolverInner::Legacy(s) => s.hash_all(header),
SolverInner::Amd(_) => {
Err(anyhow!("hash_all is not supported by the AMD kernel"))
SolverInner::Amd(_) => Err(anyhow!("hash_all is not supported by the AMD kernel")),
}
}
}
/// The device's PCI address as `"DDDD:BB:DD.F"` (lowercase hex), for matching
/// the physical card to a telemetry backend.
///
/// Two OpenCL device queries are tried in order:
///
/// 1. `cl_khr_pci_bus_info` (`CL_DEVICE_PCI_BUS_INFO_KHR`), which reports the
///    full domain/bus/device/function tuple.
/// 2. `cl_device_topology_amd` (`CL_DEVICE_TOPOLOGY_AMD`), which has no domain
///    field, so domain `0000` is assumed. The result is a tagged union and is
///    only trusted when its `type` tag says it holds the PCIe variant.
///
/// Returns `None` if the device answers neither query with usable data.
fn device_pci_bus_id(device: &ocl::Device) -> Option<String> {
    // Token assigned to cl_khr_pci_bus_info in the Khronos headers. (0x10F2,
    // used here previously, is CL_MEM_OBJECT_IMAGE3D — not a device-info
    // token — so the query always failed and only the AMD fallback ever ran.)
    const CL_DEVICE_PCI_BUS_INFO_KHR: u32 = 0x410F;
    const CL_DEVICE_TOPOLOGY_AMD: u32 = 0x4037;
    const CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD: u32 = 1;

    // Native-endian u32 at byte offset `at`; callers check the length first.
    let read_u32 = |bytes: &[u8], at: usize| {
        u32::from_ne_bytes([bytes[at], bytes[at + 1], bytes[at + 2], bytes[at + 3]])
    };

    // cl_device_pci_bus_info_khr = { u32 pci_domain, pci_bus, pci_device, pci_function }.
    if let Ok(raw) = device.info_raw(CL_DEVICE_PCI_BUS_INFO_KHR) {
        if raw.len() >= 16 {
            return Some(format!(
                "{:04x}:{:02x}:{:02x}.{:x}",
                read_u32(&raw, 0),
                read_u32(&raw, 4),
                read_u32(&raw, 8),
                read_u32(&raw, 12)
            ));
        }
    }

    // cl_device_topology_amd is a 24-byte union that starts with a u32 `type`
    // tag. In the PCIe variant the bus/device/function are the last three
    // bytes; without the tag check those bytes could belong to another variant.
    if let Ok(raw) = device.info_raw(CL_DEVICE_TOPOLOGY_AMD) {
        if raw.len() >= 24 && read_u32(&raw, 0) == CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD {
            return Some(format!(
                "0000:{:02x}:{:02x}.{:x}",
                raw[21], raw[22], raw[23]
            ));
        }
    }

    None
}
/// Decide whether to drive `device` with the AMD `equihash192_7.cl` kernel.
@@ -561,7 +616,7 @@ pub fn cpu_device_index() -> Option<usize> {
/// Resolve a flat device index across all platforms, returning the device along
/// with the platform it belongs to (needed to build the context against the
/// right platform).
fn pick_device(index: usize) -> Result<(ocl::Platform, ocl::Device)> {
pub(crate) fn pick_device(index: usize) -> Result<(ocl::Platform, ocl::Device)> {
use ocl::{Device, Platform};
let mut idx = 0;
for platform in Platform::list() {
+12 -6
View File
@@ -128,18 +128,24 @@ pub trait GpuTuner: Send {
/// Open a control handle for the GPU at `pci_bus_id` (matches the physical card
/// regardless of CUDA-vs-driver index ordering). `None` if unavailable.
///
/// NVML is the backend on both Linux (`libnvidia-ml`) and Windows (`nvml.dll`);
/// the C API is identical, so the same [`crate::nvml`] code serves both.
/// Tries the NVIDIA backend first (NVML, `libnvidia-ml`/`nvml.dll`), then the AMD
/// backend ([`crate::amd_smi`], Linux amdgpu sysfs). A non-matching bus id makes
/// each backend return `None`, so probing both is safe on mixed-vendor hosts.
pub fn open(pci_bus_id: &str) -> Option<Box<dyn GpuTuner>> {
#[cfg(any(unix, windows))]
#[cfg(feature = "cuda")]
{
crate::nvml::open(pci_bus_id)
if let Some(t) = crate::nvml::open(pci_bus_id) {
return Some(t);
}
#[cfg(not(any(unix, windows)))]
}
#[cfg(feature = "gpu")]
{
if let Some(t) = crate::amd_smi::open(pci_bus_id) {
return Some(t);
}
}
let _ = pci_bus_id;
None
}
}
static WARNED_PRIVS: AtomicBool = AtomicBool::new(false);
+58 -7
View File
@@ -18,6 +18,10 @@ mod gpu;
#[cfg(feature = "gpu")]
mod gpu_amd;
// AMD GPU telemetry via Linux amdgpu sysfs (a `gpu_tune::GpuTuner` backend).
#[cfg(feature = "gpu")]
mod amd_smi;
// Runtime dynamic-library loader (dlopen) for the CUDA driver + NVML.
#[cfg(feature = "cuda")]
mod dylib;
@@ -28,7 +32,9 @@ mod cuda;
#[cfg(feature = "cuda")]
mod nvml;
#[cfg(feature = "cuda")]
// Platform-agnostic GPU tuning/telemetry surface. The trait + policy compile for
// either GPU backend; NVML (cuda) and amd_smi (gpu) are the implementations.
#[cfg(any(feature = "cuda", feature = "gpu"))]
mod gpu_tune;
use std::io::IsTerminal;
@@ -204,6 +210,13 @@ struct Args {
#[arg(long)]
auto_tune: bool,
/// Sustained-Sol/s governor: hold each GPU at/below this edge temperature (°C)
/// by pacing the solve cadence (no hardware writes, no root). Trades a little
/// throughput for lower temp/power; off by default (runs flat-out). Needs a
/// backend that reports temperature (AMD amdgpu / NVIDIA).
#[arg(long, value_name = "CELSIUS")]
target_temp: Option<u32>,
/// Efficiency: cap each GPU's power limit in watts (default: card max).
/// Lower power trades a little hashrate for much better Sol/W.
#[arg(long, value_name = "WATTS")]
@@ -620,6 +633,8 @@ fn main() -> Result<()> {
args.power_limit.unwrap_or(0),
args.unlock_controls,
);
// Software temp governor target (paces solve cadence; no hardware writes).
miner::set_target_temp(args.target_temp);
miner::run(client, specs, running, job_timeout, tui, format!("{host}:{port}"), controls, cpu_mining, cpu_clamp, args.control_port)
}
@@ -1077,10 +1092,21 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
use std::time::Instant;
info!("benchmarking {runs} solve(s) per worker across {} worker(s)", specs.len());
/// Per-worker benchmark result, including a steady-state telemetry snapshot
/// (sampled right after the timed loop, while the card is warm).
struct WorkerResult {
sols: usize,
dt: f64,
watts: Option<f64>,
temp_c: Option<u32>,
core_mhz: Option<u32>,
mem_mhz: Option<u32>,
}
let start = Instant::now();
let mut handles = Vec::new();
for (id, spec) in specs.into_iter().enumerate() {
handles.push(std::thread::spawn(move || -> Result<(usize, f64)> {
handles.push(std::thread::spawn(move || -> Result<WorkerResult> {
let backend = spec.build()?;
backend.solve(&pseudo_header(id as u64))?; // warm up (excluded)
let t = Instant::now();
@@ -1090,7 +1116,17 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
let seed = ((id as u64) << 40) | (i as u64 + 1);
sols += backend.solve(&pseudo_header(seed))?.len();
}
Ok((sols, t.elapsed().as_secs_f64()))
let dt = t.elapsed().as_secs_f64();
// Snapshot telemetry while the card is still under load.
let (core_mhz, mem_mhz) = backend.current_clocks_mhz();
Ok(WorkerResult {
sols,
dt,
watts: backend.power_watts(),
temp_c: backend.temperature_c(),
core_mhz,
mem_mhz,
})
}));
}
@@ -1099,11 +1135,26 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
let mut workers = 0usize;
for h in handles {
match h.join().unwrap() {
Ok((sols, dt)) => {
let sol_s = sols as f64 / dt;
Ok(r) => {
let sol_s = r.sols as f64 / r.dt;
// Optional telemetry tail: " | 142 W, 41.7 Sol/W, 68°C, 2700/2500 MHz".
let mut tail = String::new();
if let Some(w) = r.watts {
tail.push_str(&format!(" | {w:.0} W"));
if w > 0.0 {
tail.push_str(&format!(", {:.2} Sol/W", sol_s / w));
}
}
if let Some(t) = r.temp_c {
tail.push_str(&format!(", {t}°C"));
}
if let (Some(c), m) = (r.core_mhz, r.mem_mhz) {
tail.push_str(&format!(", {c}/{} MHz", m.map(|m| m.to_string()).unwrap_or_else(|| "?".into())));
}
info!(
" worker {workers}: {sol_s:.2} Sol/s ({:.0} ms/solve), {sols} solutions",
1000.0 * dt / runs as f64
" worker {workers}: {sol_s:.2} Sol/s ({:.0} ms/solve), {} solutions{tail}",
1000.0 * r.dt / runs as f64,
r.sols
);
agg_sols += sol_s;
workers += 1;
+105
View File
@@ -14,6 +14,22 @@ use crate::equihash;
use crate::params::{HEADER_LEN, SOLUTION_BYTES};
use crate::stratum::{StratumClient, Work};
/// Process-wide target edge temperature (°C) for the software solve-cadence
/// governor; `None` ⇒ run flat-out. Set once at startup from `--target-temp`.
///
/// The two layers mean different things: the `OnceLock` records whether a value
/// has been installed at all, and the inner `Option` is the installed setting
/// (`None` = governor off). Readers treat "never installed" the same as "off".
static TARGET_TEMP_C: OnceLock<Option<u32>> = OnceLock::new();
/// Record the governor's target temperature for the whole process.
///
/// Intended to be called once, before any worker starts. When a target is
/// given, an info line announcing the governor is logged first. The value is
/// then stored in `TARGET_TEMP_C`; if a value was already installed the store
/// is silently ignored and the earlier value stays in effect.
pub fn set_target_temp(c: Option<u32>) {
    if let Some(limit) = c {
        info!(
            "temperature governor enabled: holding GPUs ≤{t}°C (paced cadence)",
            t = limit
        );
    }
    // `OnceLock::set` only fails when already initialized; first write wins.
    let _ = TARGET_TEMP_C.set(c);
}
/// The governor's target temperature (°C), or `None` when it is disabled or
/// `set_target_temp` has not been called yet.
fn target_temp_c() -> Option<u32> {
    // `?` handles "never installed"; the deref yields the stored setting.
    *TARGET_TEMP_C.get()?
}
/// Double SHA-256, as used for the Zcash/ZClassic block PoW hash.
fn sha256d(data: &[u8]) -> [u8; 32] {
let first = Sha256::digest(data);
@@ -136,6 +152,8 @@ impl Backend {
match self {
#[cfg(feature = "cuda")]
Backend::Cuda(solver) => solver.power_watts(),
#[cfg(feature = "gpu")]
Backend::Gpu(solver) => solver.power_watts(),
_ => None,
}
}
@@ -145,6 +163,8 @@ impl Backend {
match self {
#[cfg(feature = "cuda")]
Backend::Cuda(solver) => solver.temperature_c(),
#[cfg(feature = "gpu")]
Backend::Gpu(solver) => solver.temperature_c(),
_ => None,
}
}
@@ -154,6 +174,8 @@ impl Backend {
match self {
#[cfg(feature = "cuda")]
Backend::Cuda(solver) => solver.current_power_limit_w(),
#[cfg(feature = "gpu")]
Backend::Gpu(solver) => solver.current_power_limit_w(),
_ => None,
}
}
@@ -163,6 +185,8 @@ impl Backend {
match self {
#[cfg(feature = "cuda")]
Backend::Cuda(solver) => solver.power_limit_range_w(),
#[cfg(feature = "gpu")]
Backend::Gpu(solver) => solver.power_limit_range_w(),
_ => None,
}
}
@@ -190,6 +214,8 @@ impl Backend {
match self {
#[cfg(feature = "cuda")]
Backend::Cuda(solver) => solver.current_clocks_mhz(),
#[cfg(feature = "gpu")]
Backend::Gpu(solver) => solver.current_clocks_mhz(),
_ => (None, None),
}
}
@@ -513,6 +539,14 @@ fn worker(
let mut last_job = Instant::now();
let mut paused = false;
let mut disabled_pause = false;
// Software temperature governor: pace the solve cadence to hold edge temp at
// or below `--target-temp` (no hardware writes). `gov_sleep` is the per-pass
// pause, nudged from the periodic temperature sample.
let gov_target = target_temp_c();
let mut gov_sleep = Duration::ZERO;
if let Some(target) = gov_target {
info!("worker {id}: temperature governor active — pacing cadence to hold ≤{target}°C");
}
while running.load(Ordering::Relaxed) {
if work_handle.epoch() != current.epoch {
@@ -583,6 +617,9 @@ fn worker(
}
if let Some(t) = backend.temperature_c() {
stats.workers[id].temp_c.store(t, Ordering::Relaxed);
if let Some(target) = gov_target {
gov_sleep = govern_cadence(gov_sleep, t, target);
}
}
let (core_mhz, mem_mhz) = backend.current_clocks_mhz();
if let Some(c) = core_mhz {
@@ -614,12 +651,34 @@ fn worker(
let ctx = inflight.pop_front().unwrap();
process_results(id, &client, &ctx, &solutions, &stats);
}
// Temperature governor: pace the cadence (held off entirely when flat-out).
if !gov_sleep.is_zero() {
std::thread::sleep(gov_sleep);
}
}
drain_pipeline(id, &mut backend, &mut inflight, &client, &stats)?;
Ok(())
}
/// Compute the next per-pass governor pause from the current pause `cur`, the
/// measured edge temperature `temp`, and the desired `target` (both °C).
///
/// * Above target: the pause grows by 400 µs per degree of overshoot, capped at
///   100 ms.
/// * Two or more degrees below target: the pause shrinks by 1 ms, never going
///   below zero.
/// * At target or one degree under: the pause is left unchanged (dead band).
///
/// Because it accumulates small corrections on each temperature sample, it
/// behaves as a slow integral controller, which suits the card's long thermal
/// time constant.
fn govern_cadence(cur: Duration, temp: u32, target: u32) -> Duration {
    const MAX_SLEEP: Duration = Duration::from_millis(100);
    // Signed difference so "under target" is representable.
    let overshoot = i64::from(temp) - i64::from(target);
    if overshoot > 0 {
        let step = Duration::from_micros(400 * overshoot as u64);
        return cur.saturating_add(step).min(MAX_SLEEP);
    }
    if overshoot < -1 {
        return cur.saturating_sub(Duration::from_millis(1));
    }
    cur
}
}
/// Supervise CPU mining: spawn one worker per group for the current group size,
/// and whenever the dashboard cycles the size, stop those workers (`gen_running`),
/// rebuild the grouping, and respawn. Runs until `running` is cleared.
@@ -843,3 +902,49 @@ fn build_nonce(nonce1: &[u8], counter: u64) -> Result<[u8; 32]> {
tail[..n].copy_from_slice(&counter.to_le_bytes()[..n]);
Ok(nonce)
}
/// Unit tests for the software temperature governor (`govern_cadence`).
#[cfg(test)]
mod governor_tests {
    use super::*;

    /// A reading at or below the target must not introduce any pause.
    #[test]
    fn governor_idles_below_target() {
        for reading in [60, 70] {
            assert_eq!(govern_cadence(Duration::ZERO, reading, 70), Duration::ZERO);
        }
    }

    /// Above the target the pause grows with the overshoot and stops at the cap.
    #[test]
    fn governor_lengthens_over_target_and_clamps() {
        // +5°C from a standing start adds five 400 µs steps.
        let after_overshoot = govern_cadence(Duration::ZERO, 75, 70);
        assert_eq!(after_overshoot, Duration::from_micros(400 * 5));

        // A wildly hot reading saturates at the cap rather than exceeding it.
        let near_cap = Duration::from_millis(99);
        let saturated = govern_cadence(near_cap, 200, 70);
        assert_eq!(saturated, Duration::from_millis(100));
    }

    /// Well under the target the pause shrinks, bottoming out at zero.
    #[test]
    fn governor_eases_off_when_cool() {
        // Three degrees under: one 1 ms step back.
        let eased = govern_cadence(Duration::from_millis(3), 67, 70);
        assert_eq!(eased, Duration::from_millis(2));

        // Already at zero: stays at zero instead of underflowing.
        let floored = govern_cadence(Duration::ZERO, 50, 70);
        assert_eq!(floored, Duration::ZERO);
    }

    /// Closed-loop check against a toy thermal model: the controller should
    /// settle within ±2°C of the target rather than run away.
    #[test]
    fn governor_converges_to_hold_target() {
        let target = 70u32;
        let mut pause = Duration::ZERO;
        let mut temp = 85i32; // starts hot
        for _ in 0..200 {
            pause = govern_cadence(pause, temp as u32, target);
            // Crude plant: a 90°C flat-out card that sheds ~1.5°C per whole
            // millisecond of pause.
            let whole_ms = pause.as_millis() as f64;
            temp = (90.0 - 1.5 * whole_ms).round() as i32;
        }
        let centre = target as i32;
        let band = centre - 2..=centre + 2;
        assert!(band.contains(&temp), "settled at {temp}°C, want ~{target}");
    }
}