AMD GPU telemetry + --target-temp governor
Brings AMD cards to parity with NVIDIA on the monitoring/control surface, which was previously NVML-only. The new src/amd_smi.rs is a gpu_tune::GpuTuner backed by Linux amdgpu sysfs (power1_average, temp1_input edge, freq1_input sclk, pp_dpm_sclk/mclk), matched to the device by the PCI bus id from OpenCL cl_khr_pci_bus_info. gpu_tune is un-gated so that it compiles under the gpu feature; open() probes NVML first, then amd_smi. GpuSolver carries the tuner and Backend::Gpu dispatches power/temp/clocks, so the TUI and --benchmark now show power, temperature, clocks and Sol/W for AMD. Telemetry-only — setters return Unsupported (amdgpu control nodes are root-only). --target-temp <C> adds an opt-in software governor (miner::govern_cadence) that paces the solve cadence to hold edge temperature, with no hardware writes and no root. When thermal throttling is mild it won't beat running flat-out on raw Sol/s; it's a temperature/efficiency lever. The controller is unit-tested; the flag and plumbing were verified live. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
+204
@@ -0,0 +1,204 @@
|
||||
//! AMD GPU telemetry for [`crate::gpu_tune::GpuTuner`] via the Linux amdgpu
|
||||
//! sysfs interface (`/sys/class/drm/cardN/device/...`).
|
||||
//!
|
||||
//! Telemetry only: board power, edge temperature, and core/memory clocks come
|
||||
//! from the world-readable hwmon + DPM nodes, so it works unprivileged. The
|
||||
//! control nodes (`pp_od_clk_voltage`, power cap, performance level) are
|
||||
//! root-write-only and carry GPU-hang risk, so every setter returns
|
||||
//! [`SetOutcome::Unsupported`] — this backend never writes. The handle is matched
|
||||
//! to the physical card by PCI bus id (e.g. from OpenCL's `cl_khr_pci_bus_info`),
|
||||
//! so it lines up with whichever device the solver actually opened.
|
||||
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::gpu_tune::{GpuTuner, SetOutcome};
|
||||
|
||||
/// Read-only sysfs telemetry handle for a single amdgpu card.
pub struct AmdTuner {
    /// Canonicalized PCI device directory (`/sys/.../<PCI BDF>`); the
    /// `pp_dpm_*` clock tables are read from here.
    device_dir: PathBuf,
    /// The card's `hwmon/hwmonM` directory under `device_dir`. The index `M` is
    /// not stable, so the directory is discovered by scanning, not hard-coded.
    hwmon_dir: PathBuf,
    /// Display name handed back by the tuner's `name()` accessor.
    name: String,
}
|
||||
|
||||
// Only `PathBuf`/`String` — `AmdTuner` is `Send` automatically; no `unsafe impl`.
|
||||
|
||||
/// Open a telemetry handle for the amdgpu card at `pci_bus_id` (e.g.
|
||||
/// "0000:03:00.0"). Returns `None` off Linux, or when no amdgpu card with
|
||||
/// telemetry nodes matches the bus id.
|
||||
pub fn open(pci_bus_id: &str) -> Option<Box<dyn GpuTuner>> {
|
||||
#[cfg(not(target_os = "linux"))]
|
||||
{
|
||||
let _ = pci_bus_id;
|
||||
None
|
||||
}
|
||||
#[cfg(target_os = "linux")]
|
||||
{
|
||||
let want = bdf_tail(pci_bus_id)?;
|
||||
for entry in std::fs::read_dir("/sys/class/drm").ok()?.flatten() {
|
||||
let fname = entry.file_name();
|
||||
let fname = fname.to_string_lossy();
|
||||
// Match the GPU nodes ("card0", "card1", …), not the per-connector
|
||||
// dirs ("card1-DP-1") or render nodes.
|
||||
if !fname.starts_with("card") || fname.contains('-') {
|
||||
continue;
|
||||
}
|
||||
// `cardN/device` symlinks to the PCI device dir `…/<BDF>`.
|
||||
let Ok(device_dir) = std::fs::canonicalize(entry.path().join("device")) else {
|
||||
continue;
|
||||
};
|
||||
let matches = device_dir
|
||||
.file_name()
|
||||
.and_then(|s| s.to_str())
|
||||
.and_then(bdf_tail)
|
||||
.map(|t| t == want)
|
||||
.unwrap_or(false);
|
||||
if !matches {
|
||||
continue;
|
||||
}
|
||||
let Some(hwmon_dir) = find_hwmon(&device_dir) else { continue };
|
||||
// Require a telemetry node so we don't attach to a card without
|
||||
// sensors (e.g. some virtual/headless devices).
|
||||
if !hwmon_dir.join("temp1_input").exists()
|
||||
&& !hwmon_dir.join("power1_average").exists()
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let name = read_name(&device_dir);
|
||||
return Some(Box::new(AmdTuner { device_dir, hwmon_dir, name }));
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Locate the card's hwmon directory: the first entry under
/// `device_dir/hwmon` whose name begins with `hwmon` (the numeric suffix is
/// not stable). `None` if the `hwmon` directory is unreadable or has no such
/// entry.
fn find_hwmon(device_dir: &Path) -> Option<PathBuf> {
    std::fs::read_dir(device_dir.join("hwmon"))
        .ok()?
        .flatten()
        .find(|entry| entry.file_name().to_string_lossy().starts_with("hwmon"))
        .map(|entry| entry.path())
}
|
||||
|
||||
/// A display name for the card; amdgpu sysfs rarely exposes a marketing name, so
|
||||
/// fall back to a generic label.
|
||||
fn read_name(device_dir: &Path) -> String {
|
||||
read_trim(&device_dir.join("product_name"))
|
||||
.filter(|s| !s.is_empty())
|
||||
.unwrap_or_else(|| "AMD GPU".to_string())
|
||||
}
|
||||
|
||||
/// Reduce a PCI address to its lowercase `bus:device.function` tail by
/// discarding the domain, so "0000:03:00.0" and "03:00.0" compare equal.
///
/// Accepts exactly two or three colon-separated fields after trimming; any
/// other shape yields `None`.
fn bdf_tail(bdf: &str) -> Option<String> {
    let normalized = bdf.trim().to_ascii_lowercase();
    let mut fields = normalized.split(':');
    let first = fields.next()?;
    let second = fields.next()?;
    match (fields.next(), fields.next()) {
        // Two fields: already `bus:devfunc`.
        (None, _) => Some(format!("{first}:{second}")),
        // Three fields: `domain:bus:devfunc` — drop the domain.
        (Some(third), None) => Some(format!("{second}:{third}")),
        // Four or more fields is not a BDF.
        _ => None,
    }
}
|
||||
|
||||
/// Read the file at `p` as UTF-8 text and return it with surrounding
/// whitespace removed; `None` if the read fails for any reason.
fn read_trim(p: &Path) -> Option<String> {
    let raw = std::fs::read_to_string(p).ok()?;
    Some(raw.trim().to_owned())
}
|
||||
|
||||
fn read_u64(p: &Path) -> Option<u64> {
|
||||
read_trim(p)?.parse().ok()
|
||||
}
|
||||
|
||||
/// Extract the MHz figure from one `pp_dpm_*` table row such as
/// `"2: 2700Mhz *"`.
///
/// Takes the field between the first and second `:`, then the leading run of
/// ASCII digits of its first whitespace-separated token. `None` when there is
/// no colon, no token, or no leading digits that fit in a `u32`.
fn parse_mhz(line: &str) -> Option<u32> {
    let mut segments = line.split(':');
    let _level = segments.next()?;
    let field = segments.next()?;
    let token = field.split_whitespace().next()?;
    // Digits are ASCII, so the byte offset of the first non-digit is a valid
    // slice boundary.
    let digits_end = token
        .find(|c: char| !c.is_ascii_digit())
        .unwrap_or(token.len());
    token[..digits_end].parse().ok()
}
|
||||
|
||||
impl AmdTuner {
|
||||
/// MHz of the currently active DPM level (the line marked `*`) in `file`.
|
||||
fn dpm_active_mhz(&self, file: &str) -> Option<u32> {
|
||||
let s = read_trim(&self.device_dir.join(file))?;
|
||||
s.lines().find(|l| l.contains('*')).and_then(parse_mhz)
|
||||
}
|
||||
/// Highest DPM level (MHz) listed in `file`.
|
||||
fn dpm_max_mhz(&self, file: &str) -> Option<u32> {
|
||||
let s = read_trim(&self.device_dir.join(file))?;
|
||||
s.lines().filter_map(parse_mhz).max()
|
||||
}
|
||||
}
|
||||
|
||||
impl GpuTuner for AmdTuner {
|
||||
fn name(&self) -> String {
|
||||
self.name.clone()
|
||||
}
|
||||
|
||||
fn watts(&self) -> Option<f64> {
|
||||
// power1_average is µW; fall back to the instantaneous power1_input.
|
||||
let uw = read_u64(&self.hwmon_dir.join("power1_average"))
|
||||
.or_else(|| read_u64(&self.hwmon_dir.join("power1_input")))?;
|
||||
Some(uw as f64 / 1_000_000.0)
|
||||
}
|
||||
|
||||
fn temperature_c(&self) -> Option<u32> {
|
||||
// temp1 = edge (m°C).
|
||||
let mc = read_u64(&self.hwmon_dir.join("temp1_input"))?;
|
||||
Some(((mc + 500) / 1000) as u32)
|
||||
}
|
||||
|
||||
fn current_power_limit_w(&self) -> Option<u32> {
|
||||
// Absent on Navi 44 (RX 9060 XT); best-effort for cards that expose it.
|
||||
read_u64(&self.hwmon_dir.join("power1_cap")).map(|uw| (uw / 1_000_000) as u32)
|
||||
}
|
||||
|
||||
fn core_clock_mhz(&self) -> Option<u32> {
|
||||
// freq1_input (Hz) is the live sclk; the DPM active level is the fallback.
|
||||
if let Some(hz) = read_u64(&self.hwmon_dir.join("freq1_input")) {
|
||||
if hz > 0 {
|
||||
return Some((hz / 1_000_000) as u32);
|
||||
}
|
||||
}
|
||||
self.dpm_active_mhz("pp_dpm_sclk")
|
||||
}
|
||||
|
||||
fn mem_clock_mhz(&self) -> Option<u32> {
|
||||
self.dpm_active_mhz("pp_dpm_mclk")
|
||||
}
|
||||
|
||||
fn max_core_clock_mhz(&self) -> Option<u32> {
|
||||
self.dpm_max_mhz("pp_dpm_sclk")
|
||||
}
|
||||
|
||||
fn max_mem_clock_mhz(&self) -> Option<u32> {
|
||||
self.dpm_max_mhz("pp_dpm_mclk")
|
||||
}
|
||||
|
||||
fn power_limit_range_w(&self) -> Option<(u32, u32)> {
|
||||
let mn = read_u64(&self.hwmon_dir.join("power1_cap_min"))?;
|
||||
let mx = read_u64(&self.hwmon_dir.join("power1_cap_max"))?;
|
||||
Some(((mn / 1_000_000) as u32, (mx / 1_000_000) as u32))
|
||||
}
|
||||
|
||||
// Telemetry-only backend: never writes the root-only control nodes.
|
||||
fn set_persistence(&self, _on: bool) -> SetOutcome {
|
||||
SetOutcome::Unsupported
|
||||
}
|
||||
fn lock_core_clock_mhz(&self, _mhz: u32) -> SetOutcome {
|
||||
SetOutcome::Unsupported
|
||||
}
|
||||
fn lock_mem_clock_mhz(&self, _mhz: u32) -> SetOutcome {
|
||||
SetOutcome::Unsupported
|
||||
}
|
||||
fn set_power_limit_w(&self, _watts: u32) -> SetOutcome {
|
||||
SetOutcome::Unsupported
|
||||
}
|
||||
fn set_core_offset_mhz(&self, _mhz: i32) -> SetOutcome {
|
||||
SetOutcome::Unsupported
|
||||
}
|
||||
fn set_mem_offset_mhz(&self, _mhz: i32) -> SetOutcome {
|
||||
SetOutcome::Unsupported
|
||||
}
|
||||
fn reset(&self) {}
|
||||
}
|
||||
+60
-5
@@ -411,6 +411,9 @@ impl LegacySolver {
|
||||
/// (`equihash.cl`) everywhere else. Forceable with `ZCL_OPENCL_KERNEL=amd|legacy`.
|
||||
pub struct GpuSolver {
|
||||
inner: SolverInner,
|
||||
/// Per-card telemetry handle (AMD amdgpu sysfs / NVML), matched to the device
|
||||
/// by PCI bus id. `None` when no telemetry backend matches.
|
||||
tuner: Option<Box<dyn crate::gpu_tune::GpuTuner>>,
|
||||
}
|
||||
|
||||
enum SolverInner {
|
||||
@@ -423,13 +426,16 @@ impl GpuSolver {
|
||||
/// device vendor (AMD → `equihash192_7.cl`).
|
||||
pub fn new(device_index: usize) -> Result<Self> {
|
||||
let (platform, device) = pick_device(device_index)?;
|
||||
// Resolve a telemetry handle (AMD sysfs / NVML) from the device's PCI bus
|
||||
// before `device` is consumed by the inner solver.
|
||||
let tuner = device_pci_bus_id(&device).and_then(|bus| crate::gpu_tune::open(&bus));
|
||||
let inner = if use_amd_kernel(&device) {
|
||||
log::info!("OpenCL: AMD device — using the equihash192_7 kernel");
|
||||
SolverInner::Amd(crate::gpu_amd::AmdSolver::new(platform, device)?)
|
||||
} else {
|
||||
SolverInner::Legacy(LegacySolver::new(platform, device)?)
|
||||
};
|
||||
Ok(Self { inner })
|
||||
Ok(Self { inner, tuner })
|
||||
}
|
||||
|
||||
/// This device's product name, if available.
|
||||
@@ -440,6 +446,34 @@ impl GpuSolver {
|
||||
}
|
||||
}
|
||||
|
||||
/// Current board power draw in watts (telemetry handle), if available.
|
||||
pub fn power_watts(&self) -> Option<f64> {
|
||||
self.tuner.as_ref().and_then(|t| t.watts())
|
||||
}
|
||||
|
||||
/// Current GPU temperature in °C, if available.
|
||||
pub fn temperature_c(&self) -> Option<u32> {
|
||||
self.tuner.as_ref().and_then(|t| t.temperature_c())
|
||||
}
|
||||
|
||||
/// Currently enforced power limit in watts, if available.
|
||||
pub fn current_power_limit_w(&self) -> Option<u32> {
|
||||
self.tuner.as_ref().and_then(|t| t.current_power_limit_w())
|
||||
}
|
||||
|
||||
/// (min, max) settable power limit in watts, if available.
|
||||
pub fn power_limit_range_w(&self) -> Option<(u32, u32)> {
|
||||
self.tuner.as_ref().and_then(|t| t.power_limit_range_w())
|
||||
}
|
||||
|
||||
/// Current (core, memory) clocks in MHz, each `None` if unavailable.
|
||||
pub fn current_clocks_mhz(&self) -> (Option<u32>, Option<u32>) {
|
||||
match &self.tuner {
|
||||
Some(t) => (t.core_clock_mhz(), t.mem_clock_mhz()),
|
||||
None => (None, None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Solve the puzzle for `header` (140 bytes).
|
||||
pub fn solve(&self, header: &[u8]) -> Result<Vec<Vec<u32>>> {
|
||||
match &self.inner {
|
||||
@@ -475,13 +509,34 @@ impl GpuSolver {
|
||||
pub fn hash_all(&self, header: &[u8]) -> Result<Vec<u8>> {
|
||||
match &self.inner {
|
||||
SolverInner::Legacy(s) => s.hash_all(header),
|
||||
SolverInner::Amd(_) => {
|
||||
Err(anyhow!("hash_all is not supported by the AMD kernel"))
|
||||
}
|
||||
SolverInner::Amd(_) => Err(anyhow!("hash_all is not supported by the AMD kernel")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The device's PCI address as `"DDDD:BB:DD.F"` (lowercase), for matching the
|
||||
/// physical card to a telemetry backend. Prefers `cl_khr_pci_bus_info`; falls
|
||||
/// back to `cl_device_topology_amd`. `None` if the device exposes neither.
|
||||
fn device_pci_bus_id(device: &ocl::Device) -> Option<String> {
|
||||
const CL_DEVICE_PCI_BUS_INFO_KHR: u32 = 0x10F2;
|
||||
const CL_DEVICE_TOPOLOGY_AMD: u32 = 0x4037;
|
||||
// cl_device_pci_bus_info_khr = { u32 pci_domain, pci_bus, pci_device, pci_function }.
|
||||
if let Ok(b) = device.info_raw(CL_DEVICE_PCI_BUS_INFO_KHR) {
|
||||
if b.len() >= 16 {
|
||||
let rd = |i: usize| u32::from_ne_bytes([b[i], b[i + 1], b[i + 2], b[i + 3]]);
|
||||
return Some(format!("{:04x}:{:02x}:{:02x}.{:x}", rd(0), rd(4), rd(8), rd(12)));
|
||||
}
|
||||
}
|
||||
// cl_device_topology_amd (PCIE branch): 24-byte struct, bus/device/function
|
||||
// are the last three bytes; domain isn't exposed (assume 0000).
|
||||
if let Ok(b) = device.info_raw(CL_DEVICE_TOPOLOGY_AMD) {
|
||||
if b.len() >= 24 {
|
||||
return Some(format!("0000:{:02x}:{:02x}.{:x}", b[21], b[22], b[23]));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Decide whether to drive `device` with the AMD `equihash192_7.cl` kernel.
|
||||
/// `ZCL_OPENCL_KERNEL` forces the choice (`amd` or `legacy`); otherwise it's by
|
||||
/// device vendor.
|
||||
@@ -561,7 +616,7 @@ pub fn cpu_device_index() -> Option<usize> {
|
||||
/// Resolve a flat device index across all platforms, returning the device along
|
||||
/// with the platform it belongs to (needed to build the context against the
|
||||
/// right platform).
|
||||
fn pick_device(index: usize) -> Result<(ocl::Platform, ocl::Device)> {
|
||||
pub(crate) fn pick_device(index: usize) -> Result<(ocl::Platform, ocl::Device)> {
|
||||
use ocl::{Device, Platform};
|
||||
let mut idx = 0;
|
||||
for platform in Platform::list() {
|
||||
|
||||
+13
-7
@@ -128,18 +128,24 @@ pub trait GpuTuner: Send {
|
||||
/// Open a control handle for the GPU at `pci_bus_id` (matches the physical card
|
||||
/// regardless of CUDA-vs-driver index ordering). `None` if unavailable.
|
||||
///
|
||||
/// NVML is the backend on both Linux (`libnvidia-ml`) and Windows (`nvml.dll`);
|
||||
/// the C API is identical, so the same [`crate::nvml`] code serves both.
|
||||
/// Tries the NVIDIA backend first (NVML, `libnvidia-ml`/`nvml.dll`), then the AMD
|
||||
/// backend ([`crate::amd_smi`], Linux amdgpu sysfs). A non-matching bus id makes
|
||||
/// each backend return `None`, so probing both is safe on mixed-vendor hosts.
|
||||
pub fn open(pci_bus_id: &str) -> Option<Box<dyn GpuTuner>> {
|
||||
#[cfg(any(unix, windows))]
|
||||
#[cfg(feature = "cuda")]
|
||||
{
|
||||
crate::nvml::open(pci_bus_id)
|
||||
if let Some(t) = crate::nvml::open(pci_bus_id) {
|
||||
return Some(t);
|
||||
}
|
||||
}
|
||||
#[cfg(not(any(unix, windows)))]
|
||||
#[cfg(feature = "gpu")]
|
||||
{
|
||||
let _ = pci_bus_id;
|
||||
None
|
||||
if let Some(t) = crate::amd_smi::open(pci_bus_id) {
|
||||
return Some(t);
|
||||
}
|
||||
}
|
||||
let _ = pci_bus_id;
|
||||
None
|
||||
}
|
||||
|
||||
static WARNED_PRIVS: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
+58
-7
@@ -18,6 +18,10 @@ mod gpu;
|
||||
#[cfg(feature = "gpu")]
|
||||
mod gpu_amd;
|
||||
|
||||
// AMD GPU telemetry via Linux amdgpu sysfs (a `gpu_tune::GpuTuner` backend).
|
||||
#[cfg(feature = "gpu")]
|
||||
mod amd_smi;
|
||||
|
||||
// Runtime dynamic-library loader (dlopen) for the CUDA driver + NVML.
|
||||
#[cfg(feature = "cuda")]
|
||||
mod dylib;
|
||||
@@ -28,7 +32,9 @@ mod cuda;
|
||||
#[cfg(feature = "cuda")]
|
||||
mod nvml;
|
||||
|
||||
#[cfg(feature = "cuda")]
|
||||
// Platform-agnostic GPU tuning/telemetry surface. The trait + policy compile for
|
||||
// either GPU backend; NVML (cuda) and amd_smi (gpu) are the implementations.
|
||||
#[cfg(any(feature = "cuda", feature = "gpu"))]
|
||||
mod gpu_tune;
|
||||
|
||||
use std::io::IsTerminal;
|
||||
@@ -204,6 +210,13 @@ struct Args {
|
||||
#[arg(long)]
|
||||
auto_tune: bool,
|
||||
|
||||
/// Sustained-Sol/s governor: hold each GPU at/below this edge temperature (°C)
|
||||
/// by pacing the solve cadence (no hardware writes, no root). Trades a little
|
||||
/// throughput for lower temp/power; off by default (runs flat-out). Needs a
|
||||
/// backend that reports temperature (AMD amdgpu / NVIDIA).
|
||||
#[arg(long, value_name = "CELSIUS")]
|
||||
target_temp: Option<u32>,
|
||||
|
||||
/// Efficiency: cap each GPU's power limit in watts (default: card max).
|
||||
/// Lower power trades a little hashrate for much better Sol/W.
|
||||
#[arg(long, value_name = "WATTS")]
|
||||
@@ -620,6 +633,8 @@ fn main() -> Result<()> {
|
||||
args.power_limit.unwrap_or(0),
|
||||
args.unlock_controls,
|
||||
);
|
||||
// Software temp governor target (paces solve cadence; no hardware writes).
|
||||
miner::set_target_temp(args.target_temp);
|
||||
miner::run(client, specs, running, job_timeout, tui, format!("{host}:{port}"), controls, cpu_mining, cpu_clamp, args.control_port)
|
||||
}
|
||||
|
||||
@@ -1077,10 +1092,21 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
|
||||
use std::time::Instant;
|
||||
info!("benchmarking {runs} solve(s) per worker across {} worker(s)", specs.len());
|
||||
|
||||
/// Per-worker benchmark result, including a steady-state telemetry snapshot
|
||||
/// (sampled right after the timed loop, while the card is warm).
|
||||
struct WorkerResult {
|
||||
sols: usize,
|
||||
dt: f64,
|
||||
watts: Option<f64>,
|
||||
temp_c: Option<u32>,
|
||||
core_mhz: Option<u32>,
|
||||
mem_mhz: Option<u32>,
|
||||
}
|
||||
|
||||
let start = Instant::now();
|
||||
let mut handles = Vec::new();
|
||||
for (id, spec) in specs.into_iter().enumerate() {
|
||||
handles.push(std::thread::spawn(move || -> Result<(usize, f64)> {
|
||||
handles.push(std::thread::spawn(move || -> Result<WorkerResult> {
|
||||
let backend = spec.build()?;
|
||||
backend.solve(&pseudo_header(id as u64))?; // warm up (excluded)
|
||||
let t = Instant::now();
|
||||
@@ -1090,7 +1116,17 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
|
||||
let seed = ((id as u64) << 40) | (i as u64 + 1);
|
||||
sols += backend.solve(&pseudo_header(seed))?.len();
|
||||
}
|
||||
Ok((sols, t.elapsed().as_secs_f64()))
|
||||
let dt = t.elapsed().as_secs_f64();
|
||||
// Snapshot telemetry while the card is still under load.
|
||||
let (core_mhz, mem_mhz) = backend.current_clocks_mhz();
|
||||
Ok(WorkerResult {
|
||||
sols,
|
||||
dt,
|
||||
watts: backend.power_watts(),
|
||||
temp_c: backend.temperature_c(),
|
||||
core_mhz,
|
||||
mem_mhz,
|
||||
})
|
||||
}));
|
||||
}
|
||||
|
||||
@@ -1099,11 +1135,26 @@ fn benchmark(specs: Vec<BackendSpec>, runs: usize) -> Result<()> {
|
||||
let mut workers = 0usize;
|
||||
for h in handles {
|
||||
match h.join().unwrap() {
|
||||
Ok((sols, dt)) => {
|
||||
let sol_s = sols as f64 / dt;
|
||||
Ok(r) => {
|
||||
let sol_s = r.sols as f64 / r.dt;
|
||||
// Optional telemetry tail: " | 142 W, 41.7 Sol/W, 68°C, 2700/2500 MHz".
|
||||
let mut tail = String::new();
|
||||
if let Some(w) = r.watts {
|
||||
tail.push_str(&format!(" | {w:.0} W"));
|
||||
if w > 0.0 {
|
||||
tail.push_str(&format!(", {:.2} Sol/W", sol_s / w));
|
||||
}
|
||||
}
|
||||
if let Some(t) = r.temp_c {
|
||||
tail.push_str(&format!(", {t}°C"));
|
||||
}
|
||||
if let (Some(c), m) = (r.core_mhz, r.mem_mhz) {
|
||||
tail.push_str(&format!(", {c}/{} MHz", m.map(|m| m.to_string()).unwrap_or_else(|| "?".into())));
|
||||
}
|
||||
info!(
|
||||
" worker {workers}: {sol_s:.2} Sol/s ({:.0} ms/solve), {sols} solutions",
|
||||
1000.0 * dt / runs as f64
|
||||
" worker {workers}: {sol_s:.2} Sol/s ({:.0} ms/solve), {} solutions{tail}",
|
||||
1000.0 * r.dt / runs as f64,
|
||||
r.sols
|
||||
);
|
||||
agg_sols += sol_s;
|
||||
workers += 1;
|
||||
|
||||
+105
@@ -14,6 +14,22 @@ use crate::equihash;
|
||||
use crate::params::{HEADER_LEN, SOLUTION_BYTES};
|
||||
use crate::stratum::{StratumClient, Work};
|
||||
|
||||
/// Process-wide target edge temperature (°C) for the software solve-cadence
|
||||
/// governor; `None` ⇒ run flat-out. Set once at startup from `--target-temp`.
|
||||
static TARGET_TEMP_C: OnceLock<Option<u32>> = OnceLock::new();
|
||||
|
||||
/// Install the governor's target temperature (call once, before workers start).
|
||||
pub fn set_target_temp(c: Option<u32>) {
|
||||
if let Some(t) = c {
|
||||
info!("temperature governor enabled: holding GPUs ≤{t}°C (paced cadence)");
|
||||
}
|
||||
let _ = TARGET_TEMP_C.set(c);
|
||||
}
|
||||
|
||||
fn target_temp_c() -> Option<u32> {
|
||||
TARGET_TEMP_C.get().copied().flatten()
|
||||
}
|
||||
|
||||
/// Double SHA-256, as used for the Zcash/ZClassic block PoW hash.
|
||||
fn sha256d(data: &[u8]) -> [u8; 32] {
|
||||
let first = Sha256::digest(data);
|
||||
@@ -136,6 +152,8 @@ impl Backend {
|
||||
match self {
|
||||
#[cfg(feature = "cuda")]
|
||||
Backend::Cuda(solver) => solver.power_watts(),
|
||||
#[cfg(feature = "gpu")]
|
||||
Backend::Gpu(solver) => solver.power_watts(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -145,6 +163,8 @@ impl Backend {
|
||||
match self {
|
||||
#[cfg(feature = "cuda")]
|
||||
Backend::Cuda(solver) => solver.temperature_c(),
|
||||
#[cfg(feature = "gpu")]
|
||||
Backend::Gpu(solver) => solver.temperature_c(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -154,6 +174,8 @@ impl Backend {
|
||||
match self {
|
||||
#[cfg(feature = "cuda")]
|
||||
Backend::Cuda(solver) => solver.current_power_limit_w(),
|
||||
#[cfg(feature = "gpu")]
|
||||
Backend::Gpu(solver) => solver.current_power_limit_w(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -163,6 +185,8 @@ impl Backend {
|
||||
match self {
|
||||
#[cfg(feature = "cuda")]
|
||||
Backend::Cuda(solver) => solver.power_limit_range_w(),
|
||||
#[cfg(feature = "gpu")]
|
||||
Backend::Gpu(solver) => solver.power_limit_range_w(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
@@ -190,6 +214,8 @@ impl Backend {
|
||||
match self {
|
||||
#[cfg(feature = "cuda")]
|
||||
Backend::Cuda(solver) => solver.current_clocks_mhz(),
|
||||
#[cfg(feature = "gpu")]
|
||||
Backend::Gpu(solver) => solver.current_clocks_mhz(),
|
||||
_ => (None, None),
|
||||
}
|
||||
}
|
||||
@@ -513,6 +539,14 @@ fn worker(
|
||||
let mut last_job = Instant::now();
|
||||
let mut paused = false;
|
||||
let mut disabled_pause = false;
|
||||
// Software temperature governor: pace the solve cadence to hold edge temp at
|
||||
// or below `--target-temp` (no hardware writes). `gov_sleep` is the per-pass
|
||||
// pause, nudged from the periodic temperature sample.
|
||||
let gov_target = target_temp_c();
|
||||
let mut gov_sleep = Duration::ZERO;
|
||||
if let Some(target) = gov_target {
|
||||
info!("worker {id}: temperature governor active — pacing cadence to hold ≤{target}°C");
|
||||
}
|
||||
|
||||
while running.load(Ordering::Relaxed) {
|
||||
if work_handle.epoch() != current.epoch {
|
||||
@@ -583,6 +617,9 @@ fn worker(
|
||||
}
|
||||
if let Some(t) = backend.temperature_c() {
|
||||
stats.workers[id].temp_c.store(t, Ordering::Relaxed);
|
||||
if let Some(target) = gov_target {
|
||||
gov_sleep = govern_cadence(gov_sleep, t, target);
|
||||
}
|
||||
}
|
||||
let (core_mhz, mem_mhz) = backend.current_clocks_mhz();
|
||||
if let Some(c) = core_mhz {
|
||||
@@ -614,12 +651,34 @@ fn worker(
|
||||
let ctx = inflight.pop_front().unwrap();
|
||||
process_results(id, &client, &ctx, &solutions, &stats);
|
||||
}
|
||||
|
||||
// Temperature governor: pace the cadence (held off entirely when flat-out).
|
||||
if !gov_sleep.is_zero() {
|
||||
std::thread::sleep(gov_sleep);
|
||||
}
|
||||
}
|
||||
|
||||
drain_pipeline(id, &mut backend, &mut inflight, &client, &stats)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Adjust the per-pass governor pause so edge temperature settles near
/// `target` (°C), and return the new pause.
///
/// * `temp` above `target`: add 400 µs per °C of overshoot, capped at 100 ms.
/// * `temp` two or more °C below `target`: remove 1 ms, never going below zero.
/// * otherwise (at target or 1 °C under): leave `cur` untouched.
///
/// The caller invokes this from its periodic temperature sample, so the pause
/// accumulates slowly across samples.
fn govern_cadence(cur: Duration, temp: u32, target: u32) -> Duration {
    const MAX_SLEEP: Duration = Duration::from_millis(100);
    // Signed overshoot in °C; widened so the subtraction cannot wrap.
    let overshoot = i64::from(temp) - i64::from(target);
    if overshoot > 0 {
        let step = Duration::from_micros(400 * overshoot as u64);
        return cur.saturating_add(step).min(MAX_SLEEP);
    }
    if overshoot < -1 {
        return cur.saturating_sub(Duration::from_millis(1));
    }
    cur
}
|
||||
|
||||
/// Supervise CPU mining: spawn one worker per group for the current group size,
|
||||
/// and whenever the dashboard cycles the size, stop those workers (`gen_running`),
|
||||
/// rebuild the grouping, and respawn. Runs until `running` is cleared.
|
||||
@@ -843,3 +902,49 @@ fn build_nonce(nonce1: &[u8], counter: u64) -> Result<[u8; 32]> {
|
||||
tail[..n].copy_from_slice(&counter.to_le_bytes()[..n]);
|
||||
Ok(nonce)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod governor_tests {
    use super::*;

    /// At or under the target the governor introduces no pause.
    #[test]
    fn governor_idles_below_target() {
        let idle = Duration::ZERO;
        assert_eq!(govern_cadence(idle, 60, 70), Duration::ZERO);
        assert_eq!(govern_cadence(idle, 70, 70), Duration::ZERO);
    }

    /// Over target the pause grows with the overshoot and saturates at the cap.
    #[test]
    fn governor_lengthens_over_target_and_clamps() {
        // +5°C from a standing start.
        let grown = govern_cadence(Duration::ZERO, 75, 70);
        assert_eq!(grown, Duration::from_micros(400 * 5));
        // Far over target lands exactly on the cap, never beyond it.
        let capped = govern_cadence(Duration::from_millis(99), 200, 70);
        assert_eq!(capped, Duration::from_millis(100));
    }

    /// Two or more °C under target the pause shrinks, flooring at zero.
    #[test]
    fn governor_eases_off_when_cool() {
        let eased = govern_cadence(Duration::from_millis(3), 67, 70);
        assert_eq!(eased, Duration::from_millis(2));
        // Already zero: no underflow.
        assert_eq!(govern_cadence(Duration::ZERO, 50, 70), Duration::ZERO);
    }

    /// Closed-loop check against a crude plant model: more pause → cooler
    /// card. The controller should settle near the target without runaway.
    #[test]
    fn governor_converges_to_hold_target() {
        let target = 70u32;
        let mut sleep = Duration::ZERO;
        let mut temp = 85i32; // starts hot
        for _ in 0..200 {
            sleep = govern_cadence(sleep, temp as u32, target);
            // Plant: each ms of pause sheds ~1.5°C off a 90°C flat-out temp.
            temp = (90.0 - 1.5 * sleep.as_millis() as f64).round() as i32;
        }
        let band = target as i32 - 2..=target as i32 + 2;
        assert!(band.contains(&temp), "settled at {temp}°C, want ~{target}");
    }
}
|
||||
|
||||
Reference in New Issue
Block a user