From 41db98af69915445a922e44023ac0085be3748a1 Mon Sep 17 00:00:00 2001
From: jackpotincorporated <jackpot@incorporat.ed>
Date: Sat, 6 Jun 2026 20:39:45 -0400
Subject: [PATCH] AMD kernel: drop fp64 dependency (build on rusticl/Mesa
 OpenCL)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

equihash192_7.cl's decompress/decompress2 used double + sqrt behind
cl_khr_fp64, so the kernel failed to build on OpenCL stacks without fp64
(notably rusticl/Mesa) — those workers died with 'use of type double requires
cl_khr_fp64'. Replace round(sqrt(2*x+1)) with an exact integer square root
(single-precision estimate corrected to the integer floor, then rounded; inputs
are triangular indices < ~2^26). No fp64, no behavior change on ROCm (verified
bit-identical: 77 solutions/40 nonces, same as before), and rusticl devices now
build and solve correctly.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 kernels/equihash192_7.cl | 33 +++++++++++++++++----------------
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/kernels/equihash192_7.cl b/kernels/equihash192_7.cl
index 6db2719..96b6074 100644
--- a/kernels/equihash192_7.cl
+++ b/kernels/equihash192_7.cl
@@ -2,8 +2,6 @@
 
 //#define PRINT 1
 
-#pragma OPENCL EXTENSION cl_khr_fp64 : enable
-
 __constant ulong blake_iv[] =
 {
     0x6a09e667f3bcc908, 0xbb67ae8584caa73b,
@@ -507,14 +505,22 @@ uint compress2(uint in0, uint in1) {
 	return tmp;	
 }
 
-uint2 decompress(uint2 in) {
-	double inFl = (double) (in.s0 >> 6);
-	
-	inFl *= 2.0;
-	inFl += 1.0;
+// isqrt_round(x): sqrt(x) rounded to the nearest integer, computed with a float sqrt
+// plus integer fix-ups so no cl_khr_fp64 is needed (e.g. on rusticl/Mesa). The visible
+// callers pass x = 2*t + 1: decompress() uses t = in.s0 >> 6 (x < 2^27), decompress2()
+// a full uint t (x < 2^33). NOTE(review): m*m and (m+1)*(m+1) are signed long products
+// (assumes 64-bit long, as in OpenCL C); fine for these ranges, confirm for new callers.
+inline uint isqrt_round(ulong x) {
+	long m = (long) sqrt((float) x); // single-precision estimate of sqrt(x), truncated to an integer
+	while (m > 0 && (ulong)(m * m) > x) m--;       // estimate too high: step down until m*m <= x
+	while ((ulong)((m + 1) * (m + 1)) <= x) m++;   // estimate too low: step up while (m+1)^2 <= x
+	// Here m == floor(sqrt(x)). For integer x, sqrt(x) >= m + 0.5 <=> x - m*m > m: round up then.
+	return (uint)(((x - (ulong)(m * m)) > (ulong) m) ? (m + 1) : m);
+}
 
-	uint2 res; 
-	res.s0 = (uint) round(sqrt(inFl));
+uint2 decompress(uint2 in) {
+	uint2 res;
+	res.s0 = isqrt_round(2ul * (ulong)(in.s0 >> 6) + 1ul);
 
 
 	uint tmp = res.s0 * (res.s0-1);
@@ -535,13 +541,8 @@ uint2 decompress(uint2 in) {
 }
 
 uint2 decompress2(uint in) {
-	double inFl = (double) in;
-	
-	inFl *= 2.0;
-	inFl += 1.0;
-
-	uint2 res; 
-	res.s0 = (uint) round(sqrt(inFl));
+	uint2 res;
+	res.s0 = isqrt_round(2ul * (ulong) in + 1ul);
 
 
 	uint tmp = res.s0 * (res.s0-1);