diff --git a/src/daemon/admin/index.ts b/src/daemon/admin/index.ts index a918a66..e4d8509 100644 --- a/src/daemon/admin/index.ts +++ b/src/daemon/admin/index.ts @@ -25,7 +25,6 @@ import { validateRequestFromAdmin } from './validations/request-from-admin'; import { dmUser } from '../../utils/dm-user'; import { IConfig, getCurrentConfig } from "../../config"; import path from 'path'; -import { attachIndefiniteReconnect } from '../lib/relay-reconnect.js'; const debug = createDebug("nsecbunker:admin"); @@ -63,15 +62,6 @@ class AdminInterface { explicitRelayUrls: opts.adminRelays, signer: new NDKPrivateKeySigner(opts.key), }); - - // Override NDK's "give up after detecting flapping" behavior so the - // bunker's admin NDK keeps trying to reconnect indefinitely. The - // watchdog (when enabled) still fires after 60s of zero connected - // relays; this helper handles shorter disconnects (e.g. an lnbits - // restart that pulls the nostrrelay extension's WS for a few - // seconds) without involving the supervisor. See aiolabs/nsecbunkerd#20. - attachIndefiniteReconnect(this.ndk, 'admin'); - this.ndk.signer?.user().then((user: NDKUser) => { let connectionString = `bunker://${user.npub}`; diff --git a/src/daemon/lib/relay-reconnect.ts b/src/daemon/lib/relay-reconnect.ts deleted file mode 100644 index e3fdb4f..0000000 --- a/src/daemon/lib/relay-reconnect.ts +++ /dev/null @@ -1,101 +0,0 @@ -import NDK from "@nostr-dev-kit/ndk"; - -/** - * Attaches an aggressive-reconnect supervisor to an NDK instance. - * - * NDK 3.x's per-relay connectivity state machine gives up retrying after - * a few consecutive fast-fail (e.g. ECONNREFUSED returns in <1 ms) - * connection attempts: - * - * 1. Each attempt's duration is recorded in `_connectionStats.durations`. - * 2. After every 3 attempts, `isFlapping()` checks the std-dev of those - * durations against `FLAPPING_THRESHOLD_MS` (1 second). Three fast - * failures look identical → tiny std-dev → flapping=true → status - * transitions to FLAPPING and the per-relay retry stops. - * 3. `NDKPool.handleFlapping` catches the event and reschedules a - * reconnect via doubling backoff (5s → 10s → 20s → 40s → 80s …), - * growing unbounded. - * - * For nsecbunkerd, where the admin relay is typically a single relay we - * **must** stay subscribed to, "disconnected for 80+s after every dev - * restart" is the failure mode users hit (aiolabs/nsecbunkerd#20). The - * pool's doubling backoff is too pessimistic for our use case. - * - * This helper sidesteps the give-up path: when the pool emits `flapping` - * (the symptom that NDK has internally given up), or when we see the - * relay disconnect outside our own request, we manually call - * `relay.connect()` with a SHORT capped delay. Successful connect resets - * the attempt counter so a future disconnect storm doesn't grow the - * delay. - * - * Trade-off: we may hammer a permanently-down relay every 10s. That's - * fine for a bunker — being disconnected silently is strictly worse than - * a retry storm against localhost. Acceptable because: - * - The bunker's primary relay is typically on the same host or LAN - * (`ws://lnbits:5001/...`); TCP RSTs are cheap. - * - Public-relay setups can layer external supervision on top if they - * care about retry pressure. - */ -export function attachIndefiniteReconnect(ndk: NDK, label: string): void { - const RECONNECT_BASE_MS = 1_000; - const RECONNECT_CAP_MS = 10_000; - - const attempts = new Map(); - const pending = new Map(); - - const reconnectDelay = (n: number): number => - Math.min(RECONNECT_BASE_MS * 2 ** n, RECONNECT_CAP_MS); - - const scheduleReconnect = (relay: any): void => { - const url: string = relay.url; - if (pending.has(url)) return; - const n = attempts.get(url) ?? 0; - const delay = reconnectDelay(n); - console.log( - `🔁 ${label}: scheduling reconnect to ${url} in ${delay}ms ` + - `(attempt ${n + 1}, overriding NDK give-up)` - ); - const timer = setTimeout(() => { - pending.delete(url); - attempts.set(url, n + 1); - relay.connect().catch((e: any) => { - console.log( - `❌ ${label}: manual reconnect to ${url} failed: ` + - `${e?.message ?? e}` - ); - // Don't recurse here — the next 'flapping' or 'disconnect' - // event will fire and schedule another attempt. - }); - }, delay); - pending.set(url, timer); - }; - - ndk.pool.on("flapping", (relay: any) => { - console.log( - `⚠️ ${label}: NDK flagged ${relay.url} as flapping ` + - `(connectivity machine gave up internally)` - ); - scheduleReconnect(relay); - }); - - ndk.pool.on("relay:disconnect", (relay: any) => { - scheduleReconnect(relay); - }); - - ndk.pool.on("relay:connect", (relay: any) => { - const url: string = relay.url; - const n = attempts.get(url) ?? 0; - if (n > 0) { - console.log( - `✅ ${label}: recovered ${url} after ${n} manual reconnect ` + - `attempt(s)` - ); - } - attempts.delete(url); - const timer = pending.get(url); - if (timer) { - clearTimeout(timer); - pending.delete(url); - } - }); -} diff --git a/src/daemon/run.ts b/src/daemon/run.ts index 7eaa512..743606d 100644 --- a/src/daemon/run.ts +++ b/src/daemon/run.ts @@ -15,7 +15,6 @@ import FastifyView from '@fastify/view'; import Handlebars from "handlebars"; import {authorizeRequestWebHandler, processRequestWebHandler} from "./web/authorize.js"; import {processRegistrationWebHandler} from "./web/authorize.js"; -import { attachIndefiniteReconnect } from "./lib/relay-reconnect.js"; export type Key = { name: string; @@ -166,12 +165,6 @@ class Daemon { this.ndk.pool.on('relay:disconnect', (r) => { console.log(`🚫 Disconnected from ${r.url}`); }); - - // Override NDK's "give up after detecting flapping" behavior so the - // bunker's backend NDK keeps trying to reconnect indefinitely. - // Without this, an ECONNREFUSED storm at boot (relay not yet up) - // permanently strands the bunker. See aiolabs/nsecbunkerd#20. - attachIndefiniteReconnect(this.ndk, 'backend'); } async startWebAuth() {