Compare commits
2 commits
662dd21a60
...
fb1c239e15
| Author | SHA1 | Date | |
|---|---|---|---|
| fb1c239e15 | |||
| 1792bc489c |
1 changed files with 50 additions and 42 deletions
|
|
@ -1,5 +1,5 @@
|
|||
import "websocket-polyfill";
|
||||
import NDK, { NDKEvent, NDKKind, NDKPrivateKeySigner, NDKRpcRequest, NDKRpcResponse, NDKUser, NostrEvent } from '@nostr-dev-kit/ndk';
|
||||
import NDK, { NDKEvent, NDKKind, NDKPrivateKeySigner, NDKRpcRequest, NDKRpcResponse, NDKUser } from '@nostr-dev-kit/ndk';
|
||||
import { NDKNostrRpc } from '@nostr-dev-kit/ndk';
|
||||
import createDebug from 'debug';
|
||||
import { Key, KeyUser } from '../run';
|
||||
|
|
@ -168,12 +168,17 @@ class AdminInterface {
|
|||
this.handleRequest(req);
|
||||
});
|
||||
|
||||
// pingOrDie disabled — NDK 2.8.1 outbox model doesn't echo
|
||||
// self-published events back through subscriptions on
|
||||
// non-public relay channels, so the watchdog fires false
|
||||
// positives and exits the bunker every 50s on private relays.
|
||||
// See aiolabs/nsecbunkerd#4 + #7.
|
||||
// pingOrDie(this.ndk);
|
||||
// Connection watchdog: exit if pool reports no connected relays
|
||||
// for >60s so the process supervisor (systemd / docker restart
|
||||
// policy / k8s) can recover. Replaces the original self-echo
|
||||
// pingOrDie — see relayConnectionWatchdog comment + #4 + #7.
|
||||
// Operators with external liveness checking can disable via
|
||||
// NSEC_BUNKER_DISABLE_WATCHDOG=1.
|
||||
if (process.env.NSEC_BUNKER_DISABLE_WATCHDOG !== '1') {
|
||||
relayConnectionWatchdog(this.ndk);
|
||||
} else {
|
||||
console.log('⏸ watchdog disabled via NSEC_BUNKER_DISABLE_WATCHDOG=1');
|
||||
}
|
||||
}).catch((err) => {
|
||||
console.log('❌ admin connection failed');
|
||||
console.log(err);
|
||||
|
|
@ -454,44 +459,47 @@ class AdminInterface {
|
|||
}
|
||||
}
|
||||
|
||||
async function pingOrDie(ndk: NDK) {
|
||||
let deathTimer: NodeJS.Timeout | null = null;
|
||||
|
||||
function resetDeath() {
|
||||
if (deathTimer) clearTimeout(deathTimer);
|
||||
deathTimer = setTimeout(() => {
|
||||
console.log(`❌ No ping event received in 30 seconds. Exiting.`);
|
||||
process.exit(1);
|
||||
}, 50000);
|
||||
}
|
||||
|
||||
const self = await ndk.signer!.user();
|
||||
const sub = ndk.subscribe({
|
||||
authors: [self.pubkey],
|
||||
kinds: [NDKKind.NostrConnect],
|
||||
"#p": [self.pubkey]
|
||||
});
|
||||
sub.on("event", (event: NDKEvent) => {
|
||||
console.log(`🔔 Received ping event:`, event.created_at);
|
||||
resetDeath();
|
||||
});
|
||||
sub.start();
|
||||
|
||||
resetDeath();
|
||||
/**
|
||||
* Pool-status connection watchdog. Exits the daemon if every relay in
|
||||
* the pool stays disconnected for longer than PARTITION_THRESHOLD_MS.
|
||||
*
|
||||
* Replaces the original `pingOrDie` self-echo watchdog, which published
|
||||
* a kind-24133 event to its own pubkey every 20s and exited if it
|
||||
* didn't see the echo within 50s. That works on public relays but
|
||||
* silently breaks on single-private-relay setups: NDK 2.8.1's outbox
|
||||
* model doesn't reliably route self-publishes back through the
|
||||
* matching subscription, so the watchdog fires false positives and
|
||||
* exits the daemon every 50s while RPCs over the same channel still
|
||||
* work fine. See aiolabs/nsecbunkerd#4 + #7.
|
||||
*
|
||||
* The pool-status approach uses NDK's own connection-lifecycle
|
||||
* tracking — `pool.connectedRelays()` reports relays in
|
||||
* NDKRelayStatus.CONNECTED — which is reliable across all relay
|
||||
* configurations because it doesn't depend on round-trip
|
||||
* publish/subscribe. No event is published; no relay traffic.
|
||||
*
|
||||
* Detects partition within POLL_INTERVAL_MS + PARTITION_THRESHOLD_MS (about 70s with the current values).
|
||||
* Transient disconnects shorter than PARTITION_THRESHOLD_MS don't trip
|
||||
* the watchdog — useful for relays that flap or briefly drop on
|
||||
* network blips.
|
||||
*/
|
||||
async function relayConnectionWatchdog(ndk: NDK) {
|
||||
const POLL_INTERVAL_MS = 10_000;
|
||||
const PARTITION_THRESHOLD_MS = 60_000;
|
||||
let lastConnectedAt = Date.now();
|
||||
|
||||
setInterval(() => {
|
||||
const event = new NDKEvent(ndk, {
|
||||
kind: NDKKind.NostrConnect,
|
||||
tags: [ ["p", self.pubkey] ],
|
||||
content: "ping"
|
||||
} as NostrEvent);
|
||||
event.publish().then(() => {
|
||||
console.log(`🔔 Sent ping event:`, event.created_at);
|
||||
}).catch((e: any) => {
|
||||
console.log(`❌ Failed to send ping event:`, e.message);
|
||||
const connectedCount = ndk.pool.connectedRelays().length;
|
||||
if (connectedCount > 0) {
|
||||
lastConnectedAt = Date.now();
|
||||
return;
|
||||
}
|
||||
const elapsed = Date.now() - lastConnectedAt;
|
||||
if (elapsed > PARTITION_THRESHOLD_MS) {
|
||||
console.log(`❌ No connected relays for ${Math.floor(elapsed / 1000)}s. Exiting.`);
|
||||
process.exit(1);
|
||||
});
|
||||
}, 20000);
|
||||
}
|
||||
}, POLL_INTERVAL_MS);
|
||||
}
|
||||
|
||||
export default AdminInterface;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue