Compare commits
2 commits
662dd21a60
...
fb1c239e15
| Author | SHA1 | Date | |
|---|---|---|---|
| fb1c239e15 | |||
| 1792bc489c |
1 changed files with 50 additions and 42 deletions
|
|
@ -1,5 +1,5 @@
|
||||||
import "websocket-polyfill";
|
import "websocket-polyfill";
|
||||||
import NDK, { NDKEvent, NDKKind, NDKPrivateKeySigner, NDKRpcRequest, NDKRpcResponse, NDKUser, NostrEvent } from '@nostr-dev-kit/ndk';
|
import NDK, { NDKEvent, NDKKind, NDKPrivateKeySigner, NDKRpcRequest, NDKRpcResponse, NDKUser } from '@nostr-dev-kit/ndk';
|
||||||
import { NDKNostrRpc } from '@nostr-dev-kit/ndk';
|
import { NDKNostrRpc } from '@nostr-dev-kit/ndk';
|
||||||
import createDebug from 'debug';
|
import createDebug from 'debug';
|
||||||
import { Key, KeyUser } from '../run';
|
import { Key, KeyUser } from '../run';
|
||||||
|
|
@ -168,12 +168,17 @@ class AdminInterface {
|
||||||
this.handleRequest(req);
|
this.handleRequest(req);
|
||||||
});
|
});
|
||||||
|
|
||||||
// pingOrDie disabled — NDK 2.8.1 outbox model doesn't echo
|
// Connection watchdog: exit if pool reports no connected relays
|
||||||
// self-published events back through subscriptions on
|
// for >60s so the process supervisor (systemd / docker restart
|
||||||
// non-public relay channels, so the watchdog fires false
|
// policy / k8s) can recover. Replaces the original self-echo
|
||||||
// positives and exits the bunker every 50s on private relays.
|
// pingOrDie — see the relayConnectionWatchdog doc comment and aiolabs/nsecbunkerd#4 + #7.
|
||||||
// See aiolabs/nsecbunkerd#4 + #7.
|
// Operators with external liveness checking can disable via
|
||||||
// pingOrDie(this.ndk);
|
// NSEC_BUNKER_DISABLE_WATCHDOG=1.
|
||||||
|
if (process.env.NSEC_BUNKER_DISABLE_WATCHDOG !== '1') {
|
||||||
|
relayConnectionWatchdog(this.ndk);
|
||||||
|
} else {
|
||||||
|
console.log('⏸ watchdog disabled via NSEC_BUNKER_DISABLE_WATCHDOG=1');
|
||||||
|
}
|
||||||
}).catch((err) => {
|
}).catch((err) => {
|
||||||
console.log('❌ admin connection failed');
|
console.log('❌ admin connection failed');
|
||||||
console.log(err);
|
console.log(err);
|
||||||
|
|
@ -454,44 +459,47 @@ class AdminInterface {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async function pingOrDie(ndk: NDK) {
|
/**
|
||||||
let deathTimer: NodeJS.Timeout | null = null;
|
* Pool-status connection watchdog. Exits the daemon if every relay in
|
||||||
|
* the pool stays disconnected for longer than PARTITION_THRESHOLD_MS.
|
||||||
function resetDeath() {
|
*
|
||||||
if (deathTimer) clearTimeout(deathTimer);
|
* Replaces the original `pingOrDie` self-echo watchdog, which published
|
||||||
deathTimer = setTimeout(() => {
|
* a kind-24133 event to its own pubkey every 20s and exited if it
|
||||||
console.log(`❌ No ping event received in 30 seconds. Exiting.`);
|
* didn't see the echo within 50s. That works on public relays but
|
||||||
process.exit(1);
|
* silently breaks on single-private-relay setups: NDK 2.8.1's outbox
|
||||||
}, 50000);
|
* model doesn't reliably route self-publishes back through the
|
||||||
}
|
* matching subscription, so the watchdog fires false positives and
|
||||||
|
* exits the daemon every 50s while RPCs over the same channel still
|
||||||
const self = await ndk.signer!.user();
|
* work fine. See aiolabs/nsecbunkerd#4 + #7.
|
||||||
const sub = ndk.subscribe({
|
*
|
||||||
authors: [self.pubkey],
|
* The pool-status approach uses NDK's own connection-lifecycle
|
||||||
kinds: [NDKKind.NostrConnect],
|
* tracking — `pool.connectedRelays()` reports relays in
|
||||||
"#p": [self.pubkey]
|
* NDKRelayStatus.CONNECTED — which is reliable across all relay
|
||||||
});
|
* configurations because it doesn't depend on round-trip
|
||||||
sub.on("event", (event: NDKEvent) => {
|
* publish/subscribe. No event is published; no relay traffic.
|
||||||
console.log(`🔔 Received ping event:`, event.created_at);
|
*
|
||||||
resetDeath();
|
* Detects a partition within POLL_INTERVAL_MS + PARTITION_THRESHOLD_MS.
|
||||||
});
|
* Transient disconnects shorter than PARTITION_THRESHOLD_MS don't trip
|
||||||
sub.start();
|
* the watchdog — useful for relays that flap or briefly drop on
|
||||||
|
* network blips.
|
||||||
resetDeath();
|
*/
|
||||||
|
async function relayConnectionWatchdog(ndk: NDK) {
|
||||||
|
const POLL_INTERVAL_MS = 10_000;
|
||||||
|
const PARTITION_THRESHOLD_MS = 60_000;
|
||||||
|
let lastConnectedAt = Date.now();
|
||||||
|
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
const event = new NDKEvent(ndk, {
|
const connectedCount = ndk.pool.connectedRelays().length;
|
||||||
kind: NDKKind.NostrConnect,
|
if (connectedCount > 0) {
|
||||||
tags: [ ["p", self.pubkey] ],
|
lastConnectedAt = Date.now();
|
||||||
content: "ping"
|
return;
|
||||||
} as NostrEvent);
|
}
|
||||||
event.publish().then(() => {
|
const elapsed = Date.now() - lastConnectedAt;
|
||||||
console.log(`🔔 Sent ping event:`, event.created_at);
|
if (elapsed > PARTITION_THRESHOLD_MS) {
|
||||||
}).catch((e: any) => {
|
console.log(`❌ No connected relays for ${Math.floor(elapsed / 1000)}s. Exiting.`);
|
||||||
console.log(`❌ Failed to send ping event:`, e.message);
|
|
||||||
process.exit(1);
|
process.exit(1);
|
||||||
});
|
}
|
||||||
}, 20000);
|
}, POLL_INTERVAL_MS);
|
||||||
}
|
}
|
||||||
|
|
||||||
export default AdminInterface;
|
export default AdminInterface;
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue