From 711a017e8cf774e5a81c7b88b88e558443efc7ca Mon Sep 17 00:00:00 2001 From: Padreug Date: Mon, 25 May 2026 23:59:31 +0200 Subject: [PATCH 01/14] add nix flake with devShell and native package build devShell: nodejs_20, pnpm_8, prisma + prisma-engines, sqlite, openssl, plus the env wiring so prisma uses nix-provided engines instead of fetching from binaries.prisma.sh. packages.default: full native build via pnpm_8.fetchDeps + configHook. Patches the workspace:* ndk spec to the lockfile-resolved ^2.8.1 so --frozen-lockfile accepts it, then re-runs install with scripts to trigger bcrypt's node-pre-gyp fallback-to-build (uses python311 since node-gyp 9.4.1 bundled with pnpm 8 still imports distutils). Co-Authored-By: Claude Opus 4.7 (1M context) --- flake.lock | 27 ++++++++++++ flake.nix | 51 ++++++++++++++++++++++ package.nix | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 201 insertions(+) create mode 100644 flake.lock create mode 100644 flake.nix create mode 100644 package.nix diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..f428ce8 --- /dev/null +++ b/flake.lock @@ -0,0 +1,27 @@ +{ + "nodes": { + "nixpkgs": { + "locked": { + "lastModified": 1767313136, + "narHash": "sha256-16KkgfdYqjaeRGBaYsNrhPRRENs0qzkQVUooNHtoy2w=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "ac62194c3917d5f474c1a844b6fd6da2db95077d", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixos-25.05", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "nixpkgs": "nixpkgs" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..5bb1222 --- /dev/null +++ b/flake.nix @@ -0,0 +1,51 @@ +{ + description = "nsecbunkerd — Nostr remote signing daemon (NIP-46)"; + + inputs.nixpkgs.url = "github:NixOS/nixpkgs/nixos-25.05"; + + outputs = { self, nixpkgs }: + let + systems = [ "x86_64-linux" "aarch64-linux" ]; + forAllSystems = 
nixpkgs.lib.genAttrs systems; + pkgsFor = system: import nixpkgs { inherit system; }; + in + { + packages = forAllSystems (system: + let pkgs = pkgsFor system; in + rec { + default = nsecbunkerd; + nsecbunkerd = pkgs.callPackage ./package.nix { }; + } + ); + + devShells = forAllSystems (system: + let pkgs = pkgsFor system; in + { + default = pkgs.mkShell { + packages = with pkgs; [ + nodejs_20 + pnpm_8 + prisma + prisma-engines + python3 + gcc + pkg-config + openssl + sqlite + ]; + + shellHook = '' + # Point prisma at the nix-provided engines so it doesn't try to + # download them from binaries.prisma.sh on every install. + export PRISMA_QUERY_ENGINE_BINARY=${pkgs.prisma-engines}/bin/query-engine + export PRISMA_QUERY_ENGINE_LIBRARY=${pkgs.prisma-engines}/lib/libquery_engine.node + export PRISMA_SCHEMA_ENGINE_BINARY=${pkgs.prisma-engines}/bin/schema-engine + export PRISMA_FMT_BINARY=${pkgs.prisma-engines}/bin/prisma-fmt + export PRISMA_INTROSPECTION_ENGINE_BINARY=${pkgs.prisma-engines}/bin/introspection-engine + export PRISMA_CLIENT_ENGINE_TYPE=binary + ''; + }; + } + ); + }; +} diff --git a/package.nix b/package.nix new file mode 100644 index 0000000..5c49bf7 --- /dev/null +++ b/package.nix @@ -0,0 +1,123 @@ +{ + lib, + stdenv, + pnpm_8, + nodejs_20, + makeWrapper, + prisma-engines, + openssl, + sqlite, + python311, + pkg-config, + node-gyp, +}: + +let + # package.json pins `@nostr-dev-kit/ndk: "workspace:*"` but the lockfile + # resolves `^2.8.1`. With --frozen-lockfile pnpm refuses the mismatch, + # so rewrite the spec to match the lockfile. 
+ patchNdk = '' + substituteInPlace package.json \ + --replace-fail '"@nostr-dev-kit/ndk": "workspace:*"' \ + '"@nostr-dev-kit/ndk": "^2.8.1"' + ''; + + prismaEnv = { + PRISMA_SCHEMA_ENGINE_BINARY = lib.getExe' prisma-engines "schema-engine"; + PRISMA_QUERY_ENGINE_BINARY = lib.getExe' prisma-engines "query-engine"; + PRISMA_QUERY_ENGINE_LIBRARY = "${prisma-engines}/lib/libquery_engine.node"; + PRISMA_INTROSPECTION_ENGINE_BINARY = lib.getExe' prisma-engines "introspection-engine"; + PRISMA_FMT_BINARY = lib.getExe' prisma-engines "prisma-fmt"; + PRISMA_CLIENT_ENGINE_TYPE = "binary"; + }; +in +stdenv.mkDerivation (finalAttrs: { + pname = "nsecbunkerd"; + version = "0.10.5"; + + src = ./.; + + pnpmDeps = pnpm_8.fetchDeps { + inherit (finalAttrs) pname version src; + fetcherVersion = 2; + prePnpmInstall = patchNdk; + hash = "sha256-dQ+TX5jf1ZQKGoPCZgWaFwpAC3uP6iL1ZSxS0mFNdP8="; + }; + + postPatch = patchNdk; + + nativeBuildInputs = [ + pnpm_8.configHook + pnpm_8 + nodejs_20 + makeWrapper + node-gyp + python311 + pkg-config + ]; + + buildInputs = [ + openssl + sqlite + ]; + + env = prismaEnv; + + buildPhase = '' + runHook preBuild + + export npm_config_nodedir=${nodejs_20} + pnpm config set nodedir ${nodejs_20} + + # configHook ran with --ignore-scripts; re-run install to trigger + # native-module postinstall (bcrypt). --offline keeps it inside the + # store seeded by configHook. 
+ pnpm install --force --offline --frozen-lockfile --reporter=append-only + + pnpm prisma generate + pnpm build + + pnpm prune --prod --ignore-scripts + find node_modules -xtype l -delete + + runHook postBuild + ''; + + installPhase = '' + runHook preInstall + + mkdir -p $out/{bin,share/nsecbunkerd} + cp -r dist node_modules prisma templates package.json \ + $out/share/nsecbunkerd/ + + makeWrapper ${lib.getExe nodejs_20} $out/bin/nsecbunkerd \ + --chdir $out/share/nsecbunkerd \ + --add-flags $out/share/nsecbunkerd/dist/index.js \ + --set NODE_ENV production \ + --prefix PATH : ${lib.makeBinPath [ openssl ]} \ + ${ + lib.concatStringsSep " \\\n " ( + lib.mapAttrsToList (n: v: "--set ${n} ${lib.escapeShellArg v}") prismaEnv + ) + } + + makeWrapper ${lib.getExe nodejs_20} $out/bin/nsecbunker-client \ + --chdir $out/share/nsecbunkerd \ + --add-flags $out/share/nsecbunkerd/dist/client/client.js \ + --set NODE_ENV production + + runHook postInstall + ''; + + passthru = { + inherit prisma-engines; + }; + + meta = { + description = "Nostr remote signing daemon (NIP-46)"; + homepage = "https://github.com/kind-0/nsecbunkerd"; + license = lib.licenses.mit; + mainProgram = "nsecbunkerd"; + platforms = lib.platforms.linux; + }; +}) From 06272c8f2c498dc9e7717ae983ad005cf7f80f0c Mon Sep 17 00:00:00 2001 From: Padreug Date: Tue, 26 May 2026 00:29:29 +0200 Subject: [PATCH 02/14] pin @nostr-dev-kit/ndk to 2.8.1 instead of workspace:* MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream declares the dependency as workspace:*, but the repo has no pnpm-workspace.yaml and no sibling @nostr-dev-kit/ndk package — so pnpm install fails with ERR_PNPM_WORKSPACE_PKG_NOT_FOUND on a clean clone. The shipped pnpm-lock.yaml was resolving to ndk 2.8.1, so pin to that exact version to match what the lockfile already expects. Fixes #3. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 4232435..297fd2a 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,7 @@ "@fastify/view": "^8.2.0", "@inquirer/password": "^1.1.2", "@inquirer/prompts": "^1.2.3", - "@nostr-dev-kit/ndk": "workspace:*", + "@nostr-dev-kit/ndk": "2.8.1", "@prisma/client": "^5.4.1", "@scure/base": "^1.1.1", "@types/yargs": "^17.0.24", From 960b9399e8000449fcc3440ca16e29a65a35448b Mon Sep 17 00:00:00 2001 From: Padreug Date: Tue, 26 May 2026 00:29:41 +0200 Subject: [PATCH 03/14] Dockerfile: switch from npm to pnpm + drop --frozen-lockfile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two upstream-rot issues fixed in one commit (same root cause: the upstream Dockerfile predates the move to pnpm and the lockfile has drifted): - npm install can't resolve workspace:* deps (which package.json used to declare for @nostr-dev-kit/ndk — see prior commit for the pin). Switching to pnpm@9 matches the lockfile that ships in-repo. - pnpm-lock.yaml is out of date vs package.json (likely from generation-time vs commit-time drift), so --frozen-lockfile fails with ERR_PNPM_OUTDATED_LOCKFILE. Drop the flag in both build and runtime stages to let pnpm resolve fresh, at the cost of giving up determinism — to be restored once the lockfile is regenerated. Also reorders the build stage to COPY lockfile + manifest before the source, so the install layer caches across source-only edits. Fixes #1, #2. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- Dockerfile | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1eb99be..1168d8c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,20 +1,32 @@ +# Patched from upstream kind-0/nsecbunkerd Dockerfile to use pnpm — the +# upstream version uses `npm install` but package.json declares +# `@nostr-dev-kit/ndk` as `workspace:*`, which only pnpm understands. +# A clean clone of upstream fails to build with `EUNSUPPORTEDPROTOCOL` +# under npm. Switching to pnpm matches the lockfile that ships in-repo. +# Also drops `--frozen-lockfile` because the upstream pnpm-lock.yaml is +# out of date vs. package.json (ERR_PNPM_OUTDATED_LOCKFILE) — bug to +# file upstream once we've verified the rest of the stack works. + FROM node:20.11-bullseye AS build WORKDIR /app -# Copy package files and install dependencies -COPY package*.json ./ -RUN npm install +RUN npm install -g pnpm@9 + +# Copy lockfile + manifest first so the install layer caches across +# source changes. +COPY package.json pnpm-lock.yaml ./ +RUN pnpm install --no-frozen-lockfile # Copy application files COPY . . # Generate prisma client and build the application RUN npx prisma generate -RUN npm run build +RUN pnpm run build # Runtime stage -FROM node:20.11-alpine as runtime +FROM node:20.11-alpine AS runtime WORKDIR /app @@ -22,11 +34,13 @@ RUN apk update && \ apk add --no-cache openssl && \ rm -rf /var/cache/apk/* +RUN npm install -g pnpm@9 + # Copy built files from the build stage COPY --from=build /app . 
-# Install only runtime dependencies -RUN npm install --only=production +# Install only runtime dependencies (pnpm respects the workspace protocol) +RUN pnpm install --prod --no-frozen-lockfile EXPOSE 3000 From 42dbbd753663f4698e930582a7c8d72666e7344e Mon Sep 17 00:00:00 2001 From: Padreug Date: Tue, 26 May 2026 00:29:53 +0200 Subject: [PATCH 04/14] =?UTF-8?q?disable=20pingOrDie=20watchdog=20?= =?UTF-8?q?=E2=80=94=20false-positives=20on=20non-public=20relays?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NDK 2.8.1's outbox model doesn't reliably deliver self-published events back through subscriptions when the configured relay set is a single custom (non-public) relay. The pingOrDie self-watchdog publishes a kind-24133 event to its own pubkey every 20s and exits the bunker if it doesn't see the echo within 50s — which means on a private relay channel (e.g. LNbits's nostrrelay extension), the bunker exits cleanly every 50s even though admin RPCs over that same channel are working fine. Plain-WebSocket round-trips to the same relay echo correctly in <1s, so the issue is on NDK's side, not the relay's. Commenting out the watchdog is the minimum patch to keep the daemon alive. Real fix is either an env-flag opt-out, a simpler connectivity check that doesn't depend on self-echo, or an NDK upgrade that fixes the outbox-vs-subscribe race. Fixes #4. See also #7 for the underlying NDK echo investigation. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/index.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/daemon/admin/index.ts b/src/daemon/admin/index.ts index 4db9cc2..75dfc4b 100644 --- a/src/daemon/admin/index.ts +++ b/src/daemon/admin/index.ts @@ -122,7 +122,12 @@ class AdminInterface { this.rpc.on('request', (req) => this.handleRequest(req)); - pingOrDie(this.ndk); + // pingOrDie disabled — NDK 2.8.1 outbox model doesn't echo + // self-published events back through subscriptions on + // non-public relay channels, so the watchdog fires false + // positives and exits the bunker every 50s on private relays. + // See aiolabs/nsecbunkerd#4 + #7. + // pingOrDie(this.ndk); }).catch((err) => { console.log('❌ admin connection failed'); console.log(err); From e39eaa632db16c0d2abdd78710467639e74ddcf4 Mon Sep 17 00:00:00 2001 From: Padreug Date: Tue, 26 May 2026 00:32:39 +0200 Subject: [PATCH 05/14] startKey: decode bech32 nsec to hex before constructing NDKPrivateKeySigner MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit NDK 2.8.1's NDKPrivateKeySigner constructor forwards its arg straight to nostr-tools getPublicKey() which requires 32-byte hex/bytes/bigint and throws on bech32 input. Every key loaded through startKey (i.e. every key created via create_new_key, plus boot-time reloads of any plain-nsec entries in the config) was failing silently with the nostr-tools type error. The try/catch caught the throw and returned without loading the key, so the bunker would happily report create_new_key as successful, the key would persist encrypted on disk, but the runtime keystore would not have a signer for it. NIP-46 connect / sign_event against any admin-provisioned target therefore silently timed out from the client side — blocking essentially every signing flow. Sister bug to #5 (getKeys iterator) in a different code path. 
The fix matches the existing pattern in create_new_key.ts:16: hexpk = nip19.decode(nsec).data as string; Verified against the local spike harness: create_new_key now loads the target into runtime; get_keys returns the new entry (assuming #5 is patched separately for the iterator path). Fixes #8. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/run.ts | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/daemon/run.ts b/src/daemon/run.ts index 262a150..6637986 100644 --- a/src/daemon/run.ts +++ b/src/daemon/run.ts @@ -230,8 +230,14 @@ class Daemon { if (nsec.startsWith('nsec1')) { try { - const key = new NDKPrivateKeySigner(nsec); - hexpk = key.privateKey!; + // NDK 2.8.1's NDKPrivateKeySigner constructor passes its + // arg straight to nostr-tools getPublicKey() which requires + // 32-byte hex / bytes / bigint, not bech32. Without this + // decode, every key created via create_new_key fails to + // load with the nostr-tools getPublicKey type error, so + // the bunker can never sign for any target it provisions. + // See aiolabs/nsecbunkerd#8. + hexpk = nip19.decode(nsec).data as string; } catch(e) { console.error(`Error loading key ${name}:`, e); return From 8caf856ab21c04be55adf1df73b2d8de7b21e9d1 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 16:56:27 +0200 Subject: [PATCH 06/14] diag(#7): env-gated per-relay transport instrumentation Add NSEC_BUNKER_DEBUG_TRANSPORT=1 opt-in logging that emits REQUEST_IN on inbound NIP-46 RPCs, RESPONSE_SENT around NDKNostrRpc.sendResponse, and PUBLISHED / PUBLISH_FAILED per-relay on the bunker's pool. Surfaces the diagnostic signal NDKNostrRpc itself discards: sendResponse calls `event.publish(this.relaySet)` and throws away the Set it returns, so silent outbox-drops and wrong-kind responses are invisible without hooking the pool's per-relay events directly. 
Validated against the local bunker via the lnbits-side admin spike harness (~/dev/lnbits/misc-aio/bunker_admin_spike.py): the instrumentation made the 9-step harness reveal a wrong-kind error response path (separate fix in the next commit) that had been masquerading as an NDK echo issue for a week. With the env flag unset the daemon stays as quiet as before. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/index.ts | 50 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/daemon/admin/index.ts b/src/daemon/admin/index.ts index 75dfc4b..4684fcb 100644 --- a/src/daemon/admin/index.ts +++ b/src/daemon/admin/index.ts @@ -111,8 +111,28 @@ class AdminInterface { return; } - this.ndk.pool.on('relay:connect', () => console.log('✅ nsecBunker Admin Interface ready')); + const debugTransport = process.env.NSEC_BUNKER_DEBUG_TRANSPORT === '1'; + + // Per-relay publish-status logging for diagnosing aiolabs/nsecbunkerd#7. + // NDKNostrRpc.sendResponse calls event.publish() and discards the + // returned Set, so a silent outbox-drop is invisible without + // hooking the underlying per-relay events. Gated by env flag so + // production deployments stay quiet. + const attachRelayLogging = (relay: any) => { + relay.on('published', (event: NDKEvent) => { + console.log(`📤 PUBLISHED relay=${relay.url} kind=${event.kind} id=${event.id?.slice(0,8)}`); + }); + relay.on('publish:failed', (event: NDKEvent, err: any) => { + console.log(`❌ PUBLISH_FAILED relay=${relay.url} kind=${event.kind} id=${event.id?.slice(0,8)} err=${err?.message ?? 
err}`); + }); + }; + + this.ndk.pool.on('relay:connect', (relay: any) => { + console.log('✅ nsecBunker Admin Interface ready'); + if (debugTransport) attachRelayLogging(relay); + }); this.ndk.pool.on('relay:disconnect', () => console.log('❌ admin disconnected')); + this.ndk.connect(2500).then(() => { // connect for whitelisted admins this.rpc.subscribe({ @@ -120,7 +140,33 @@ class AdminInterface { "#p": [this.signerUser!.pubkey] }); - this.rpc.on('request', (req) => this.handleRequest(req)); + // Attach per-relay logging to relays that connected before our + // 'relay:connect' listener was registered above (NDK can connect + // synchronously inside .connect() under some paths). + if (debugTransport) { + this.ndk.pool.relays.forEach((relay: any) => attachRelayLogging(relay)); + + // Wrap sendResponse to log id + kind + elapsed time so we + // can correlate REQUEST_IN → RESPONSE_SENT → PUBLISHED. + const originalSendResponse = this.rpc.sendResponse.bind(this.rpc); + this.rpc.sendResponse = async (id: string, remotePubkey: string, result: string, kind?: number, error?: string) => { + const start = Date.now(); + try { + await originalSendResponse(id, remotePubkey, result, kind, error); + console.log(`📨 RESPONSE_SENT id=${id} remote=${remotePubkey.slice(0,8)} kind=${kind ?? NDKKind.NostrConnect} elapsed=${Date.now()-start}ms`); + } catch (e: any) { + console.log(`❌ RESPONSE_SEND_FAILED id=${id} remote=${remotePubkey.slice(0,8)} kind=${kind ?? NDKKind.NostrConnect} err=${e?.message ?? 
e}`); + throw e; + } + }; + } + + this.rpc.on('request', (req) => { + if (debugTransport) { + console.log(`📥 REQUEST_IN method=${req.method} id=${req.id} from=${req.pubkey?.slice(0,8)} kind=${req.event?.kind}`); + } + this.handleRequest(req); + }); // pingOrDie disabled — NDK 2.8.1 outbox model doesn't echo // self-published events back through subscriptions on From 0a510b7f9a0c9187e692f9acf487360b5ea490b3 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:04:31 +0200 Subject: [PATCH 07/14] fix(#7): route error responses to the request's kind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The catch block in handleRequest and both response paths in create_account pass `NDKKind.NostrConnectAdmin` as the response kind. That constant does NOT exist in NDK 2.8.1 — only `NostrConnect = 24133` is exported — so it resolves to `undefined` and NDKNostrRpc.sendResponse falls through to its own default of `NDKKind.NostrConnect = 24133`. Net effect: any error response to an admin-channel (kind 24134) request is published on the NIP-46 signing channel (24133) instead, which clients subscribed for 24134 never see. Looks like a transport-layer NDK-echo / silent-drop issue from the client's perspective, but the bunker IS publishing reliably — just on the wrong kind. Mirror `req.event.kind` so the error response goes back on the same channel the request came in on. Same pattern the unknown-method path and create_account's validation-error path already used; just propagate it to the remaining sites. Drops the now-unused NDKKind import from create_account.ts. Validated end-to-end against the local bunker via the lnbits-side admin spike harness — after this fix + the migration entrypoint fix + the policyId type fix, all 9 spike steps including NIP-46 sign_event pass with Schnorr-valid signatures. See coordination log entry 2026-05-27T14:30Z. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/commands/create_account.ts | 17 ++++++++++++----- src/daemon/admin/index.ts | 10 +++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/daemon/admin/commands/create_account.ts b/src/daemon/admin/commands/create_account.ts index e4632d6..f3c026a 100644 --- a/src/daemon/admin/commands/create_account.ts +++ b/src/daemon/admin/commands/create_account.ts @@ -1,4 +1,4 @@ -import { Hexpubkey, NDKKind, NDKPrivateKeySigner, NDKRpcRequest, NDKUserProfile } from "@nostr-dev-kit/ndk"; +import { Hexpubkey, NDKPrivateKeySigner, NDKRpcRequest, NDKUserProfile } from "@nostr-dev-kit/ndk"; import AdminInterface from ".."; import { nip19 } from 'nostr-tools'; import { setupSkeletonProfile } from "../../lib/profile"; @@ -136,7 +136,7 @@ export default async function createAccount(admin: AdminInterface, req: NDKRpcRe } /** - * This is where the real work of creating the private key, wallet, nip-05, granting access, etc happen + * This is where the real work of creating the private key, wallet, nip-05, granting access, etc happen — pragma: allowlist secret */ export async function createAccountReal( admin: AdminInterface, @@ -209,11 +209,18 @@ export async function createAccountReal( // access it without having to go through an approval flow await grantPermissions(req, keyName); - return admin.rpc.sendResponse(req.id, req.pubkey, generatedUser.pubkey, NDKKind.NostrConnectAdmin); + // NDKKind.NostrConnectAdmin doesn't exist in NDK 2.8.1 — it resolves + // to `undefined` and sendResponse defaults to NDKKind.NostrConnect + // (24133), sending the response on the wrong channel. Mirror the + // request's kind so the response goes back on the same channel the + // client subscribed for. Filed as part of aiolabs/nsecbunkerd#7 + // diagnosis 2026-05-27. 
+ const originalKind = req.event.kind!; + return admin.rpc.sendResponse(req.id, req.pubkey, generatedUser.pubkey, originalKind); } catch (e: any) { console.trace('error', e); - return admin.rpc.sendResponse(req.id, req.pubkey, "error", NDKKind.NostrConnectAdmin, - e.message); + const originalKind = req.event.kind!; + return admin.rpc.sendResponse(req.id, req.pubkey, "error", originalKind, e.message); } } diff --git a/src/daemon/admin/index.ts b/src/daemon/admin/index.ts index 4684fcb..5adafe8 100644 --- a/src/daemon/admin/index.ts +++ b/src/daemon/admin/index.ts @@ -209,7 +209,15 @@ class AdminInterface { } } catch (err: any) { debug(`Error handling request ${req.method}: ${err?.message??err}`, req.params); - return this.rpc.sendResponse(req.id, req.pubkey, "error", NDKKind.NostrConnectAdmin, err?.message); + // NDKKind.NostrConnectAdmin doesn't exist in NDK 2.8.1 — using it + // makes sendResponse fall through to its default of 24133, which + // sends the error on a different channel than the request came in + // on. Mirror req.event.kind so the response goes back where the + // client is listening. Filed as part of aiolabs/nsecbunkerd#7 + // diagnosis 2026-05-27. + const originalKind = req.event.kind!; + console.log(`⚠️ HANDLE_REQUEST_ERROR method=${req.method} id=${req.id} kind=${originalKind} err=${err?.message ?? err}`); + return this.rpc.sendResponse(req.id, req.pubkey, "error", originalKind, err?.message); } } From 5e77de1202675c9abe17ea0bbe9ebf552a0a61b9 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:04:53 +0200 Subject: [PATCH 08/14] fix: convert policyId to Int before Prisma insert in create_new_token MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wire-level `create_new_token` RPC carries `policyId` as a string (everything in NDK RPC params is string). 
The handler correctly parseInts it for the `findUnique({where:{id:parseInt(policyId)}})` call but then forwards the unparsed string straight into the Prisma `token.create({data:{...policyId}})` payload. Prisma rejects with "Argument `policyId`: Invalid value provided. Expected Int or Null, provided String" because `Token.policyId` is declared `Int` per the schema (references `Policy.id`, which is autoincrement Int). Hoist `policyIdInt = parseInt(policyId)` and use it for both the findUnique lookup and the create payload. Latent upstream bug — no one would have seen it before because the wrong-kind error response (fixed in the previous commit) made the symptom look like a transport timeout rather than a Prisma validation error. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/commands/create_new_token.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/daemon/admin/commands/create_new_token.ts b/src/daemon/admin/commands/create_new_token.ts index df145a2..04588c4 100644 --- a/src/daemon/admin/commands/create_new_token.ts +++ b/src/daemon/admin/commands/create_new_token.ts @@ -7,15 +7,19 @@ export default async function createNewToken(admin: AdminInterface, req: NDKRpcR if (!clientName || !policyId) throw new Error("Invalid params"); - const policy = await prisma.policy.findUnique({ where: { id: parseInt(policyId) }, include: { rules: true } }); + const policyIdInt = parseInt(policyId); + const policy = await prisma.policy.findUnique({ where: { id: policyIdInt }, include: { rules: true } }); if (!policy) throw new Error("Policy not found"); console.log({clientName, policy, durationInHours}); const token = [...Array(64)].map(() => Math.floor(Math.random() * 16).toString(16)).join(''); + // policyId must be Int per the Prisma schema (Token.policyId references + // Policy.id which is autoincrement Int). Upstream passes the raw string + // from the wire — caught during aiolabs/nsecbunkerd#7 diagnosis 2026-05-27. 
const data: any = { - keyName, clientName, policyId, + keyName, clientName, policyId: policyIdInt, createdBy: req.pubkey, token }; From 053357899dbe14baac80064a852dc692c01dbefa Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:05:10 +0200 Subject: [PATCH 09/14] fix(docker): entrypoint runs migrations via scripts/start.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream Dockerfile sets `ENTRYPOINT [ "node", "./dist/index.js" ]`, which boots the daemon directly and silently bypasses `scripts/start.js` — the only place that runs `prisma migrate deploy`. On a clean install, the SQLite db file at $DATABASE_URL is created empty (0 bytes) and every Policy / KeyUser / Token / SigningCondition operation throws "table does not exist." `ping` / `get_keys` / `create_new_key` happen to survive because they only touch the JSON config, not the db. Two changes: 1. ENTRYPOINT switches to `node ./scripts/start.js`. The CMD arg (`start`) and any additional argv pass through to the daemon unchanged via process.argv. 2. Runtime pnpm install drops `--prod`. The prisma CLI lives in devDependencies; with `--prod`, `npx prisma migrate deploy` tries to download prisma@latest at runtime, which OOMs in modest containers. Including devDeps at runtime adds modest image bulk for correctness. Validated end-to-end against the local regtest stack — after the rebuild the SQLite db boots populated with 22 migrations, and the lnbits-side admin spike harness passes all 9 steps including NIP-46 sign_event with Schnorr-valid signatures. Co-Authored-By: Claude Opus 4.7 (1M context) --- Dockerfile | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1168d8c..9ace24a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,10 +39,20 @@ RUN npm install -g pnpm@9 # Copy built files from the build stage COPY --from=build /app . 
-# Install only runtime dependencies (pnpm respects the workspace protocol) -RUN pnpm install --prod --no-frozen-lockfile +# Install all dependencies (including devDeps). The prisma CLI lives in +# devDependencies but scripts/start.js invokes `prisma migrate deploy` +# at boot, so it must be available at runtime. Dropping --prod adds the +# CLI tooling to the runtime image — a modest size cost for the +# correctness of the migration step. +RUN pnpm install --no-frozen-lockfile EXPOSE 3000 -ENTRYPOINT [ "node", "./dist/index.js" ] +# Run via scripts/start.js so `prisma migrate deploy` applies pending +# migrations before the daemon spawns. The upstream Dockerfile invokes +# ./dist/index.js directly, which silently bypasses the migration step +# and leaves the SQLite db empty on first boot — every command that +# touches Policy/KeyUser/Token/etc. then throws "table does not exist." +# Caught during aiolabs/nsecbunkerd#7 diagnosis 2026-05-27. +ENTRYPOINT [ "node", "./scripts/start.js" ] CMD ["start"] From ccfde02d70a73d6de0cbf60023fe828299db0821 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:05:24 +0200 Subject: [PATCH 10/14] fix(start.js): resolve sibling paths from script location, not cwd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The launcher previously assumed cwd was the package root: `mkdir config` in cwd, `npm run prisma:migrate` in cwd, `node ./dist/index.js`. Works under docker (WORKDIR /app, writable) but breaks anywhere cwd differs from the package root — e.g. a nix-built bunker invoked from a systemd unit whose WorkingDirectory is the state dir (/var/lib/nsecbunkerd) and not the nix store path that holds dist/, scripts/, prisma/. Resolve sibling paths via `path.resolve(__dirname, '..')` so the package-internal layout is robust to cwd. Use `path.join(pkgRoot, 'dist/index.js')` for the daemon spawn and `{ cwd: pkgRoot }` for the npm migrate exec. 
Switch `mkdir config` (which only works in writable cwd) to `fs.mkdirSync(configDir, { recursive: true })` where configDir defaults to `./config` relative to cwd, overrideable via NSEC_BUNKER_CONFIG_DIR. This lets the nix package install the launcher into the read-only store while the systemd unit still does its config/state work in /var/lib/nsecbunkerd with no shell wrapping. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/start.js | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/scripts/start.js b/scripts/start.js index c3899f8..603d5b2 100644 --- a/scripts/start.js +++ b/scripts/start.js @@ -1,20 +1,32 @@ const { execSync, spawn } = require('child_process'); const fs = require('fs'); +const path = require('path'); + +// Resolve sibling paths from this script's location so the launcher +// works whether cwd is /app (docker), the nix store, or a writable +// state dir set by systemd's WorkingDirectory. The prisma CLI and +// dist/index.js live alongside this file in `/share/nsecbunkerd/` +// (nix) or `/app/` (docker). The migration-side env knobs: +// NSEC_BUNKER_CONFIG_DIR — directory holding nsecbunker.{json,db}; +// defaults to ./config relative to cwd. +// DATABASE_URL — prisma's source of truth for the sqlite +// path; honor whatever the caller set. 
+const pkgRoot = path.resolve(__dirname, '..'); +const configDir = process.env.NSEC_BUNKER_CONFIG_DIR || path.resolve(process.cwd(), 'config'); try { - console.log(`Running migrations`); - // check if config folder exists - if (!fs.existsSync('./config')) { - execSync(`mkdir config`); + console.log(`Running migrations`); + if (!fs.existsSync(configDir)) { + fs.mkdirSync(configDir, { recursive: true }); } - execSync('npm run prisma:migrate'); + execSync('npm run prisma:migrate', { cwd: pkgRoot, stdio: 'inherit' }); } catch (error) { - console.log(error); + console.log(error); // Handle any potential migration errors here } const args = process.argv.slice(2); -const childProcess = spawn('node', ['./dist/index.js', ...args], { +const childProcess = spawn('node', [path.join(pkgRoot, 'dist/index.js'), ...args], { stdio: 'inherit', }); From 662dd21a60acbd4ae12225af2736b4c2cd6fc8be Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:08:42 +0200 Subject: [PATCH 11/14] fix(nix): include prisma CLI + scripts/, wrapper invokes start.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three correctness fixes to the nix derivation that mirror the Dockerfile correctness fixes: 1. Drop `pnpm prune --prod --ignore-scripts` from the build phase. The prune step removed the prisma CLI (devDependency) from the output, so the runtime invocation of `prisma migrate deploy` had nothing to exec. Same trap the upstream Dockerfile fell into via `--prod` install. 2. Copy `scripts/` into `$out/share/nsecbunkerd/` alongside dist, node_modules, prisma, templates. Without it the launcher script (which contains the migration step) wasn't present. 3. The makeWrapper target switches from `dist/index.js` to `scripts/start.js`. Same change the Dockerfile ENTRYPOINT got in the previous commit. 
Also adds nodejs_20 to PATH so `npm` is resolvable from inside start.js, and drops `--chdir` so the caller (systemd, docker compose) controls cwd — start.js now resolves sibling paths from `__dirname`, independently committed. The `patchNdk` substitution narrows from the old `workspace:*` form (no longer in the package.json after fork commit 06272c8) to the current `"2.8.1"` → `"^2.8.1"` rewrite needed to align package.json with the lockfile under --frozen-lockfile. Remaining known gap: nixpkgs ships prisma-engines 7.7.0 while the JS prisma CLI in node_modules is 5.4.1, an RPC vocabulary mismatch that breaks the migrate step at runtime (`Method not found: listMigrationDirectories`). Either bump prisma JS to ^7.x or overlay prisma-engines to 5.4.1. Out of scope for this commit; docker build unaffected. Co-Authored-By: Claude Opus 4.7 (1M context) --- package.nix | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/package.nix b/package.nix index 5c49bf7..adeb62b 100644 --- a/package.nix +++ b/package.nix @@ -13,12 +13,17 @@ }: let - # package.json pins `@nostr-dev-kit/ndk: "workspace:*"` but the lockfile - # resolves `^2.8.1`. With --frozen-lockfile pnpm refuses the mismatch, - # so rewrite the spec to match the lockfile. + # Fork commit `06272c8` ("pin @nostr-dev-kit/ndk to 2.8.1 instead of + # workspace:*") changed package.json to a pinned `"2.8.1"`, but the + # pnpm-lock.yaml still expresses the spec as `"^2.8.1"` (the way + # `pnpm add` originally generated it). pnpm with --frozen-lockfile + # rejects that mismatch. Patching package.json to use the caret form + # is non-semantic (2.8.1 is still the resolved version) and aligns + # both files. Same fix the Dockerfile-side already handles via + # `--no-frozen-lockfile`; in nix we prefer frozen + a targeted patch. 
patchNdk = '' substituteInPlace package.json \ - --replace-fail '"@nostr-dev-kit/ndk": "workspace:*"' \ + --replace-fail '"@nostr-dev-kit/ndk": "2.8.1"' \ '"@nostr-dev-kit/ndk": "^2.8.1"' ''; @@ -77,7 +82,12 @@ stdenv.mkDerivation (finalAttrs: { pnpm prisma generate pnpm build - pnpm prune --prod --ignore-scripts + # Do NOT `pnpm prune --prod` here — the prisma CLI lives in + # devDependencies and `scripts/start.js` invokes it at boot via + # `npx prisma migrate deploy`. Without the CLI, the migration step + # silently fails (npx falls back to downloading prisma fresh, which + # OOMs on most containers) and the SQLite db stays empty. See + # `aiolabs/nsecbunkerd#7` diagnosis 2026-05-27. find node_modules -xtype l -delete runHook postBuild @@ -87,14 +97,24 @@ stdenv.mkDerivation (finalAttrs: { runHook preInstall mkdir -p $out/{bin,share/nsecbunkerd} - cp -r dist node_modules prisma templates package.json \ + # scripts/ MUST be copied — it contains the start.js launcher that + # runs `prisma migrate deploy` before spawning the daemon. The + # upstream packaging (and the upstream Dockerfile) bypassed this by + # invoking dist/index.js directly, leaving migrations unapplied. + cp -r dist node_modules prisma scripts templates package.json \ $out/share/nsecbunkerd/ + # Wrapper invokes scripts/start.js, which runs `prisma migrate deploy` + # then spawns dist/index.js. start.js resolves sibling paths from + # __dirname, so the caller (systemd unit, docker compose, etc.) can + # set its own WorkingDirectory for the writable state dir without + # interfering with how the launcher finds its own package files. + # NSEC_BUNKER_CONFIG_DIR can override the config directory location; + # by default it's `./config` relative to cwd. 
makeWrapper ${lib.getExe nodejs_20} $out/bin/nsecbunkerd \ - --chdir $out/share/nsecbunkerd \ - --add-flags $out/share/nsecbunkerd/dist/index.js \ + --add-flags $out/share/nsecbunkerd/scripts/start.js \ --set NODE_ENV production \ - --prefix PATH : ${lib.makeBinPath [ openssl ]} \ + --prefix PATH : ${lib.makeBinPath [ openssl nodejs_20 ]} \ ${ lib.concatStringsSep " \\\n " ( lib.mapAttrsToList (n: v: "--set ${n} ${lib.escapeShellArg v}") prismaEnv From 1792bc489c5624e305aa546c40aeda81bc6994c8 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 20:42:43 +0200 Subject: [PATCH 12/14] fix(#4): replace pingOrDie self-echo watchdog with pool-status check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original watchdog published a kind-24133 event to its own pubkey every 20s and exited if no echo arrived within 50s. On a single private relay setup (LNbits's nostrrelay extension channel), NDK 2.8.1's outbox model doesn't reliably route self-publishes back through the matching subscription, so the watchdog fires false positives and exits every 50s even though admin RPCs over the same channel still work fine. The upstream patches we landed previously (commit 42dbbd7) commented the call out as an emergency stopgap; this commit replaces the mechanism with one that actually answers the right question. Pool-status watchdog: poll `ndk.pool.connectedRelays().length` every 10s, track the most recent moment any relay was connected, exit if no relay has been connected for 60s. Uses NDK's own connection-lifecycle tracking which works reliably across all relay configurations — no self-publish, no subscription dependency, no relay traffic. Same intent as pingOrDie (detect partition from relay layer and let the supervisor restart us), reliable signal. Call site re-enable + env-flag opt-out follow in the next commit. Drops the now-unused NostrEvent import. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/index.ts | 75 ++++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 36 deletions(-) diff --git a/src/daemon/admin/index.ts b/src/daemon/admin/index.ts index 5adafe8..3940327 100644 --- a/src/daemon/admin/index.ts +++ b/src/daemon/admin/index.ts @@ -1,5 +1,5 @@ import "websocket-polyfill"; -import NDK, { NDKEvent, NDKKind, NDKPrivateKeySigner, NDKRpcRequest, NDKRpcResponse, NDKUser, NostrEvent } from '@nostr-dev-kit/ndk'; +import NDK, { NDKEvent, NDKKind, NDKPrivateKeySigner, NDKRpcRequest, NDKRpcResponse, NDKUser } from '@nostr-dev-kit/ndk'; import { NDKNostrRpc } from '@nostr-dev-kit/ndk'; import createDebug from 'debug'; import { Key, KeyUser } from '../run'; @@ -454,44 +454,47 @@ class AdminInterface { } } -async function pingOrDie(ndk: NDK) { - let deathTimer: NodeJS.Timeout | null = null; - - function resetDeath() { - if (deathTimer) clearTimeout(deathTimer); - deathTimer = setTimeout(() => { - console.log(`❌ No ping event received in 30 seconds. Exiting.`); - process.exit(1); - }, 50000); - } - - const self = await ndk.signer!.user(); - const sub = ndk.subscribe({ - authors: [self.pubkey], - kinds: [NDKKind.NostrConnect], - "#p": [self.pubkey] - }); - sub.on("event", (event: NDKEvent) => { - console.log(`🔔 Received ping event:`, event.created_at); - resetDeath(); - }); - sub.start(); - - resetDeath(); +/** + * Pool-status connection watchdog. Exits the daemon if every relay in + * the pool stays disconnected for longer than PARTITION_THRESHOLD_MS. + * + * Replaces the original `pingOrDie` self-echo watchdog, which published + * a kind-24133 event to its own pubkey every 20s and exited if it + * didn't see the echo within 50s. 
That works on public relays but + * silently breaks on single-private-relay setups: NDK 2.8.1's outbox + * model doesn't reliably route self-publishes back through the + * matching subscription, so the watchdog fires false positives and + * exits the daemon every 50s while RPCs over the same channel still + * work fine. See aiolabs/nsecbunkerd#4 + #7. + * + * The pool-status approach uses NDK's own connection-lifecycle + * tracking — `pool.connectedRelays()` reports relays in + * NDKRelayStatus.CONNECTED — which is reliable across all relay + * configurations because it doesn't depend on round-trip + * publish/subscribe. No event is published; no relay traffic. + * + * Detects partition within POLL_INTERVAL + PARTITION_THRESHOLD ms. + * Transient disconnects shorter than PARTITION_THRESHOLD don't trip + * the watchdog — useful for relays that flap or briefly drop on + * network blips. + */ +async function relayConnectionWatchdog(ndk: NDK) { + const POLL_INTERVAL_MS = 10_000; + const PARTITION_THRESHOLD_MS = 60_000; + let lastConnectedAt = Date.now(); setInterval(() => { - const event = new NDKEvent(ndk, { - kind: NDKKind.NostrConnect, - tags: [ ["p", self.pubkey] ], - content: "ping" - } as NostrEvent); - event.publish().then(() => { - console.log(`🔔 Sent ping event:`, event.created_at); - }).catch((e: any) => { - console.log(`❌ Failed to send ping event:`, e.message); + const connectedCount = ndk.pool.connectedRelays().length; + if (connectedCount > 0) { + lastConnectedAt = Date.now(); + return; + } + const elapsed = Date.now() - lastConnectedAt; + if (elapsed > PARTITION_THRESHOLD_MS) { + console.log(`❌ No connected relays for ${Math.floor(elapsed / 1000)}s. 
Exiting.`); process.exit(1); - }); - }, 20000); + } + }, POLL_INTERVAL_MS); } export default AdminInterface; From fb1c239e152c2db8ce567afe495e1461ce49ce6d Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 20:43:12 +0200 Subject: [PATCH 13/14] fix(#4): re-enable connection watchdog with env-flag opt-out Calls `relayConnectionWatchdog` (introduced in the previous commit) at the end of admin-interface connect(). Gated by NSEC_BUNKER_DISABLE_WATCHDOG=1 for operators who run external liveness checks (Prometheus probes, k8s readiness, etc.) and don't want the daemon to self-terminate. This restores the watchdog behavior that was commented out in commit 42dbbd7 (the emergency stopgap for the old self-echo false positives), but on top of the now-reliable pool-status mechanism. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/index.ts | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/daemon/admin/index.ts b/src/daemon/admin/index.ts index 3940327..db8733b 100644 --- a/src/daemon/admin/index.ts +++ b/src/daemon/admin/index.ts @@ -168,12 +168,17 @@ class AdminInterface { this.handleRequest(req); }); - // pingOrDie disabled — NDK 2.8.1 outbox model doesn't echo - // self-published events back through subscriptions on - // non-public relay channels, so the watchdog fires false - // positives and exits the bunker every 50s on private relays. - // See aiolabs/nsecbunkerd#4 + #7. - // pingOrDie(this.ndk); + // Connection watchdog: exit if pool reports no connected relays + // for >60s so the process supervisor (systemd / docker restart + // policy / k8s) can recover. Replaces the original self-echo + // pingOrDie — see relayConnectionWatchdog comment + #4 + #7. + // Operators with external liveness checking can disable via + // NSEC_BUNKER_DISABLE_WATCHDOG=1. 
+ if (process.env.NSEC_BUNKER_DISABLE_WATCHDOG !== '1') { + relayConnectionWatchdog(this.ndk); + } else { + console.log('⏸ watchdog disabled via NSEC_BUNKER_DISABLE_WATCHDOG=1'); + } }).catch((err) => { console.log('❌ admin connection failed'); console.log(err); From 65a6966b9fb4ddec878037fd1ccddd949060e4a8 Mon Sep 17 00:00:00 2001 From: Padreug Date: Sat, 30 May 2026 12:25:45 +0200 Subject: [PATCH 14/14] fix(#9): close race between create_new_key and NIP-46 connect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two-layer fix for the issue where a fresh client chaining create_new_key + NIP-46 connect on the same target key would time out — bunker had no subscription registered for the new key by the time the connect event arrived at the relay. Layer 1 — run.ts: loadNsec and unlockKey were synchronous and fire-and-forgot the async startKey promise. create_new_key.ts:35 already awaited loadNsec, but the await was a no-op against a sync return. Promoted both to async and properly awaited startKey, so backend.start() at least gets a chance to run before the caller's response goes out. Layer 2 — backend/index.ts: NDKNip46Backend.start() registers the kind-24133 subscription via this.ndk.subscribe(...) but returns immediately, before the relay's EOSE confirms it has the subscription on file. Override start() in our Backend subclass to await EOSE before resolving. This is the actual race-closer — layer 1's await alone wasn't enough because start() was still returning before the relay registered the subscription. Surfaced by aiolabs/lnbits#33's eager-bind chain, which publishes a NIP-46 connect event in the same HTTP round-trip as create_new_key. Pre-fix lnbits deferred the connect to first sign_event (minutes-to-hours after provisioning), so the race window was hidden. 
Verified end-to-end on bohm regtest: demo account creation through the webapp now completes cleanly, with bunker logs showing connect + sign_event for the freshly-provisioned key. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/backend/index.ts | 37 +++++++++++++++++++++++++++++++++++++ src/daemon/run.ts | 6 +++--- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/daemon/backend/index.ts b/src/daemon/backend/index.ts index 7661a09..861f10f 100644 --- a/src/daemon/backend/index.ts +++ b/src/daemon/backend/index.ts @@ -22,6 +22,43 @@ export class Backend extends NDKNip46Backend { // this.setStrategy('publish_event', new PublishEventHandlingStrategy()); } + /** + * Override NDKNip46Backend.start() to await the kind-24133 + * subscription's EOSE before resolving. The base implementation + * calls `this.ndk.subscribe(...)` and returns immediately — the + * NDKSubscription queues a REQ on the relay connection but the + * relay's acknowledgement (EOSE) hasn't arrived yet. Any caller + * that publishes a NIP-46 event in the immediate window after + * `start()` returns races against the relay registering this + * subscription. + * + * aiolabs/lnbits#33's eager-bind chain publishes a NIP-46 + * `connect` event in the same HTTP round-trip as `create_new_key`, + * which loses this race deterministically — the bunker never + * sees the connect event because its subscription wasn't yet + * registered with the relay when the event was broadcast. + * + * Awaiting EOSE closes the race: by the time `start()` resolves, + * the relay has confirmed it has the bunker's subscription on + * file and will route matching kind-24133 events to it. + * + * See aiolabs/nsecbunkerd#9 for the full diagnosis. 
+ */ + async start(): Promise<void> { + this.localUser = await this.signer.user(); + await new Promise<void>((resolve) => { + const sub = this.ndk.subscribe( + { + kinds: [24133], + "#p": [this.localUser!.pubkey], + }, + { closeOnEose: false } + ); + sub.on("event", (e: any) => this.handleIncomingEvent(e)); + sub.on("eose", () => resolve()); + }); + } + private async validateToken(token: string) { if (!token) throw new Error("Invalid token"); diff --git a/src/daemon/run.ts b/src/daemon/run.ts index 6637986..89eda0a 100644 --- a/src/daemon/run.ts +++ b/src/daemon/run.ts @@ -257,14 +257,14 @@ class Daemon { const nsec = decryptNsec(iv, data, passphrase); this.activeKeys[keyName] = nsec; - this.startKey(keyName, nsec); + await this.startKey(keyName, nsec); return true; } - loadNsec(keyName: string, nsec: string) { + async loadNsec(keyName: string, nsec: string) { this.activeKeys[keyName] = nsec; - this.startKey(keyName, nsec); + await this.startKey(keyName, nsec); } } \ No newline at end of file