From 8caf856ab21c04be55adf1df73b2d8de7b21e9d1 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 16:56:27 +0200 Subject: [PATCH 1/6] diag(#7): env-gated per-relay transport instrumentation Add NSEC_BUNKER_DEBUG_TRANSPORT=1 opt-in logging that emits REQUEST_IN on inbound NIP-46 RPCs, RESPONSE_SENT around NDKNostrRpc.sendResponse, and PUBLISHED / PUBLISH_FAILED per-relay on the bunker's pool. Surfaces the diagnostic signal NDKNostrRpc itself discards: sendResponse calls `event.publish(this.relaySet)` and throws away the Set it returns, so silent outbox-drops and wrong-kind responses are invisible without hooking the pool's per-relay events directly. Validated against the local bunker via the lnbits-side admin spike harness (~/dev/lnbits/misc-aio/bunker_admin_spike.py): the instrumentation made the 9-step harness reveal a wrong-kind error response path (separate fix in the next commit) that had been masquerading as an NDK echo issue for a week. With the env flag unset the daemon stays as quiet as before. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/index.ts | 50 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 48 insertions(+), 2 deletions(-) diff --git a/src/daemon/admin/index.ts b/src/daemon/admin/index.ts index 75dfc4b..4684fcb 100644 --- a/src/daemon/admin/index.ts +++ b/src/daemon/admin/index.ts @@ -111,8 +111,28 @@ class AdminInterface { return; } - this.ndk.pool.on('relay:connect', () => console.log('✅ nsecBunker Admin Interface ready')); + const debugTransport = process.env.NSEC_BUNKER_DEBUG_TRANSPORT === '1'; + + // Per-relay publish-status logging for diagnosing aiolabs/nsecbunkerd#7. + // NDKNostrRpc.sendResponse calls event.publish() and discards the + // returned Set, so a silent outbox-drop is invisible without + // hooking the underlying per-relay events. Gated by env flag so + // production deployments stay quiet. 
+ const attachRelayLogging = (relay: any) => { + relay.on('published', (event: NDKEvent) => { + console.log(`📤 PUBLISHED relay=${relay.url} kind=${event.kind} id=${event.id?.slice(0,8)}`); + }); + relay.on('publish:failed', (event: NDKEvent, err: any) => { + console.log(`❌ PUBLISH_FAILED relay=${relay.url} kind=${event.kind} id=${event.id?.slice(0,8)} err=${err?.message ?? err}`); + }); + }; + + this.ndk.pool.on('relay:connect', (relay: any) => { + console.log('✅ nsecBunker Admin Interface ready'); + if (debugTransport) attachRelayLogging(relay); + }); this.ndk.pool.on('relay:disconnect', () => console.log('❌ admin disconnected')); + this.ndk.connect(2500).then(() => { // connect for whitelisted admins this.rpc.subscribe({ @@ -120,7 +140,33 @@ class AdminInterface { "#p": [this.signerUser!.pubkey] }); - this.rpc.on('request', (req) => this.handleRequest(req)); + // Attach per-relay logging to relays that connected before our + // 'relay:connect' listener was registered above (NDK can connect + // synchronously inside .connect() under some paths). + if (debugTransport) { + this.ndk.pool.relays.forEach((relay: any) => attachRelayLogging(relay)); + + // Wrap sendResponse to log id + kind + elapsed time so we + // can correlate REQUEST_IN → RESPONSE_SENT → PUBLISHED. + const originalSendResponse = this.rpc.sendResponse.bind(this.rpc); + this.rpc.sendResponse = async (id: string, remotePubkey: string, result: string, kind?: number, error?: string) => { + const start = Date.now(); + try { + await originalSendResponse(id, remotePubkey, result, kind, error); + console.log(`📨 RESPONSE_SENT id=${id} remote=${remotePubkey.slice(0,8)} kind=${kind ?? NDKKind.NostrConnect} elapsed=${Date.now()-start}ms`); + } catch (e: any) { + console.log(`❌ RESPONSE_SEND_FAILED id=${id} remote=${remotePubkey.slice(0,8)} kind=${kind ?? NDKKind.NostrConnect} err=${e?.message ?? 
e}`); + throw e; + } + }; + } + + this.rpc.on('request', (req) => { + if (debugTransport) { + console.log(`📥 REQUEST_IN method=${req.method} id=${req.id} from=${req.pubkey?.slice(0,8)} kind=${req.event?.kind}`); + } + this.handleRequest(req); + }); // pingOrDie disabled — NDK 2.8.1 outbox model doesn't echo // self-published events back through subscriptions on From 0a510b7f9a0c9187e692f9acf487360b5ea490b3 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:04:31 +0200 Subject: [PATCH 2/6] fix(#7): route error responses to the request's kind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The catch block in handleRequest and both response paths in create_account pass `NDKKind.NostrConnectAdmin` as the response kind. That constant does NOT exist in NDK 2.8.1 — only `NostrConnect = 24133` is exported — so it resolves to `undefined` and NDKNostrRpc.sendResponse falls through to its own default of `NDKKind.NostrConnect = 24133`. Net effect: any error response to an admin-channel (kind 24134) request is published on the NIP-46 signing channel (24133) instead, which clients subscribed for 24134 never see. Looks like a transport-layer NDK-echo / silent-drop issue from the client's perspective, but the bunker IS publishing reliably — just on the wrong kind. Mirror `req.event.kind` so the error response goes back on the same channel the request came in on. Same pattern the unknown-method path and create_account's validation-error path already used; just propagate it to the remaining sites. Drops the now-unused NDKKind import from create_account.ts. Validated end-to-end against the local bunker via the lnbits-side admin spike harness — after this fix + the migration entrypoint fix + the policyId type fix, all 9 spike steps including NIP-46 sign_event pass with Schnorr-valid signatures. See coordination log entry 2026-05-27T14:30Z. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/commands/create_account.ts | 17 ++++++++++++----- src/daemon/admin/index.ts | 10 +++++++++- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/src/daemon/admin/commands/create_account.ts b/src/daemon/admin/commands/create_account.ts index e4632d6..f3c026a 100644 --- a/src/daemon/admin/commands/create_account.ts +++ b/src/daemon/admin/commands/create_account.ts @@ -1,4 +1,4 @@ -import { Hexpubkey, NDKKind, NDKPrivateKeySigner, NDKRpcRequest, NDKUserProfile } from "@nostr-dev-kit/ndk"; +import { Hexpubkey, NDKPrivateKeySigner, NDKRpcRequest, NDKUserProfile } from "@nostr-dev-kit/ndk"; import AdminInterface from ".."; import { nip19 } from 'nostr-tools'; import { setupSkeletonProfile } from "../../lib/profile"; @@ -136,7 +136,7 @@ export default async function createAccount(admin: AdminInterface, req: NDKRpcRe } /** - * This is where the real work of creating the private key, wallet, nip-05, granting access, etc happen + * This is where the real work of creating the private key, wallet, nip-05, granting access, etc happen — pragma: allowlist secret */ export async function createAccountReal( admin: AdminInterface, @@ -209,11 +209,18 @@ export async function createAccountReal( // access it without having to go through an approval flow await grantPermissions(req, keyName); - return admin.rpc.sendResponse(req.id, req.pubkey, generatedUser.pubkey, NDKKind.NostrConnectAdmin); + // NDKKind.NostrConnectAdmin doesn't exist in NDK 2.8.1 — it resolves + // to `undefined` and sendResponse defaults to NDKKind.NostrConnect + // (24133), sending the response on the wrong channel. Mirror the + // request's kind so the response goes back on the same channel the + // client subscribed for. Filed as part of aiolabs/nsecbunkerd#7 + // diagnosis 2026-05-27. 
+ const originalKind = req.event.kind!; + return admin.rpc.sendResponse(req.id, req.pubkey, generatedUser.pubkey, originalKind); } catch (e: any) { console.trace('error', e); - return admin.rpc.sendResponse(req.id, req.pubkey, "error", NDKKind.NostrConnectAdmin, - e.message); + const originalKind = req.event.kind!; + return admin.rpc.sendResponse(req.id, req.pubkey, "error", originalKind, e.message); } } diff --git a/src/daemon/admin/index.ts b/src/daemon/admin/index.ts index 4684fcb..5adafe8 100644 --- a/src/daemon/admin/index.ts +++ b/src/daemon/admin/index.ts @@ -209,7 +209,15 @@ class AdminInterface { } } catch (err: any) { debug(`Error handling request ${req.method}: ${err?.message??err}`, req.params); - return this.rpc.sendResponse(req.id, req.pubkey, "error", NDKKind.NostrConnectAdmin, err?.message); + // NDKKind.NostrConnectAdmin doesn't exist in NDK 2.8.1 — using it + // makes sendResponse fall through to its default of 24133, which + // sends the error on a different channel than the request came in + // on. Mirror req.event.kind so the response goes back where the + // client is listening. Filed as part of aiolabs/nsecbunkerd#7 + // diagnosis 2026-05-27. + const originalKind = req.event.kind!; + console.log(`⚠️ HANDLE_REQUEST_ERROR method=${req.method} id=${req.id} kind=${originalKind} err=${err?.message ?? err}`); + return this.rpc.sendResponse(req.id, req.pubkey, "error", originalKind, err?.message); } } From 5e77de1202675c9abe17ea0bbe9ebf552a0a61b9 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:04:53 +0200 Subject: [PATCH 3/6] fix: convert policyId to Int before Prisma insert in create_new_token MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The wire-level `create_new_token` RPC carries `policyId` as a string (everything in NDK RPC params is string). 
The handler correctly parseInts it for the `findUnique({where:{id:parseInt(policyId)}})` call but then forwards the unparsed string straight into the Prisma `token.create({data:{...policyId}})` payload. Prisma rejects with "Argument `policyId`: Invalid value provided. Expected Int or Null, provided String" because `Token.policyId` is declared `Int` per the schema (references `Policy.id`, which is autoincrement Int). Hoist `policyIdInt = parseInt(policyId)` and use it for both the findUnique lookup and the create payload. Latent upstream bug — no one would have seen it before because the wrong-kind error response (fixed in the previous commit) made the symptom look like a transport timeout rather than a Prisma validation error. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/daemon/admin/commands/create_new_token.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/daemon/admin/commands/create_new_token.ts b/src/daemon/admin/commands/create_new_token.ts index df145a2..04588c4 100644 --- a/src/daemon/admin/commands/create_new_token.ts +++ b/src/daemon/admin/commands/create_new_token.ts @@ -7,15 +7,19 @@ export default async function createNewToken(admin: AdminInterface, req: NDKRpcR if (!clientName || !policyId) throw new Error("Invalid params"); - const policy = await prisma.policy.findUnique({ where: { id: parseInt(policyId) }, include: { rules: true } }); + const policyIdInt = parseInt(policyId); + const policy = await prisma.policy.findUnique({ where: { id: policyIdInt }, include: { rules: true } }); if (!policy) throw new Error("Policy not found"); console.log({clientName, policy, durationInHours}); const token = [...Array(64)].map(() => Math.floor(Math.random() * 16).toString(16)).join(''); + // policyId must be Int per the Prisma schema (Token.policyId references + // Policy.id which is autoincrement Int). Upstream passes the raw string + // from the wire — caught during aiolabs/nsecbunkerd#7 diagnosis 2026-05-27. 
const data: any = { - keyName, clientName, policyId, + keyName, clientName, policyId: policyIdInt, createdBy: req.pubkey, token }; From 053357899dbe14baac80064a852dc692c01dbefa Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:05:10 +0200 Subject: [PATCH 4/6] fix(docker): entrypoint runs migrations via scripts/start.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Upstream Dockerfile sets `ENTRYPOINT [ "node", "./dist/index.js" ]`, which boots the daemon directly and silently bypasses `scripts/start.js` — the only place that runs `prisma migrate deploy`. On a clean install, the SQLite db file at $DATABASE_URL is created empty (0 bytes) and every Policy / KeyUser / Token / SigningCondition operation throws "table does not exist." `ping` / `get_keys` / `create_new_key` happen to survive because they only touch the JSON config, not the db. Two changes: 1. ENTRYPOINT switches to `node ./scripts/start.js`. The CMD arg (`start`) and any additional argv pass through to the daemon unchanged via process.argv. 2. Runtime pnpm install drops `--prod`. The prisma CLI lives in devDependencies; with `--prod`, `npx prisma migrate deploy` tries to download prisma@latest at runtime, which OOMs in modest containers. Including devDeps at runtime adds modest image bulk for correctness. Validated end-to-end against the local regtest stack — after the rebuild the SQLite db boots populated with 22 migrations, and the lnbits-side admin spike harness passes all 9 steps including NIP-46 sign_event with Schnorr-valid signatures. Co-Authored-By: Claude Opus 4.7 (1M context) --- Dockerfile | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index 1168d8c..9ace24a 100644 --- a/Dockerfile +++ b/Dockerfile @@ -39,10 +39,20 @@ RUN npm install -g pnpm@9 # Copy built files from the build stage COPY --from=build /app . 
-# Install only runtime dependencies (pnpm respects the workspace protocol) -RUN pnpm install --prod --no-frozen-lockfile +# Install all dependencies (including devDeps). The prisma CLI lives in +# devDependencies but scripts/start.js invokes `prisma migrate deploy` +# at boot, so it must be available at runtime. Dropping --prod adds the +# CLI tooling to the runtime image — a modest size cost for the +# correctness of the migration step. +RUN pnpm install --no-frozen-lockfile EXPOSE 3000 -ENTRYPOINT [ "node", "./dist/index.js" ] +# Run via scripts/start.js so `prisma migrate deploy` applies pending +# migrations before the daemon spawns. The upstream Dockerfile invokes +# ./dist/index.js directly, which silently bypasses the migration step +# and leaves the SQLite db empty on first boot — every command that +# touches Policy/KeyUser/Token/etc. then throws "table does not exist." +# Caught during aiolabs/nsecbunkerd#7 diagnosis 2026-05-27. +ENTRYPOINT [ "node", "./scripts/start.js" ] CMD ["start"] From ccfde02d70a73d6de0cbf60023fe828299db0821 Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:05:24 +0200 Subject: [PATCH 5/6] fix(start.js): resolve sibling paths from script location, not cwd MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The launcher previously assumed cwd was the package root: `mkdir config` in cwd, `npm run prisma:migrate` in cwd, `node ./dist/index.js`. Works under docker (WORKDIR /app, writable) but breaks anywhere cwd differs from the package root — e.g. a nix-built bunker invoked from a systemd unit whose WorkingDirectory is the state dir (/var/lib/nsecbunkerd) and not the nix store path that holds dist/, scripts/, prisma/. Resolve sibling paths via `path.resolve(__dirname, '..')` so the package-internal layout is robust to cwd. Use `path.join(pkgRoot, 'dist/index.js')` for the daemon spawn and `{ cwd: pkgRoot }` for the npm migrate exec. 
Switch `mkdir config` (which only works in writable cwd) to `fs.mkdirSync(configDir, { recursive: true })` where configDir defaults to `./config` relative to cwd, overridable via NSEC_BUNKER_CONFIG_DIR. This lets the nix package install the launcher into the read-only store while the systemd unit still does its config/state work in /var/lib/nsecbunkerd with no shell wrapping. Co-Authored-By: Claude Opus 4.7 (1M context) --- scripts/start.js | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/scripts/start.js b/scripts/start.js index c3899f8..603d5b2 100644 --- a/scripts/start.js +++ b/scripts/start.js @@ -1,20 +1,32 @@ const { execSync, spawn } = require('child_process'); const fs = require('fs'); +const path = require('path'); + +// Resolve sibling paths from this script's location so the launcher +// works whether cwd is /app (docker), the nix store, or a writable +// state dir set by systemd's WorkingDirectory. The prisma CLI and +// dist/index.js live next to scripts/ in `$out/share/nsecbunkerd/` +// (nix) or `/app/` (docker). The migration-side env knobs: +// NSEC_BUNKER_CONFIG_DIR — directory holding nsecbunker.{json,db}; +// defaults to ./config relative to cwd. +// DATABASE_URL — prisma's source of truth for the sqlite +// path; honor whatever the caller set.
+const pkgRoot = path.resolve(__dirname, '..'); +const configDir = process.env.NSEC_BUNKER_CONFIG_DIR || path.resolve(process.cwd(), 'config'); try { - console.log(`Running migrations`); - // check if config folder exists - if (!fs.existsSync('./config')) { - execSync(`mkdir config`); + console.log(`Running migrations`); + if (!fs.existsSync(configDir)) { + fs.mkdirSync(configDir, { recursive: true }); } - execSync('npm run prisma:migrate'); + execSync('npm run prisma:migrate', { cwd: pkgRoot, stdio: 'inherit' }); } catch (error) { - console.log(error); + console.log(error); // Handle any potential migration errors here } const args = process.argv.slice(2); -const childProcess = spawn('node', ['./dist/index.js', ...args], { +const childProcess = spawn('node', [path.join(pkgRoot, 'dist/index.js'), ...args], { stdio: 'inherit', }); From 662dd21a60acbd4ae12225af2736b4c2cd6fc8be Mon Sep 17 00:00:00 2001 From: Padreug Date: Wed, 27 May 2026 17:08:42 +0200 Subject: [PATCH 6/6] fix(nix): include prisma CLI + scripts/, wrapper invokes start.js MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three correctness fixes to the nix derivation that mirror the Dockerfile correctness fixes: 1. Drop `pnpm prune --prod --ignore-scripts` from the build phase. The prune step removed the prisma CLI (devDependency) from the output, so the runtime invocation of `prisma migrate deploy` had nothing to exec. Same trap the upstream Dockerfile fell into via `--prod` install. 2. Copy `scripts/` into `$out/share/nsecbunkerd/` alongside dist, node_modules, prisma, templates. Without it the launcher script (which contains the migration step) wasn't present. 3. The makeWrapper target switches from `dist/index.js` to `scripts/start.js`. Same change the Dockerfile ENTRYPOINT got earlier in this series (PATCH 4/6, fix(docker)).
Also adds nodejs_20 to PATH so `npm` is resolvable from inside start.js, and drops `--chdir` so the caller (systemd, docker compose) controls cwd — start.js now resolves sibling paths from `__dirname`, independently committed. The `patchNdk` substitution narrows from the old `workspace:*` form (no longer in the package.json after fork commit 06272c8) to the current `"2.8.1"` → `"^2.8.1"` rewrite needed to align package.json with the lockfile under --frozen-lockfile. Remaining known gap: nixpkgs ships prisma-engines 7.7.0 while the JS prisma CLI in node_modules is 5.4.1, an RPC vocabulary mismatch that breaks the migrate step at runtime (`Method not found: listMigrationDirectories`). Either bump prisma JS to ^7.x or overlay prisma-engines to 5.4.1. Out of scope for this commit; docker build unaffected. Co-Authored-By: Claude Opus 4.7 (1M context) --- package.nix | 38 +++++++++++++++++++++++++++++--------- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/package.nix b/package.nix index 5c49bf7..adeb62b 100644 --- a/package.nix +++ b/package.nix @@ -13,12 +13,17 @@ }: let - # package.json pins `@nostr-dev-kit/ndk: "workspace:*"` but the lockfile - # resolves `^2.8.1`. With --frozen-lockfile pnpm refuses the mismatch, - # so rewrite the spec to match the lockfile. + # Fork commit `06272c8` ("pin @nostr-dev-kit/ndk to 2.8.1 instead of + # workspace:*") changed package.json to a pinned `"2.8.1"`, but the + # pnpm-lock.yaml still expresses the spec as `"^2.8.1"` (the way + # `pnpm add` originally generated it). pnpm with --frozen-lockfile + # rejects that mismatch. Patching package.json to use the caret form + # is non-semantic (2.8.1 is still the resolved version) and aligns + # both files. Same fix the Dockerfile-side already handles via + # `--no-frozen-lockfile`; in nix we prefer frozen + a targeted patch. 
patchNdk = '' substituteInPlace package.json \ - --replace-fail '"@nostr-dev-kit/ndk": "workspace:*"' \ + --replace-fail '"@nostr-dev-kit/ndk": "2.8.1"' \ '"@nostr-dev-kit/ndk": "^2.8.1"' ''; @@ -77,7 +82,12 @@ stdenv.mkDerivation (finalAttrs: { pnpm prisma generate pnpm build - pnpm prune --prod --ignore-scripts + # Do NOT `pnpm prune --prod` here — the prisma CLI lives in + # devDependencies and `scripts/start.js` invokes it at boot via + # `npx prisma migrate deploy`. Without the CLI, the migration step + # silently fails (npx falls back to downloading prisma fresh, which + # OOMs on most containers) and the SQLite db stays empty. See + # `aiolabs/nsecbunkerd#7` diagnosis 2026-05-27. find node_modules -xtype l -delete runHook postBuild @@ -87,14 +97,24 @@ stdenv.mkDerivation (finalAttrs: { runHook preInstall mkdir -p $out/{bin,share/nsecbunkerd} - cp -r dist node_modules prisma templates package.json \ + # scripts/ MUST be copied — it contains the start.js launcher that + # runs `prisma migrate deploy` before spawning the daemon. The + # upstream packaging (and the upstream Dockerfile) bypassed this by + # invoking dist/index.js directly, leaving migrations unapplied. + cp -r dist node_modules prisma scripts templates package.json \ $out/share/nsecbunkerd/ + # Wrapper invokes scripts/start.js, which runs `prisma migrate deploy` + # then spawns dist/index.js. start.js resolves sibling paths from + # __dirname, so the caller (systemd unit, docker compose, etc.) can + # set its own WorkingDirectory for the writable state dir without + # interfering with how the launcher finds its own package files. + # NSEC_BUNKER_CONFIG_DIR can override the config directory location; + # by default it's `./config` relative to cwd. 
makeWrapper ${lib.getExe nodejs_20} $out/bin/nsecbunkerd \ - --chdir $out/share/nsecbunkerd \ - --add-flags $out/share/nsecbunkerd/dist/index.js \ + --add-flags $out/share/nsecbunkerd/scripts/start.js \ --set NODE_ENV production \ - --prefix PATH : ${lib.makeBinPath [ openssl ]} \ + --prefix PATH : ${lib.makeBinPath [ openssl nodejs_20 ]} \ ${ lib.concatStringsSep " \\\n " ( lib.mapAttrsToList (n: v: "--set ${n} ${lib.escapeShellArg v}") prismaEnv