diff --git a/README.md b/README.md
index a886e8f..33163e4 100644
--- a/README.md
+++ b/README.md
@@ -669,6 +669,12 @@ scripts, so an injected `"), "#");
+ assert.equal(safeUrl("vbscript:msgbox(1)"), "#");
+ assert.equal(safeUrl("mailto:x@y.z"), "#");
+ // Control-char / leading-whitespace obfuscation can't slip a scheme past the check (browsers
+ // strip TAB/CR/LF and leading controls before resolving the scheme).
+ assert.equal(safeUrl("java\tscript:alert(1)"), "#");
+ assert.equal(safeUrl("java\nscript:alert(1)"), "#");
+ assert.equal(safeUrl(" javascript:alert(1)"), "#");
+ // Empty / control-only ⇒ a safe no-op href.
+ assert.equal(safeUrl(""), "#");
+ // Leading Unicode whitespace above U+0020 (NBSP/NEL/LS) is left as-is on purpose: a browser only
+ // strips C0+space when resolving an href, so a NBSP-prefixed "javascript:" is an invalid scheme to
+ // it too \u2014 it resolves as a relative reference, not script. Documented so the strip set isn't widened later.
+ assert.equal(safeUrl("\u00a0javascript:alert(1)"), "\u00a0javascript:alert(1)");
+});
+
+test("localPath: accepts host-relative paths, rejects absolute / protocol-relative / odd input", () => {
+ assert.equal(localPath("/admin/users?q=1&page=2"), "/admin/users?q=1&page=2");
+ assert.equal(localPath("/"), "/");
+ // Protocol-relative and backslash variants are off-origin → rejected (open-redirect guard).
+ assert.equal(localPath("//evil.com"), null);
+ assert.equal(localPath("/\\evil.com"), null);
+ assert.equal(localPath("https://evil.com"), null);
+ assert.equal(localPath("javascript:alert(1)"), null);
+ assert.equal(localPath("relative/no-leading-slash"), null);
+ // Control chars / whitespace (a return_to is a server-built path) ⇒ rejected.
+ assert.equal(localPath("/x\nSet-Cookie: y"), null);
+ assert.equal(localPath("/a b"), null);
+ assert.equal(localPath(""), null);
+ assert.equal(localPath(null), null);
+ assert.equal(localPath(undefined), null);
+});
diff --git a/src/safe-url.ts b/src/safe-url.ts
new file mode 100644
index 0000000..df8fdbc
--- /dev/null
+++ b/src/safe-url.ts
@@ -0,0 +1,35 @@
+// URL safety helpers (todo §9). Two pure, dependency-free guards:
+//
+// safeUrl(value) — sanitise an untrusted URL before rendering it in an href/src attribute.
+// Partials escape *text*, but a URL field is emitted verbatim, so a
+// `javascript:`/`data:` URL from upstream/user data would be live XSS. The
+// contract (docs/plugin-contract.md) is: a relative or http(s) URL is allowed,
+// anything else collapses to "#". Exported to plugins via plugin-api.ts.
+//
+// localPath(value) — validate a redirect target is a *same-origin* path (the redirect-URI
+// allowlist). Used for `return_to`: a host-relative "/a/b?x=1" passes, an
+// absolute or protocol-relative ("//evil.com", "https://evil.com") is rejected
+// so a crafted ?return_to= can't turn login completion into an open redirect.
+
+// ASCII control chars + space that browsers strip/ignore when resolving a URL — strip them before
+// the scheme check so "java\tscript:" / a leading space can't masquerade as relative.
+const CONTROL_G = /[\u0000-\u0020\u007f]/g;
+const CONTROL = /[\u0000-\u0020\u007f]/;
+const HAS_SCHEME = /^[a-z][a-z0-9+.-]*:/i; // a URL scheme prefix, e.g. "javascript:", "http:"
+const HTTP_SCHEME = /^https?:/i;
+
+export function safeUrl(value: string): string {
+ const cleaned = value.replace(CONTROL_G, "");
+ if (!cleaned) return "#";
+ // A scheme present? Allow only http(s). No scheme ⇒ relative ⇒ safe. Return the original once
+ // deemed safe (EJS still HTML-escapes it into the attribute; the inert control chars don't matter).
+ if (HAS_SCHEME.test(cleaned) && !HTTP_SCHEME.test(cleaned)) return "#";
+ return value;
+}
+
+export function localPath(value: string | null | undefined): string | null {
+ if (!value || CONTROL.test(value)) return null;
+ if (!value.startsWith("/")) return null; // must be host-relative
+ if (value.startsWith("//") || value.startsWith("/\\")) return null; // protocol-relative ⇒ off-origin
+ return value;
+}
diff --git a/todo.md b/todo.md
index b2926ae..3ce7d27 100644
--- a/todo.md
+++ b/todo.md
@@ -131,7 +131,7 @@ everything via Docker.
- [x] Structured logging / basic observability. use @larvit/log for OTLP compability dig down in how to use it properly. → Structured, OTLP-native logging on **`@larvit/log`** (2.3.0, pinned; itself zero-dependency — the one new runtime dep, justified by this item). New pure `src/logger.ts`: `createLogger({format,level,otlpEndpoint,otlpProtocol,stdout,stderr})` → one app `Log` tagged `service.name=plainpages` (the OTLP resource attr Loki/Tempo group by); `requestLogger(appLog,{requestId,traceparent})` **clones** it per request (own root trace — *not* nested under one app-lifetime span — inheriting level/format/streams/OTLP) into a "request" span, **adopting** an inbound W3C `traceparent` so a request continues an upstream proxy's distributed trace (malformed/duplicate ⇒ fresh trace; verified `clone` honours a passed `traceparent` while dropping the parent's, unlike `parentLog`). Wired: `app.ts` builds the per-request log at the top of the handler and on `res` **"close"** (fires on both completion *and* abort/truncation, unlike "finish", so aborted/static-stream-error requests are still logged) emits one access line (`method`/`path` — query dropped, may carry tokens — `status`/`ms`/`requestId`, guarded by try/catch) then `end()`s to flush the span (fire-and-forget `.catch`, so a flaky collector never crashes a served request); the catch-all 500 + the Ory-unreachable re-mint now log via `reqLog.error`/`warn`; `static.ts`'s mid-stream error takes an injected `onError` (default console.error for standalone use). `server.ts` builds the app logger from config, logs discovery/listen/shutdown, and `end()`-flushes on SIGTERM/SIGINT (re-entry-guarded). `bootstrap.ts` events go structured; the human first-run banner stays a raw console.log (UX, not a log event). Config (environment-agnostic, fail-loud): `LOG_LEVEL` (info), `LOG_FORMAT` (text; prod compose → json), `OTLP_ENDPOINT` (unset ⇒ console-only; set ⇒ export logs + spans to an OTel Collector → Loki/Tempo), `OTLP_PROTOCOL` (http/json|http/protobuf). compose: base sets `LOG_FORMAT=json` (prod pipelines), dev override flips it to `text`. Tests-first: `logger.test.ts` (service.name/severity-routing/level-gate/format, level-none silent, OTLP-only-when-endpoint, a stubbed-global-fetch proof it POSTs `/v1/logs`, requestLogger context-merge / own-root-trace / traceparent-continue / malformed-ignored), `config.test.ts` (the 4 toggles + enum/URL validation), `app.test.ts` (a live request emits the JSON access line), `compose.test.ts` (prod json / dev text). Per the §9 security-headers/denylist precedent: unit + app-HTTP integration, **no new browser E2E** (no new user-facing page) — and live-boot-verified (dev text+colour, prod json, access lines for page/static/404, graceful-shutdown line). Stability-reviewer on the diff: **APPROVE, no Critical/High** — addressed both yellow nits (access line guarded + switched "finish"→"close" so aborted requests log; shutdown re-entry guard) and the green ones (README collector-outage stderr note, double-`end()` guard). README (config table, new **Observability** section, Status, Layout, runtime-deps) + AGENTS (deps) updated. typecheck + **326 units** green (317 → 326). **Follow-up (route all fetch through the logger · ENV service name · leveled logging throughout):** an `AsyncLocalStorage` makes the per-request logger ambient (`runWithLog`/`currentLog`), so **every outbound `fetch`** traces with no signature churn — `tracedFetch` (a `typeof fetch`) routes through the active request log (client span + propagated W3C `traceparent`) and `server.ts` wires it under the Ory timeout into **all** Kratos/Keto/Hydra + JWKS calls; off the request path it's a plain fetch. `RequestContext` gained **`ctx.log`** (the request logger; additive/contract-stable) so a handler/plugin logs in-trace and `ctx.log.fetch(url)` traces its upstream calls — the reference plugin's `createUpstream` defaults to `tracedFetch`, and `plugin-api.ts` exports `tracedFetch` + the `Log` class. `SERVICE_NAME` (config + `createLogger({serviceName})`) makes the OTLP `service.name` implementer-brandable. Leveled logging across the app: who-did-what **audit** `info` lines on every admin write (user/group/role/client create·delete·assign, with `actor`/`target`/no secrets), `info` on login (session mint) + logout, `warn` on missing-role 403 + CSRF rejections + Ory-unreachable, `debug` on a JWKS kid-miss reload. app.ts's handler body was extracted to `handleRequest` and run inside `runWithLog`; `end()` is coordinated to fire exactly once after **both** the handler unwinds **and** the response closes, so a client abort mid-handler can't end the log out from under a still-running `ctx.log`/`tracedFetch` (regression-tested). Tests extended (logger: serviceName/runWithLog/currentLog/tracedFetch-continues-trace; config: SERVICE_NAME; context: ctx.log default+passthrough; app: ctx.log in-trace + ctx.log.fetch propagation + the abort race; plugin-api: tracedFetch+Log surface). Stability-reviewer on the diff: **APPROVE, no Critical/High** (one yellow — the abort-race `end()` — fixed as above; green nits addressed: traced-fetch comment, app-logger backstop on a handler escape; confirmed the Ory timeout still honoured through `log.fetch` and no secret reaches a log line). `docs/plugin-contract.md` (`ctx.log`/`ctx.log.fetch`/`tracedFetch`), README (config + Observability tracing/serviceName, plugin note, Layout) updated. typecheck + **333 units** + the full `scripts/ci.sh` E2E gate green (326 → 333).
- [x] JWT signing-key rotation runbook. → Expanded the README **JWT signing key & rotation** section from a 3-line note into an operational runbook, and closed the tooling gap that made the documented steps unrunnable: the old "prepend a key / drop it later" required hand-editing a JSON file holding a private signing key. Tests-first: new pure `rotateJwks(current, {prune})` in `gen-jwks.ts` — `--prepend` puts a fresh ES256 key first (Kratos signs with `keys[0]`, the old keys still verify in-flight JWTs) and keeps the rest in order; `--prune` keeps only the newest (drop superseded post-TTL). CLI reads the existing set from a path arg and writes the new set to stdout (header documents the temp-file redirect so the shell's `>` can't truncate the input). `gen-jwks.test.ts` covers prepend (length+1, fresh kid first, old set preserved) + prune (→ 1 key). Runbook documents: the two-sided install (Kratos signer env/mount + web `JWKS_URL`, `file://` hot-reloads / `base64://` immutable), why it's zero-downtime (sign-with-first + verify-by-`kid`), the **scheduled** path (prepend → `restart kratos` → verify new kid → wait ~12 min = 10m TTL + skew → prune; rollback before prune) and the **emergency** path (replace with a single key → every leaked-key token fails signature → forced re-login; the §9 denylist is moot since the signature is already invalid). Verified the CLI live against the committed dev JWKS (bare→1 key, `--prepend`→2 with the old kid second, `--prune`→1). Docs/CLI-only behaviour, covered by units (per the §9 precedent, no new browser E2E). README Status + Layout updated. typecheck + **335 units** green (333 → 335).
- [x] Refresh README `Layout` + drop `_(planned)_` markers as pieces land. → The `_(planned)_` markers were already dropped as each piece landed (swept the whole README — none remain, and the **Status** paragraph already reflects the built state). Refreshed the `Layout` block, which had drifted: added the three source modules it was missing — `fetch-timeout.ts` (`withTimeout()`, the Ory outbound-call deadline wrapper, §8), `guards.ts` (`requireSession()/can()/check()`, in-handler authz + `GuardError`, §4), `hooks.ts` (`runBoot/Request/ResponseHooks()`, plugin lifecycle, §2) — plus the `scripts/ci.sh` CI gate (§8). Cross-checked mechanically: every non-test `src/*.ts` and every top-level dir (bar `node_modules`) now has a line; `public/`/`plugins/`/`examples/` descriptions still match their contents. Docs-only.
-- [ ] Run the architecture and the product reviewer agents on the _whole_ project, not just the latest changes, and address their issues.
+- [x] Run the architecture and the product reviewer agents on the _whole_ project, not just the latest changes, and address their issues. → Ran both (systems-architect + product-owner) on the whole project: **no Critical/High** — both report a converged, fit-for-purpose scaffold. **Fixed now (tests-first), the in-scope §9 customer-facing/security items:** (1) **`return_to` deep-link preservation** (product 🟡, explicitly deferred *to* §9) — a gated request hit while signed out (the plugin-route gate in `app.ts`, `requireSession`, `requireAdmin`) now bounces to `/login?return_to=` via a new `loginRedirect(ctx)` (GET/HEAD only, skips `/`). `/login` bakes it into the Kratos flow: a **host-relative** target is wrapped through `/auth/complete?return_to=` so the session JWT is minted *before* the user lands there, while an **absolute** target (the §6 OAuth2 login challenge) passes to Kratos unchanged (Kratos allow-lists it). `/auth/complete` redirects to the requested page; the OAuth2 path is untouched. (2) **`safeUrl()` + `localPath()`** — the doc-promised §9 URL-safety helpers, new pure `src/safe-url.ts`: `safeUrl` sanitises an untrusted href/src to relative-or-`http(s)` (else `"#"`), exported to plugins via `plugin-api.ts` and documented in the contract with a usage snippet (closing the "planned for §9" vapor pointer); `localPath` is the host-relative redirect-allowlist guard backing `return_to` (rejects `//evil.com`/`https:`/control chars, double-checked at both `/login` and `/auth/complete` so a crafted `?return_to=` can't open-redirect). (3) **Honest 503 on Ory-unreachable sign-in** (product 🟡) — the auth-flow block renders a new `views/503.ejs` ("sign-in temporarily unavailable") on a Kratos 5xx/network throw instead of the misattributed catch-all 500; an expired-flow 4xx still restarts, other 4xx still rethrow. Tests-first throughout: `safe-url.test.ts` (scheme/obfuscation + host-relative matrix); `guards`/`admin-nav`/`app` assertions updated to the new `return_to` contract + the `/login` wrap + the 503 (incl. a network-throw case) + `/auth/complete` honour/reject; `plugin-api` export guard; a full-flow **E2E** (deep link → login → lands on the page) and the visual-suite gated-bounce assertion. Stability-reviewer on the diff: **APPROVE** (no Critical/High) — open-redirect double-layered + the OAuth2 path verified intact; addressed its one Medium (scoped the 503 catch to genuine connectivity errors — the success render moved out of the `try` so a template bug hits the 500 with a stack, not a misleading 503) and its NBSP-strip-set note (documenting test). README (Production return_to/503 note, Layout) + `docs/plugin-contract.md` (return_to framing, `safeUrl` usage) updated. typecheck + **339 units** + the full `scripts/ci.sh` gate green (visual 9 · auth 1 · oauth 2 · full 7). **Deferred, with justification:** the **`app.ts` route-table** refactor (arch Medium — fold the admin/oauth if-ladder into a `{method,prefix,handler}` table, fixing the built-in-route 405/`allowedMethods` blindness + the dep-absent-404 honesty) → the architect explicitly recommends a **standalone change** (not bundled into review-fixes) and it's a **§10 prerequisite**; the **mock dashboard + dead `#` affordances** (arch/product) → **§10 line 139** (gate `/` + make it plugin-replaceable); **public-page blessing/docs** (product 🟢) → **§10 line 140**; success-flash after writes (stateless, known) and plugin-upstream-config visibility (no change recommended) → left as-is.
- [ ] Go over all comments in the code and the README and try to make it shorter and more information dense. Remove not strictly needed stuff.
- [ ] Go over all tests and combine/unify ones that cover the same stuff or are very related and could be combined in a good way. Remove tests that aren't helping, we only want tests that are actually helpful to us.
diff --git a/views/503.ejs b/views/503.ejs
new file mode 100644
index 0000000..b629904
--- /dev/null
+++ b/views/503.ejs
@@ -0,0 +1,16 @@
+
+
+
+
+
+ <%= title %>
+
+
+
+
+
Sign-in is temporarily unavailable
+
We can't reach the identity service right now (503). Please try again in a moment.