diff --git a/AGENTS.md b/AGENTS.md index ca70869..c7383bc 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -37,10 +37,10 @@ commands and layout. Docker Compose. **Never run `node`, `npm`, or `tsc` on the host.** ```bash -docker compose up # dev server, live reload -docker compose run --rm web npm run typecheck # strict type check -docker compose run --rm web npm test # tests -docker compose -f compose.yml up --build -d # production +docker compose up # dev server, live reload +docker compose run --rm --no-deps web npm run typecheck # strict type check (--no-deps: skip Ory) +docker compose run --rm --no-deps web npm test # tests +docker compose -f compose.yml up --build -d # production ``` ## Rules diff --git a/README.md b/README.md index 34bcd8f..d838f22 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ auto-merged by `docker compose up`) turns them back off for live editing. | `REQUIRE_SECURE_SECRETS` | `false` | when `true`, the two secrets must be supplied and differ from the dev throwaways | | `KRATOS_PUBLIC_URL` / `KRATOS_ADMIN_URL` | `http://kratos:4433` / `:4434` | identity (self-service / admin) | | `KETO_READ_URL` / `KETO_WRITE_URL` | `http://keto:4466` / `:4467` | permission check / write | -| `JWKS_URL` | Kratos tokenizer JWKS | verifies the session JWT (§4) | +| `JWKS_URL` | `file://…/tokenizer/jwks.json` | the Kratos tokenizer signing key; verifies the session JWT (§4) | | `COOKIE_SECRET` / `CSRF_SECRET` | dev throwaways | enforced by `REQUIRE_SECURE_SECRETS` | ### What you must supply (the only manual prep) @@ -190,7 +190,7 @@ cookie/cipher secrets in `kratos.yml`) — a clean clone works; **never run it i production**. 
(Re)generate with the bundled generator: ```bash -docker compose run --rm -T web node src/gen-jwks.ts > ory/kratos/tokenizer/jwks.json +docker compose run --rm -T --no-deps web node src/gen-jwks.ts > ory/kratos/tokenizer/jwks.json ``` **Production:** mount a real key over that path, or set @@ -203,10 +203,13 @@ one for ~one token TTL (10m) so in-flight JWTs still verify, then drop it. ## Type check & tests ```bash -docker compose run --rm web npm run typecheck # strict tsc --noEmit -docker compose run --rm web npm test # node --test (units) +docker compose run --rm --no-deps web npm run typecheck # strict tsc --noEmit +docker compose run --rm --no-deps web npm test # node --test (units) ``` +`--no-deps` keeps these off the Ory stack — units need no Postgres/Kratos/Keto, and `web` +otherwise drags up its `depends_on` services. + ### End-to-end (Playwright) E2E runs in the official Playwright image (browsers preinstalled) against the live `web` diff --git a/compose.yml b/compose.yml index 078859d..43c1c61 100644 --- a/compose.yml +++ b/compose.yml @@ -20,6 +20,10 @@ services: condition: service_healthy keto: condition: service_healthy + # Read the session-JWT verify key from the same tokenizer JWKS Kratos signs with + # (config.ts JWKS_URL default; §4 verifier). Read-only — bootstrap is the only writer. + volumes: + - ./ory/kratos/tokenizer:/etc/config/kratos/tokenizer:ro restart: unless-stopped # Ory's storage only (Kratos/Keto/Hydra) — the web app never connects here. @@ -125,7 +129,10 @@ services: volumes: - ./ory/kratos/tokenizer:/etc/config/kratos/tokenizer command: node src/bootstrap.ts - restart: on-failure + # Bounded retry: the seed is idempotent (409-create + idempotent PUT), so transient Ory + # blips recover — but a permanent error must give up, not loop forever and hang `web` + # (which gates on service_completed_successfully). + restart: "on-failure:5" # Ory Hydra — OAuth2/OIDC provider (other apps log in *through* plainpages; README). 
# DSN is the per-service `hydra` DB (init.sql). Issuer + login/consent/logout run at diff --git a/src/config.test.ts b/src/config.test.ts index 0e785d4..14dd098 100644 --- a/src/config.test.ts +++ b/src/config.test.ts @@ -1,5 +1,7 @@ import assert from "node:assert/strict"; +import { readFileSync } from "node:fs"; import { test } from "node:test"; +import { fileURLToPath } from "node:url"; import { loadConfig } from "./config.ts"; // Explicit secure-secret enforcement (no environment sniffing): secrets are the only @@ -18,11 +20,26 @@ test("loads dev defaults when the environment is empty", () => { assert.equal(c.kratosAdminUrl, "http://kratos:4434"); assert.equal(c.ketoReadUrl, "http://keto:4466"); assert.equal(c.ketoWriteUrl, "http://keto:4467"); - assert.match(c.jwksUrl, /jwks/); assert.match(c.cookieSecret, /dev-insecure/); assert.match(c.csrfSecret, /dev-insecure/); }); +test("JWKS_URL defaults to the committed Kratos tokenizer signing key, not an http endpoint", () => { + // The session JWT is signed by the tokenizer key (kratos.yml jwks_url); Kratos does NOT + // republish it at /.well-known/jwks.json, so the §4 verifier reads that same file://. + const url = new URL(loadConfig({}).jwksUrl); + assert.equal(url.protocol, "file:"); + assert.match(url.pathname, /tokenizer\/jwks\.json$/); + + // And that file is a real ES256 signing JWKS carrying a kid (what the verifier resolves by). 
+ const path = fileURLToPath(new URL("../ory/kratos/tokenizer/jwks.json", import.meta.url)); + const key = (JSON.parse(readFileSync(path, "utf8")) as { keys: { alg: string; kid: string; kty: string }[] }).keys[0]; + assert.ok(key, "tokenizer JWKS must have a key"); + assert.equal(key.alg, "ES256"); + assert.equal(key.kty, "EC"); + assert.ok(key.kid, "tokenizer JWKS key must carry a kid"); +}); + test("parses explicit boolean toggles and rejects non-boolean values", () => { assert.equal(loadConfig({ CACHE_TEMPLATES: "true" }).cacheTemplates, true); assert.equal(loadConfig({ CACHE_TEMPLATES: "false" }).cacheTemplates, false); diff --git a/src/config.ts b/src/config.ts index 0034b1d..01665fb 100644 --- a/src/config.ts +++ b/src/config.ts @@ -68,7 +68,11 @@ export function loadConfig(env: Env = process.env): Config { cacheTemplates: readBool(env, "CACHE_TEMPLATES", false), cookieSecret: readSecret(env, "COOKIE_SECRET", "dev-insecure-cookie-secret", requireSecure), csrfSecret: readSecret(env, "CSRF_SECRET", "dev-insecure-csrf-secret", requireSecure), - jwksUrl: readUrl(env, "JWKS_URL", "http://kratos:4433/.well-known/jwks.json"), + // The session JWT is signed by the Kratos tokenizer key (kratos.yml jwks_url); the §4 + // verifier reads that same key. Kratos does not republish it over HTTP, so default to a + // file:// URL for the tokenizer JWKS mounted into the web container (compose.yml) — not a + // well-known endpoint. Prod overrides with a real key (README: JWT signing key & rotation). + jwksUrl: readUrl(env, "JWKS_URL", "file:///etc/config/kratos/tokenizer/jwks.json"), ketoReadUrl: readUrl(env, "KETO_READ_URL", "http://keto:4466"), ketoWriteUrl: readUrl(env, "KETO_WRITE_URL", "http://keto:4467"), kratosAdminUrl: readUrl(env, "KRATOS_ADMIN_URL", "http://kratos:4434"), diff --git a/todo.md b/todo.md index 00f125f..4368574 100644 --- a/todo.md +++ b/todo.md @@ -70,7 +70,7 @@ everything via Docker. 
- [x] **One-command bootstrap** (the MVP bar): `docker compose up` brings up web + all Ory services + Postgres with *zero* manual prep. Commit working default Ory configs; auto-run migrations on first boot; auto-generate the JWKS signing key if absent; seed an admin identity + its Keto roles + a demo password (`admin`/`admin`) idempotently. Land an `OPL`/namespace bootstrap so Keto answers checks out of the box. → `src/bootstrap.ts` + a one-shot `bootstrap` compose service: runs after kratos+keto are healthy (web gates on its `service_completed_successfully`), idempotent so every `up` re-runs cleanly. (1) `ensureJwks` generates the ES256 signing key (reuses `gen-jwks.ts`) only when the committed dev key is absent — tokenizer dir mounted rw so it can land. (2) `seedAdmin` creates `admin@plainpages.local`/`admin` via the Kratos admin API (a re-run's 409 → look up + reuse the id). (3) grants `Role:admin#members@user:` via the Keto write API (PUT, idempotent) — the source of truth the §4 login flow projects into the JWT. Migrations + default Ory configs already auto-run/committed (§3); OPL/namespaces load from `keto.yml` (§3). The password policy is bypassed by the admin API, so `admin`/`admin` is accepted. Tests-first: `bootstrap.test.ts` (payload builders, seed idempotency via mock fetch, generate-if-absent) + `compose.test.ts` (service wiring). Boot-verified the whole chain on the live stack: `docker compose up --wait` seeds with zero prep, Keto `check` → `allowed:true`, login with `admin@plainpages.local`/`admin` issues a session + tokenizes a JWT; re-run → "already present"; moving the committed key → "generated a JWKS signing key". JWT `roles` stays `[]` until §4 wires the Keto→`metadata_admin` projection. typecheck + 151 units green. The first-run banner (login URL + creds) and the prod-secret/SSO exception docs are the next §3 items. 
- [x] First-run banner / log line printing the login URL + seeded admin creds, with a clear "change these before production" warning. → `firstRunBanner()` in `src/bootstrap.ts` (pure, testable) renders a boxed banner — login URL · seeded email/password · "⚠ change before production" — that `main()` prints after seeding. Login URL from `APP_URL` (compose default `http://localhost:3000`, overridable per deployment); creds reuse the seeded `ADMIN_EMAIL`/`ADMIN_PASSWORD`. Tests-first (`bootstrap.test.ts`: asserts URL + creds + warning present); README **Development** notes the banner. Live-verified: rebuilt bootstrap prints the banner after the admin line; typecheck + 152 units green; stack torn down. - [x] Document the *only* things that can't be auto-generated: third-party **SSO provider** client id/secret (optional — password login works without them) and **production secrets** (real cookie/CSRF secret + signing key, supplied via env, replacing the dev throwaways). Everything else must work from a clean clone. → New README **What you must supply (the only manual prep)** subsection (under Configuration) consolidates the previously-scattered facts into one authoritative list: a clean clone needs nothing; exactly two production-only things can't be auto-generated — (1) production secrets (`COOKIE_SECRET`/`CSRF_SECRET` + the JWT signing key, with `REQUIRE_SECURE_SECRETS=true` refusing throwaways) and (2) optional SSO provider creds (no creds ⇒ no button). States everything else (Ory migrations, dev signing key, demo admin + Keto roles, OPL model) is generated/seeded on first boot. Cross-links the existing SSO + JWT-rotation subsections (no duplication) and adds a pointer from **Production / deployment**. All four anchors verified; docs-only — typecheck + 152 units green. -- [ ] Run the architecture _and_ the stability reviewer agents on the _whole_ project, not just the latest changes, and address their issues. 
+- [x] Run the architecture _and_ the stability reviewer agents on the _whole_ project, not just the latest changes, and address their issues. → Ran both on the whole project (weighted to the §3 Ory stack). Verdict: architecture sound + disciplined, no Critical; both independently flagged the *same* top issue. **Fixed now:** (1) HIGH (both agents) — `JWKS_URL` default was `http://kratos:4433/.well-known/jwks.json`, but Kratos does **not** republish the session-tokenizer key there (no OIDC discovery on Kratos — that's Hydra), so the §4 verifier would have fetched the wrong/empty set and *no one* could be authorized. Repointed the default to `file:///etc/config/kratos/tokenizer/jwks.json` — the exact key Kratos signs with (`kratos.yml` `jwks_url`) — and mounted that tokenizer dir **read-only into `web`** (`compose.yml`) so the verifier resolves the live key in dev *and* prod (same file bootstrap regenerates). `config.test.ts` now locks the default to the tokenizer file + asserts the committed key is a real ES256 JWKS carrying a `kid` (the regression the old `/jwks/` match missed). (2) MEDIUM (stability) — `bootstrap` had uncapped `restart: on-failure`; a *permanent* seed error would loop forever and silently hang `web` (gates on `service_completed_successfully`). Capped to `on-failure:5` (seed is idempotent — 409-create + idempotent PUT — so transient Ory blips still recover, permanent ones give up loud). (3) §3's new `web` `depends_on` made the documented `docker compose run --rm web …` typecheck/test/gen-jwks commands drag up the whole Ory stack — added `--no-deps` (README + AGENTS.md). 
**Deferred (reviewer-scoped, not §3):** extract `buildShellContext` out of `dashboard.ts` + route built-in screens through `matchRoute`/`isAuthorized` → §5 (forcing function arrives with the 2nd/3rd screen); seed the demo admin's `metadata_admin.roles` projection so first login is non-empty → §4 (the login-completion projection owns it); enforce Ory `*.yml` prod secrets + self-service return-URLs via env → §9 (ops). typecheck + 153 units green; both compose files validated. - [ ] Go over all comments in the code and the README and try to make it shorter and more information dense. Remove not strictly needed stuff. - [ ] Go over all tests and combine/unify ones that cover the same stuff or are very related and could be combined in a good way. Remove tests that aren't helping, we only want tests that are actually helpful to us.