paperclip/server/src/routes/health.ts
Dotta 8da50dbcf8
[codex] Add private browser first-admin claim flow (#6755)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - Fresh self-hosted deployments need an operator path before any
invite exists.
> - Umbrel installs are private LAN deployments, so a one-time browser
claim is appropriate only when the deployment is private and unclaimed.
> - Public deployments and installs with active invites must keep the
existing invite-only model so admin creation is not exposed broadly.
> - GitHub PR #2927 established the useful direction, but it needed to
be adapted onto current `master` rather than merged as-is.
> - This pull request adds that adapted private-only claim flow across
server, UI, docs, and regression coverage.
> - The benefit is that a fresh private Umbrel-style install can be
claimed from the browser without weakening public deployment access.

## What Changed

- Added a first-admin claim service and access route support for
one-time admin claim eligibility on private unclaimed deployments.
- Updated the bootstrap/access UI so eligible private installs show a
setup claim path, while public and invited deployments keep invite-first
behavior.
- Added a bootstrap-pending setup UX lab covering claim, invite, public,
and signed-in access states.
- Updated deployment and local development docs for authenticated
private/public behavior and the Umbrel-style claim path.
- Added server and UI regression tests for private claim, public
no-claim, active invite fallback, existing board/no-access flows, and
health exposure reporting.
- Stabilized PR handoff verification by serializing the aggregate server
Vitest workspace run, forcing `NODE_ENV=test`, and relaxing the
heartbeat batching test around legitimate recovery follow-up runs.

## Verification

- `pnpm -r typecheck`
- `pnpm build`
- `pnpm vitest --run server/src/__tests__/heartbeat-comment-wake-batching.test.ts`
- `pnpm vitest --run server/src/__tests__/health-dev-server-token.test.ts`
- `pnpm test:run`
- QA validation: PAP-10115 passed browser validation with screenshots
for private fresh install claim, active invite versus claim conflict,
public invite-only/claim-absent behavior, existing invite fallback, and
normal board/no-access flows.
- GitHub closeout: issue #2579 and PR #2927 were updated with the
accepted direction: adapt the implementation, do not direct-merge #2927
as-is.

## Risks

- The claim endpoint must remain private-only and one-time; a regression
here could expose admin creation on public deployments.
- Existing invite behavior must remain intact for public deployments and
installs that already have an active invite.
- The stable Vitest harness now serializes the aggregate server
workspace group; this is slower, but it avoids DB-backed suite
collisions under root workspace mode.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected - check the roadmap
first. See `CONTRIBUTING.md`.
>
> ROADMAP.md checked: this is a scoped deployment bootstrap/access fix
and does not duplicate a listed roadmap project.

## Model Used

- OpenAI GPT-5 Codex via Paperclip `codex_local` for product
engineering, implementation, and verification, with tool-enabled local
code execution. Paperclip QA browser validation was performed in
PAP-10115 by the assigned QA agent; exact adapter model metadata for
that QA run is not exposed in this PR context.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-05-27 21:15:01 -10:00

184 lines
6.1 KiB
TypeScript

import { timingSafeEqual } from "node:crypto";
import { Router } from "express";
import type { Db } from "@paperclipai/db";
import { and, count, eq, gt, inArray, isNull, sql } from "drizzle-orm";
import { heartbeatRuns, instanceUserRoles, invites } from "@paperclipai/db";
import type { DeploymentExposure, DeploymentMode } from "@paperclipai/shared";
import { readPersistedDevServerStatus, toDevServerHealthStatus, writeDevServerRestartRequest } from "../dev-server-status.js";
import { logger } from "../middleware/logger.js";
import { instanceSettingsService } from "../services/instance-settings.js";
import { serverVersion } from "../version.js";
/**
 * Decides whether a caller may see the complete health payload.
 *
 * Any deployment mode other than `"authenticated"` always exposes full
 * details. In `"authenticated"` mode only `"board"` and `"agent"` actors
 * qualify; `"none"`, `null` and `undefined` do not.
 *
 * @param actorType Actor type resolved for the request, if any.
 * @param deploymentMode Deployment mode the server is running in.
 * @returns `true` when the full health payload may be returned.
 */
function shouldExposeFullHealthDetails(
  actorType: "none" | "board" | "agent" | null | undefined,
  deploymentMode: DeploymentMode,
) {
  const requiresRecognizedActor = deploymentMode === "authenticated";
  const isRecognizedActor = actorType === "board" || actorType === "agent";
  return !requiresRecognizedActor || isRecognizedActor;
}
/**
 * Checks a caller-supplied token against the token configured in the
 * `PAPERCLIP_DEV_SERVER_STATUS_TOKEN` environment variable.
 *
 * Both values are trimmed before comparison. The check fails when either
 * value is missing or blank, or when their byte lengths differ; otherwise
 * the bytes are compared with `timingSafeEqual`.
 *
 * @param providedToken Token taken from the request, if one was sent.
 * @returns `true` only when a configured token exists and matches.
 */
function hasDevServerStatusToken(providedToken: string | undefined) {
  const configured = process.env.PAPERCLIP_DEV_SERVER_STATUS_TOKEN?.trim() ?? "";
  const candidate = providedToken?.trim() ?? "";
  if (configured === "" || candidate === "") {
    return false;
  }

  const configuredBytes = Buffer.from(configured);
  const candidateBytes = Buffer.from(candidate);
  // timingSafeEqual throws on buffers of unequal length, so check that first.
  return (
    configuredBytes.length === candidateBytes.length &&
    timingSafeEqual(configuredBytes, candidateBytes)
  );
}
/**
 * Builds the Express router that serves the health endpoints.
 *
 * Routes registered on the returned router:
 * - `POST /dev-server/restart` writes a dev-server restart request. In
 *   `"authenticated"` mode only `"board"` actors may call it (403 otherwise).
 *   Responds 404 when no persisted dev-server status exists or the request
 *   could not be written, 409 when no restart is required, and 202 once the
 *   request has been written.
 * - `GET /` reports server health. In `"authenticated"` mode, callers that
 *   are neither `"board"` nor `"agent"` receive a reduced payload that omits
 *   the server version, `authReady` and feature flags.
 *
 * @param db Optional database handle. When omitted, `GET /` answers without
 *   probing the database or computing bootstrap state.
 * @param opts Deployment settings echoed in the health payload and used to
 *   decide how much detail is revealed. Defaults to a `"local_trusted"`,
 *   `"private"` deployment with auth ready and company deletion enabled.
 * @returns The configured router.
 */
export function healthRoutes(
  db?: Db,
  opts: {
    deploymentMode: DeploymentMode;
    deploymentExposure: DeploymentExposure;
    authReady: boolean;
    companyDeletionEnabled: boolean;
  } = {
    deploymentMode: "local_trusted",
    deploymentExposure: "private",
    authReady: true,
    companyDeletionEnabled: true,
  },
) {
  const router = Router();

  router.post("/dev-server/restart", async (req, res) => {
    const actorType = "actor" in req ? req.actor?.type : null;
    if (opts.deploymentMode === "authenticated" && actorType !== "board") {
      res.status(403).json({ error: "board_access_required" });
      return;
    }

    const persistedDevServerStatus = readPersistedDevServerStatus();
    if (!persistedDevServerStatus) {
      res.status(404).json({ error: "dev_server_supervisor_unavailable" });
      return;
    }

    // A restart is only meaningful when the persisted status reports
    // something pending: a dirty flag, changed paths, or pending migrations.
    const restartRequired =
      persistedDevServerStatus.dirty ||
      persistedDevServerStatus.changedPathCount > 0 ||
      persistedDevServerStatus.pendingMigrations.length > 0;
    if (!restartRequired) {
      res.status(409).json({ error: "restart_not_required" });
      return;
    }

    const written = writeDevServerRestartRequest({
      requestedAt: new Date().toISOString(),
      reason: "manual_restart_now",
    });
    if (!written) {
      res.status(404).json({ error: "dev_server_supervisor_unavailable" });
      return;
    }

    res.status(202).json({ status: "restart_requested" });
  });

  router.get("/", async (req, res) => {
    const actorType = "actor" in req ? req.actor?.type : null;
    const exposeFullDetails = shouldExposeFullHealthDetails(
      actorType,
      opts.deploymentMode,
    );
    // Dev-server details are also available to callers presenting the
    // dev-server status token, even when they get the reduced payload.
    const exposeDevServerDetails =
      exposeFullDetails || hasDevServerStatusToken(req.get("x-paperclip-dev-server-status-token"));

    if (!db) {
      res.json(
        exposeFullDetails
          ? { status: "ok", version: serverVersion }
          : { status: "ok", deploymentMode: opts.deploymentMode },
      );
      return;
    }

    try {
      await db.execute(sql`SELECT 1`);
    } catch (error) {
      logger.warn({ err: error }, "Health check database probe failed");
      // Apply the same redaction as the success path: the server version is
      // only revealed to callers entitled to full health details.
      res.status(503).json({
        status: "unhealthy",
        ...(exposeFullDetails ? { version: serverVersion } : {}),
        error: "database_unreachable",
      });
      return;
    }

    let bootstrapStatus: "ready" | "bootstrap_pending" = "ready";
    let bootstrapInviteActive = false;
    if (opts.deploymentMode === "authenticated") {
      // Bootstrap is pending until at least one instance admin role exists.
      const roleCount = await db
        .select({ count: count() })
        .from(instanceUserRoles)
        .where(sql`${instanceUserRoles.role} = 'instance_admin'`)
        .then((rows) => Number(rows[0]?.count ?? 0));
      bootstrapStatus = roleCount > 0 ? "ready" : "bootstrap_pending";

      if (bootstrapStatus === "bootstrap_pending") {
        // An invite counts as active when it is a bootstrap CEO invite that
        // is not revoked, not accepted, and not yet expired.
        const now = new Date();
        const inviteCount = await db
          .select({ count: count() })
          .from(invites)
          .where(
            and(
              eq(invites.inviteType, "bootstrap_ceo"),
              isNull(invites.revokedAt),
              isNull(invites.acceptedAt),
              gt(invites.expiresAt, now),
            ),
          )
          .then((rows) => Number(rows[0]?.count ?? 0));
        bootstrapInviteActive = inviteCount > 0;
      }
    }

    const persistedDevServerStatus = readPersistedDevServerStatus();
    let devServer: ReturnType<typeof toDevServerHealthStatus> | undefined;
    // NOTE(review): the `select` check presumably tolerates partial db
    // stand-ins that only implement `execute` - confirm against callers.
    if (exposeDevServerDetails && persistedDevServerStatus && typeof (db as { select?: unknown }).select === "function") {
      const instanceSettings = instanceSettingsService(db);
      const experimentalSettings = await instanceSettings.getExperimental();
      const activeRunCount = await db
        .select({ count: count() })
        .from(heartbeatRuns)
        .where(inArray(heartbeatRuns.status, ["queued", "running"]))
        .then((rows) => Number(rows[0]?.count ?? 0));

      devServer = toDevServerHealthStatus(persistedDevServerStatus, {
        autoRestartEnabled: experimentalSettings.autoRestartDevServerWhenIdle ?? false,
        activeRunCount,
      });
    }

    if (!exposeFullDetails) {
      // Reduced payload: no version, authReady, or feature flags.
      res.json({
        status: "ok",
        deploymentMode: opts.deploymentMode,
        deploymentExposure: opts.deploymentExposure,
        bootstrapStatus,
        bootstrapInviteActive,
        ...(devServer ? { devServer } : {}),
      });
      return;
    }

    res.json({
      status: "ok",
      version: serverVersion,
      deploymentMode: opts.deploymentMode,
      deploymentExposure: opts.deploymentExposure,
      authReady: opts.authReady,
      bootstrapStatus,
      bootstrapInviteActive,
      features: {
        companyDeletionEnabled: opts.companyDeletionEnabled,
      },
      ...(devServer ? { devServer } : {}),
    });
  });

  return router;
}