[codex] Add configurable liveness auto-recovery controls (#4587)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - Heartbeat liveness recovery decides when stalled issue trees need
manager-visible follow-up.
> - Automatic recovery issue creation is useful, but operators need
instance-level controls for how aggressive it is.
> - Without controls, recovery behavior is harder to tune for local
development, production operations, and noisy edge cases.
> - This pull request adds configurable liveness auto-recovery settings
across shared contracts, API routes, services, and the instance
experimental settings UI.
> - The benefit is that operators can keep liveness findings advisory or
enable bounded recovery automation with explicit intervals and lookback
windows.

## What Changed

- Added shared types and validators for liveness auto-recovery settings.
- Extended instance settings routes and services to persist and validate
the new controls.
- Wired heartbeat/recovery services to honor enablement, minimum
interval, and lookback settings.
- Added UI controls for liveness recovery under instance experimental
settings.
- Covered the new server behavior with instance settings and liveness
escalation tests.

## Verification

- `pnpm exec vitest run --project @paperclipai/server
server/src/__tests__/heartbeat-issue-liveness-escalation.test.ts
server/src/__tests__/instance-settings-routes.test.ts --pool=forks
--poolOptions.forks.isolate=true`
- `pnpm --filter @paperclipai/shared typecheck`
- `pnpm --filter @paperclipai/server typecheck`
- `pnpm --filter @paperclipai/ui typecheck`

## Risks

- Moderate behavioral risk because recovery automation timing changes
when enabled; defaults keep existing advisory behavior unless the
setting is turned on.
- No database migration in this PR; settings are stored through the
existing instance settings path.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, `gpt-5`, coding model with tool use and local command
execution; context window not exposed by the runtime.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta 2026-04-27 08:46:44 -05:00 committed by GitHub
parent f0f9460d1d
commit fda296ee4f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 679 additions and 54 deletions

View file

@ -103,6 +103,7 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
await instanceSettingsService(db).updateExperimental({
enableIssueGraphLivenessAutoRecovery: false,
enableIsolatedWorkspaces: false,
issueGraphLivenessAutoRecoveryLookbackHours: 24,
});
});
@ -116,7 +117,7 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
});
}
async function seedBlockedChain(opts: { stale?: boolean } = {}) {
async function seedBlockedChain(opts: { outsideLookback?: boolean } = {}) {
const companyId = randomUUID();
const managerId = randomUUID();
const coderId = randomUUID();
@ -157,9 +158,9 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
},
]);
const issueTimestamp = opts.stale === false
? new Date()
: new Date(Date.now() - 25 * 60 * 60 * 1000);
const issueTimestamp = opts.outsideLookback === true
? new Date(Date.now() - 25 * 60 * 60 * 1000)
: new Date(Date.now() - 60 * 60 * 1000);
await db.insert(issues).values([
{
id: blockedIssueId,
@ -197,6 +198,9 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
}
it("keeps liveness findings advisory when auto recovery is disabled", async () => {
await instanceSettingsService(db).updateExperimental({
enableIssueGraphLivenessAutoRecovery: false,
});
const { companyId } = await seedBlockedChain();
const heartbeat = heartbeatService(db);
@ -214,16 +218,16 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
expect(escalations).toHaveLength(0);
});
it("does not create recovery issues until the dependency path is stale for 24 hours", async () => {
it("does not create recovery issues outside the configured lookback window", async () => {
await enableAutoRecovery();
const { companyId } = await seedBlockedChain({ stale: false });
const { companyId } = await seedBlockedChain({ outsideLookback: true });
const heartbeat = heartbeatService(db);
const result = await heartbeat.reconcileIssueGraphLiveness();
expect(result.findings).toBe(1);
expect(result.escalationsCreated).toBe(0);
expect(result.skippedAutoRecoveryTooYoung).toBe(1);
expect(result.skippedOutsideLookback).toBe(1);
const escalations = await db
.select()
@ -424,7 +428,7 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
const dependentExecutionWorkspaceId = randomUUID();
const blockerExecutionWorkspaceId = randomUUID();
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
const issueTimestamp = new Date(Date.now() - 25 * 60 * 60 * 1000);
const issueTimestamp = new Date(Date.now() - 60 * 60 * 1000);
await db.insert(companies).values({
id: companyId,
@ -559,7 +563,7 @@ describeEmbeddedPostgres("heartbeat issue graph liveness escalation", () => {
const { companyId, blockedIssueId, blockerIssueId } = await seedBlockedChain();
const secondBlockedIssueId = randomUUID();
const issuePrefix = `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`;
const issueTimestamp = new Date(Date.now() - 25 * 60 * 60 * 1000);
const issueTimestamp = new Date(Date.now() - 60 * 60 * 1000);
await db.insert(issues).values({
id: secondBlockedIssueId,
companyId,

View file

@ -9,10 +9,15 @@ const mockInstanceSettingsService = vi.hoisted(() => ({
updateExperimental: vi.fn(),
listCompanyIds: vi.fn(),
}));
const mockHeartbeatService = vi.hoisted(() => ({
buildIssueGraphLivenessAutoRecoveryPreview: vi.fn(),
reconcileIssueGraphLiveness: vi.fn(),
}));
const mockLogActivity = vi.hoisted(() => vi.fn());
function registerModuleMocks() {
vi.doMock("../services/index.js", () => ({
heartbeatService: () => mockHeartbeatService,
instanceSettingsService: () => mockInstanceSettingsService,
logActivity: mockLogActivity,
}));
@ -48,6 +53,8 @@ describe("instance settings routes", () => {
mockInstanceSettingsService.updateGeneral.mockReset();
mockInstanceSettingsService.updateExperimental.mockReset();
mockInstanceSettingsService.listCompanyIds.mockReset();
mockHeartbeatService.buildIssueGraphLivenessAutoRecoveryPreview.mockReset();
mockHeartbeatService.reconcileIssueGraphLiveness.mockReset();
mockLogActivity.mockReset();
mockInstanceSettingsService.getGeneral.mockResolvedValue({
censorUsernameInLogs: false,
@ -58,7 +65,8 @@ describe("instance settings routes", () => {
enableEnvironments: false,
enableIsolatedWorkspaces: false,
autoRestartDevServerWhenIdle: false,
enableIssueGraphLivenessAutoRecovery: false,
enableIssueGraphLivenessAutoRecovery: true,
issueGraphLivenessAutoRecoveryLookbackHours: 24,
});
mockInstanceSettingsService.updateGeneral.mockResolvedValue({
id: "instance-settings-1",
@ -74,10 +82,32 @@ describe("instance settings routes", () => {
enableEnvironments: true,
enableIsolatedWorkspaces: true,
autoRestartDevServerWhenIdle: false,
enableIssueGraphLivenessAutoRecovery: false,
enableIssueGraphLivenessAutoRecovery: true,
issueGraphLivenessAutoRecoveryLookbackHours: 24,
},
});
mockInstanceSettingsService.listCompanyIds.mockResolvedValue(["company-1", "company-2"]);
mockHeartbeatService.buildIssueGraphLivenessAutoRecoveryPreview.mockResolvedValue({
lookbackHours: 24,
cutoff: "2026-04-26T12:00:00.000Z",
generatedAt: "2026-04-27T12:00:00.000Z",
findings: 1,
recoverableFindings: 1,
skippedOutsideLookback: 0,
items: [],
});
mockHeartbeatService.reconcileIssueGraphLiveness.mockResolvedValue({
findings: 1,
autoRecoveryEnabled: true,
lookbackHours: 24,
cutoff: "2026-04-26T12:00:00.000Z",
escalationsCreated: 1,
existingEscalations: 0,
skipped: 0,
skippedAutoRecoveryDisabled: 0,
skippedOutsideLookback: 0,
escalationIssueIds: ["issue-2"],
});
});
it("allows local board users to read and update experimental settings", async () => {
@ -94,7 +124,8 @@ describe("instance settings routes", () => {
enableEnvironments: false,
enableIsolatedWorkspaces: false,
autoRestartDevServerWhenIdle: false,
enableIssueGraphLivenessAutoRecovery: false,
enableIssueGraphLivenessAutoRecovery: true,
issueGraphLivenessAutoRecoveryLookbackHours: 24,
});
const patchRes = await request(app)
@ -138,14 +169,58 @@ describe("instance settings routes", () => {
await request(app)
.patch("/api/instance/settings/experimental")
.send({ enableIssueGraphLivenessAutoRecovery: true })
.send({
enableIssueGraphLivenessAutoRecovery: true,
issueGraphLivenessAutoRecoveryLookbackHours: 12,
})
.expect(200);
expect(mockInstanceSettingsService.updateExperimental).toHaveBeenCalledWith({
enableIssueGraphLivenessAutoRecovery: true,
issueGraphLivenessAutoRecoveryLookbackHours: 12,
});
});
it("previews issue graph liveness recovery candidates before enabling", async () => {
const app = await createApp({
type: "board",
userId: "local-board",
source: "local_implicit",
isInstanceAdmin: true,
});
const res = await request(app)
.post("/api/instance/settings/experimental/issue-graph-liveness-auto-recovery/preview")
.send({ lookbackHours: 12 })
.expect(200);
expect(res.body).toMatchObject({ lookbackHours: 24, recoverableFindings: 1 });
expect(mockHeartbeatService.buildIssueGraphLivenessAutoRecoveryPreview).toHaveBeenCalledWith({
lookbackHours: 12,
});
});
it("kicks off issue graph liveness recovery on demand", async () => {
const app = await createApp({
type: "board",
userId: "local-board",
source: "local_implicit",
isInstanceAdmin: true,
});
await request(app)
.post("/api/instance/settings/experimental/issue-graph-liveness-auto-recovery/run")
.send({ lookbackHours: 12 })
.expect(200);
expect(mockHeartbeatService.reconcileIssueGraphLiveness).toHaveBeenCalledWith({
runId: null,
force: true,
lookbackHours: 12,
});
expect(mockLogActivity).toHaveBeenCalledTimes(2);
});
it("allows local board users to update environment controls", async () => {
const app = await createApp({
type: "board",