mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-15 10:30:37 +09:00
## Thinking Path > - Paperclip is a control plane for AI-agent companies, so issue checkout and execution ownership are core safety contracts. > - The affected subsystem is the issue service and route layer that gates agent writes by `checkoutRunId` and `executionRunId`. > - PAP-1982 exposed a stale-lock failure mode where a terminal heartbeat run could leave `executionRunId` pinned after checkout ownership had moved or been cleared. > - That stale execution lock could reject legitimate PATCH/comment/release requests from the rightful assignee after a harness restart. > - This pull request centralizes terminal-run cleanup, applies it before ownership-gated writes, and adds a board-only recovery endpoint for operator intervention. > - The benefit is that crashed or terminal runs no longer strand issues behind stale execution locks, while live execution locks still block conflicting writes. ## What Changed - Added `issueService.clearExecutionRunIfTerminal()` to atomically lock the issue/run rows and clear terminal or missing execution-run locks. - Reused stale execution-lock cleanup from checkout, `assertCheckoutOwner()`, and `release()`. - Allowed the same assigned agent/current run to adopt an unowned `in_progress` checkout after stale execution-lock cleanup. - Updated release to clear `executionRunId`, `executionAgentNameKey`, and `executionLockedAt`. - Added board-only `POST /api/issues/:id/admin/force-release` with company access checks, optional `clearAssignee=true`, and `issue.admin_force_release` audit logging. - Added embedded Postgres service tests and route integration tests for stale-lock recovery, release behavior, and admin force-release authorization/audit behavior. - Documented the new force-release API in `doc/SPEC-implementation.md`. ## Verification - `pnpm vitest run server/src/__tests__/issues-service.test.ts server/src/__tests__/issue-stale-execution-lock-routes.test.ts` passed. - `pnpm vitest run server/src/__tests__/issue-stale-execution-lock-routes.test.ts server/src/__tests__/approval-routes-idempotency.test.ts server/src/__tests__/issue-comment-reopen-routes.test.ts server/src/__tests__/issue-telemetry-routes.test.ts` passed. - `pnpm -r typecheck` passed. - `pnpm build` passed. - `git diff --check` passed. - `pnpm lint` could not run because this repo has no `lint` command. - Full `pnpm test:run` completed with 4 failures in existing route suites: `approval-routes-idempotency.test.ts` (2), `issue-comment-reopen-routes.test.ts` (1), and `issue-telemetry-routes.test.ts` (1). Those same files pass when run isolated and when run together with the new stale-lock route test, so this appears to be a whole-suite ordering/mock-isolation issue outside this patch path. ## Risks - Medium: this changes ownership-gated write behavior. The new adoption path is limited to the current run, the current assignee, `in_progress` issues, and rows with no checkout owner after terminal-lock cleanup. - Low: the admin force-release endpoint is board-only and company-scoped, but misuse can intentionally clear a live lock. It writes an audit event with prior lock IDs. - No schema or migration changes. > For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`. ## Model Used - OpenAI Codex, GPT-5 coding agent (`gpt-5`), agentic coding with terminal/tool use and local test execution. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge
286 lines
8.3 KiB
TypeScript
286 lines
8.3 KiB
TypeScript
import { randomUUID } from "node:crypto";
|
|
import express from "express";
|
|
import request from "supertest";
|
|
import { eq } from "drizzle-orm";
|
|
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
|
|
import {
|
|
activityLog,
|
|
agents,
|
|
companies,
|
|
createDb,
|
|
heartbeatRuns,
|
|
issueComments,
|
|
issueRelations,
|
|
issues,
|
|
} from "@paperclipai/db";
|
|
import {
|
|
getEmbeddedPostgresTestSupport,
|
|
startEmbeddedPostgresTestDatabase,
|
|
} from "./helpers/embedded-postgres.js";
|
|
import { errorHandler } from "../middleware/index.js";
|
|
import { issueRoutes } from "../routes/issues.js";
|
|
|
|
const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport();
|
|
const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? describe : describe.skip;
|
|
|
|
if (!embeddedPostgresSupport.supported) {
|
|
console.warn(
|
|
`Skipping embedded Postgres stale execution lock route tests on this host: ${
|
|
embeddedPostgresSupport.reason ?? "unsupported environment"
|
|
}`,
|
|
);
|
|
}
|
|
|
|
describeEmbeddedPostgres("stale issue execution lock routes", () => {
|
|
let db!: ReturnType<typeof createDb>;
|
|
let tempDb: Awaited<ReturnType<typeof startEmbeddedPostgresTestDatabase>> | null = null;
|
|
|
|
beforeAll(async () => {
|
|
tempDb = await startEmbeddedPostgresTestDatabase("paperclip-stale-execution-lock-routes-");
|
|
db = createDb(tempDb.connectionString);
|
|
}, 20_000);
|
|
|
|
afterEach(async () => {
|
|
await db.delete(issueComments);
|
|
await db.delete(issueRelations);
|
|
await db.delete(activityLog);
|
|
await db.delete(issues);
|
|
await db.delete(heartbeatRuns);
|
|
await db.delete(agents);
|
|
await db.delete(companies);
|
|
});
|
|
|
|
afterAll(async () => {
|
|
await tempDb?.cleanup();
|
|
});
|
|
|
|
function createApp(actor: Express.Request["actor"]) {
|
|
const app = express();
|
|
app.use(express.json());
|
|
app.use((req, _res, next) => {
|
|
req.actor = actor;
|
|
next();
|
|
});
|
|
app.use("/api", issueRoutes(db, {} as any));
|
|
app.use(errorHandler);
|
|
return app;
|
|
}
|
|
|
|
async function seedCompanyAgentAndRuns() {
|
|
const companyId = randomUUID();
|
|
const agentId = randomUUID();
|
|
const failedRunId = randomUUID();
|
|
const currentRunId = randomUUID();
|
|
|
|
await db.insert(companies).values({
|
|
id: companyId,
|
|
name: "Paperclip",
|
|
issuePrefix: `T${companyId.replace(/-/g, "").slice(0, 6).toUpperCase()}`,
|
|
requireBoardApprovalForNewAgents: false,
|
|
});
|
|
await db.insert(agents).values({
|
|
id: agentId,
|
|
companyId,
|
|
name: "CodexCoder",
|
|
role: "engineer",
|
|
status: "active",
|
|
adapterType: "codex_local",
|
|
adapterConfig: {},
|
|
runtimeConfig: {},
|
|
permissions: {},
|
|
});
|
|
await db.insert(heartbeatRuns).values([
|
|
{
|
|
id: failedRunId,
|
|
companyId,
|
|
agentId,
|
|
status: "failed",
|
|
invocationSource: "manual",
|
|
finishedAt: new Date(),
|
|
},
|
|
{
|
|
id: currentRunId,
|
|
companyId,
|
|
agentId,
|
|
status: "running",
|
|
invocationSource: "manual",
|
|
startedAt: new Date(),
|
|
},
|
|
]);
|
|
|
|
return { companyId, agentId, failedRunId, currentRunId };
|
|
}
|
|
|
|
function agentActor(companyId: string, agentId: string, runId: string): Express.Request["actor"] {
|
|
return {
|
|
type: "agent",
|
|
agentId,
|
|
companyId,
|
|
runId,
|
|
source: "agent_jwt",
|
|
};
|
|
}
|
|
|
|
function boardActor(companyId: string): Express.Request["actor"] {
|
|
return {
|
|
type: "board",
|
|
userId: "board-user",
|
|
companyIds: [companyId],
|
|
memberships: [{ companyId, membershipRole: "admin", status: "active" }],
|
|
isInstanceAdmin: false,
|
|
source: "session",
|
|
};
|
|
}
|
|
|
|
it("allows an assigned agent PATCH to recover a terminal stale executionRunId", async () => {
|
|
const { companyId, agentId, failedRunId, currentRunId } = await seedCompanyAgentAndRuns();
|
|
const issueId = randomUUID();
|
|
await db.insert(issues).values({
|
|
id: issueId,
|
|
companyId,
|
|
title: "Stale execution lock",
|
|
status: "in_progress",
|
|
priority: "high",
|
|
assigneeAgentId: agentId,
|
|
checkoutRunId: null,
|
|
executionRunId: failedRunId,
|
|
executionAgentNameKey: "codexcoder",
|
|
executionLockedAt: new Date(),
|
|
});
|
|
|
|
const res = await request(createApp(agentActor(companyId, agentId, currentRunId)))
|
|
.patch(`/api/issues/${issueId}`)
|
|
.send({ title: "Recovered execution lock" });
|
|
|
|
expect(res.status, JSON.stringify(res.body)).toBe(200);
|
|
expect(res.body.title).toBe("Recovered execution lock");
|
|
|
|
const row = await db
|
|
.select({
|
|
title: issues.title,
|
|
checkoutRunId: issues.checkoutRunId,
|
|
executionRunId: issues.executionRunId,
|
|
})
|
|
.from(issues)
|
|
.where(eq(issues.id, issueId))
|
|
.then((rows) => rows[0]);
|
|
expect(row).toEqual({
|
|
title: "Recovered execution lock",
|
|
checkoutRunId: currentRunId,
|
|
executionRunId: currentRunId,
|
|
});
|
|
});
|
|
|
|
it("allows the rightful assignee to release after the owning run failed", async () => {
|
|
const { companyId, agentId, failedRunId, currentRunId } = await seedCompanyAgentAndRuns();
|
|
const issueId = randomUUID();
|
|
await db.insert(issues).values({
|
|
id: issueId,
|
|
companyId,
|
|
title: "Failed run release",
|
|
status: "in_progress",
|
|
priority: "high",
|
|
assigneeAgentId: agentId,
|
|
checkoutRunId: failedRunId,
|
|
executionRunId: failedRunId,
|
|
executionAgentNameKey: "codexcoder",
|
|
executionLockedAt: new Date(),
|
|
});
|
|
|
|
const res = await request(createApp(agentActor(companyId, agentId, currentRunId)))
|
|
.post(`/api/issues/${issueId}/release`)
|
|
.send();
|
|
|
|
expect(res.status, JSON.stringify(res.body)).toBe(200);
|
|
|
|
const row = await db
|
|
.select({
|
|
status: issues.status,
|
|
assigneeAgentId: issues.assigneeAgentId,
|
|
checkoutRunId: issues.checkoutRunId,
|
|
executionRunId: issues.executionRunId,
|
|
executionLockedAt: issues.executionLockedAt,
|
|
})
|
|
.from(issues)
|
|
.where(eq(issues.id, issueId))
|
|
.then((rows) => rows[0]);
|
|
expect(row).toEqual({
|
|
status: "todo",
|
|
assigneeAgentId: null,
|
|
checkoutRunId: null,
|
|
executionRunId: null,
|
|
executionLockedAt: null,
|
|
});
|
|
});
|
|
|
|
it("restricts admin force-release to board users with company access and writes an audit event", async () => {
|
|
const { companyId, agentId, failedRunId, currentRunId } = await seedCompanyAgentAndRuns();
|
|
const issueId = randomUUID();
|
|
await db.insert(issues).values({
|
|
id: issueId,
|
|
companyId,
|
|
title: "Admin force release",
|
|
status: "in_progress",
|
|
priority: "high",
|
|
assigneeAgentId: agentId,
|
|
checkoutRunId: currentRunId,
|
|
executionRunId: failedRunId,
|
|
executionAgentNameKey: "codexcoder",
|
|
executionLockedAt: new Date(),
|
|
});
|
|
|
|
await request(createApp(agentActor(companyId, agentId, currentRunId)))
|
|
.post(`/api/issues/${issueId}/admin/force-release`)
|
|
.expect(403);
|
|
await request(createApp({
|
|
type: "board",
|
|
userId: "outside-user",
|
|
companyIds: [],
|
|
memberships: [],
|
|
isInstanceAdmin: false,
|
|
source: "session",
|
|
}))
|
|
.post(`/api/issues/${issueId}/admin/force-release`)
|
|
.expect(403);
|
|
|
|
const res = await request(createApp(boardActor(companyId)))
|
|
.post(`/api/issues/${issueId}/admin/force-release?clearAssignee=true`)
|
|
.send();
|
|
|
|
expect(res.status, JSON.stringify(res.body)).toBe(200);
|
|
expect(res.body.issue).toMatchObject({
|
|
id: issueId,
|
|
assigneeAgentId: null,
|
|
checkoutRunId: null,
|
|
executionRunId: null,
|
|
executionLockedAt: null,
|
|
});
|
|
expect(res.body.previous).toEqual({
|
|
checkoutRunId: currentRunId,
|
|
executionRunId: failedRunId,
|
|
});
|
|
|
|
const audit = await db
|
|
.select({
|
|
action: activityLog.action,
|
|
actorType: activityLog.actorType,
|
|
actorId: activityLog.actorId,
|
|
details: activityLog.details,
|
|
})
|
|
.from(activityLog)
|
|
.where(eq(activityLog.action, "issue.admin_force_release"))
|
|
.then((rows) => rows[0]);
|
|
expect(audit).toMatchObject({
|
|
action: "issue.admin_force_release",
|
|
actorType: "user",
|
|
actorId: "board-user",
|
|
details: {
|
|
issueId,
|
|
actorUserId: "board-user",
|
|
prevCheckoutRunId: currentRunId,
|
|
prevExecutionRunId: failedRunId,
|
|
clearAssignee: true,
|
|
},
|
|
});
|
|
});
|
|
});
|