mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-17 19:20:39 +09:00
fix: harden release registry verification against npm lag (#4816)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies
> - Its release automation publishes canary packages to npm and then validates the published registry state before considering the release healthy
> - The failing canary run `25139465018` showed that npm can expose a newly published version through version-specific endpoints before the root package document has fully converged
> - That made a successful canary publish look like a failed release because the verifier trusted stale root metadata too early
> - This pull request hardens the registry verification path by preferring version-specific manifest checks, retrying convergence-sensitive failures, and distinguishing permanent failures from propagation lag
> - While validating that change in CI, a separate teardown race in `heartbeat-stale-queue-invalidation.test.ts` surfaced and was hardened so the PR could pass reliably
> - The benefit is that transient npm propagation lag no longer fails a successful canary publish, while genuine registry-state and dependency-integrity failures still stop the release flow promptly

## What Changed

- Hardened `scripts/verify-release-registry-state.mjs` so it prefers version-specific manifest resolution over stale root metadata, adds bounded registry-fetch timeouts, and classifies failures as retriable vs non-retriable.
- Updated `scripts/release-lib.sh` and `scripts/release.sh` so post-publish registry verification retries only convergence-sensitive failures and reports immediate permanent failures clearly.
- Expanded `scripts/verify-release-registry-state.test.mjs` with regression coverage for stale root metadata, fetch timeout behavior, peer dependency range handling, non-retriable canary-latest cases, and related verifier edge cases.
- Hardened `server/src/__tests__/heartbeat-stale-queue-invalidation.test.ts` teardown to tolerate the late-comment foreign-key race that CI exposed while validating this branch.
## Verification

- `pnpm run test:release-registry`
- `node --check scripts/verify-release-registry-state.mjs`
- `bash -n scripts/release.sh && bash -n scripts/release-lib.sh`
- PR checks passed on head `5c422600fc12acac61f6b7c267a4dc915df622b1`: `policy`, `verify`, `e2e`, `security/snyk`, and `Greptile Review`

## Risks

- Low risk. The main behavioral changes are limited to release automation and verifier retry semantics, plus a test-only teardown hardening for a CI race.

> I checked [`ROADMAP.md`](ROADMAP.md). This is a narrow release bugfix and does not overlap planned core feature work.

## Model Used

- OpenAI Codex via Paperclip `codex_local` with tool use and local code execution enabled. This agent session runs on a GPT-5-class coding model; the exact backend model ID/context window is not exposed by the local adapter runtime.

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after screenshots
- [ ] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I have addressed all Greptile and reviewer comments before requesting merge
This commit is contained in:
parent
a1b2875165
commit
a72731f118
9 changed files with 785 additions and 125 deletions
|
|
@ -19,6 +19,7 @@ import {
|
|||
issueTreeHolds,
|
||||
issues,
|
||||
} from "@paperclipai/db";
|
||||
import { ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY } from "@paperclipai/shared";
|
||||
import {
|
||||
getEmbeddedPostgresTestSupport,
|
||||
startEmbeddedPostgresTestDatabase,
|
||||
|
|
@ -87,6 +88,40 @@ async function waitForCondition(fn: () => Promise<boolean>, timeoutMs = 3_000) {
|
|||
return fn();
|
||||
}
|
||||
|
||||
/**
 * Empties every table the heartbeat stale-queue invalidation tests write to.
 *
 * Heartbeat completion can write issue-thread comments shortly after the run
 * leaves queued/running. If such a comment lands after `issueComments` has
 * been cleared but before `issues` is cleared, the delete fails with the
 * `issue_comments_issue_id_issues_id_fk` constraint. In that case the whole
 * delete sequence is retried after a short pause; any other error is
 * rethrown immediately.
 *
 * @param db - Database handle returned by `createDb`.
 * @param maxAttempts - Total number of delete passes to try (at least one is
 *   always made). Defaults to 5.
 * @param retryDelayMs - Pause between passes, in milliseconds. Defaults to 50.
 * @throws The original error when it is not the late-comment race, or when
 *   the race still occurs on the final attempt.
 */
async function cleanupHeartbeatInvalidationFixture(
  db: ReturnType<typeof createDb>,
  maxAttempts = 5,
  retryDelayMs = 50,
) {
  const lateCommentConstraint = "issue_comments_issue_id_issues_id_fk";
  // A single bound drives both the loop and the "give up" check so the two
  // can never drift apart.
  const attempts = Math.max(1, Math.floor(maxAttempts));

  // The driver error naming the constraint may be wrapped by a query layer,
  // so inspect the whole `cause` chain rather than only the outermost message.
  const isLateCommentRace = (error: unknown): boolean => {
    const visited = new Set<unknown>();
    let current: unknown = error;
    while (current instanceof Error && !visited.has(current)) {
      if (current.message.includes(lateCommentConstraint)) {
        return true;
      }
      visited.add(current);
      current = (current as { cause?: unknown }).cause;
    }
    return false;
  };

  for (let attempt = 1; attempt <= attempts; attempt += 1) {
    try {
      await db.delete(companySkills);
      await db.delete(issueComments);
      await db.delete(issueDocuments);
      await db.delete(documentRevisions);
      await db.delete(documents);
      await db.delete(issueRelations);
      await db.delete(issueTreeHolds);
      await db.delete(issues);
      await db.delete(heartbeatRunEvents);
      await db.delete(activityLog);
      await db.delete(heartbeatRuns);
      await db.delete(agentWakeupRequests);
      await db.delete(agentRuntimeState);
      await db.delete(agents);
      await db.delete(companies);
      return;
    } catch (error) {
      if (attempt === attempts || !isLateCommentRace(error)) {
        throw error;
      }

      // Give the late comment write time to land, then redo the dependent
      // deletes from the top.
      await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
    }
  }
}
|
||||
|
||||
type SeedOptions = {
|
||||
agentName?: string;
|
||||
agentRole?: string;
|
||||
|
|
@ -103,6 +138,9 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
let heartbeat!: ReturnType<typeof heartbeatService>;
|
||||
let tempDb: Awaited<ReturnType<typeof startEmbeddedPostgresTestDatabase>> | null = null;
|
||||
|
||||
// Counts the recorded adapter-execute invocations whose first argument
// carries the given run id, so assertions are scoped to a single run.
const countExecuteCallsForRun = (runId: string) => {
  let matchingCalls = 0;
  for (const [context] of mockAdapterExecute.mock.calls) {
    if (context?.runId === runId) {
      matchingCalls += 1;
    }
  }
  return matchingCalls;
};
|
||||
|
||||
beforeAll(async () => {
|
||||
tempDb = await startEmbeddedPostgresTestDatabase("paperclip-heartbeat-stale-queue-");
|
||||
db = createDb(tempDb.connectionString);
|
||||
|
|
@ -137,22 +175,7 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
await new Promise((resolve) => setTimeout(resolve, 50));
|
||||
}
|
||||
await new Promise((resolve) => setTimeout(resolve, 50));
|
||||
await db.delete(companySkills);
|
||||
await db.delete(issueComments);
|
||||
await db.delete(issueDocuments);
|
||||
await db.delete(documentRevisions);
|
||||
await db.delete(documents);
|
||||
await db.delete(issueRelations);
|
||||
await db.delete(issueTreeHolds);
|
||||
await db.delete(issueComments);
|
||||
await db.delete(issues);
|
||||
await db.delete(heartbeatRunEvents);
|
||||
await db.delete(activityLog);
|
||||
await db.delete(heartbeatRuns);
|
||||
await db.delete(agentWakeupRequests);
|
||||
await db.delete(agentRuntimeState);
|
||||
await db.delete(agents);
|
||||
await db.delete(companies);
|
||||
await cleanupHeartbeatInvalidationFixture(db);
|
||||
});
|
||||
|
||||
afterAll(async () => {
|
||||
|
|
@ -230,6 +253,43 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
return { runId, wakeupRequestId };
|
||||
}
|
||||
|
||||
/**
 * Inserts a markdown "Continuation Summary" document with a single revision
 * and attaches it to the issue under the continuation-summary document key.
 *
 * @param input.companyId - Company that owns the document rows.
 * @param input.issueId - Issue the document is linked to.
 * @param input.agentId - Agent recorded as creator/updater of the rows.
 * @param input.body - Markdown body stored on both the document and revision.
 */
async function seedContinuationSummary(input: {
  companyId: string;
  issueId: string;
  agentId: string;
  body: string;
}) {
  const { companyId, issueId, agentId, body } = input;
  const documentId = randomUUID();
  const revisionId = randomUUID();
  const title = "Continuation Summary";
  const format = "markdown";
  const revisionNumber = 1;

  // The document row points at its latest revision by id.
  await db.insert(documents).values({
    id: documentId,
    companyId,
    title,
    format,
    latestBody: body,
    latestRevisionId: revisionId,
    latestRevisionNumber: revisionNumber,
    createdByAgentId: agentId,
    updatedByAgentId: agentId,
  });

  await db.insert(documentRevisions).values({
    id: revisionId,
    companyId,
    documentId,
    revisionNumber,
    title,
    format,
    body,
    createdByAgentId: agentId,
  });

  // Link the document to the issue under the well-known summary key.
  await db.insert(issueDocuments).values({
    companyId,
    issueId,
    documentId,
    key: ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY,
  });
}
|
||||
|
||||
it("cancels queued runs when the issue assignee changes before the run starts", async () => {
|
||||
const { companyId, agentId } = await seedCompanyAndAgent({ agentName: "OriginalCoder" });
|
||||
const replacementAgentId = randomUUID();
|
||||
|
|
@ -300,7 +360,7 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
expect(run?.resultJson).toMatchObject({ stopReason: "issue_assignee_changed" });
|
||||
expect(wakeup?.status).toBe("skipped");
|
||||
expect(wakeup?.error).toContain("assignee changed");
|
||||
expect(mockAdapterExecute).not.toHaveBeenCalled();
|
||||
expect(countExecuteCallsForRun(runId)).toBe(0);
|
||||
});
|
||||
|
||||
it("cancels queued runs when the issue reaches a terminal status before the run starts", async () => {
|
||||
|
|
@ -349,7 +409,7 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
expect(run?.status).toBe("cancelled");
|
||||
expect(run?.errorCode).toBe("issue_terminal_status");
|
||||
expect(wakeup?.status).toBe("skipped");
|
||||
expect(mockAdapterExecute).not.toHaveBeenCalled();
|
||||
expect(countExecuteCallsForRun(runId)).toBe(0);
|
||||
});
|
||||
|
||||
it("cancels queued max-turn continuations when the issue is no longer in_progress before the run starts", async () => {
|
||||
|
|
@ -409,7 +469,7 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
expect(run?.resultJson).toMatchObject({ stopReason: "issue_not_in_progress" });
|
||||
expect(wakeup?.status).toBe("skipped");
|
||||
expect(wakeup?.error).toContain("no longer in_progress");
|
||||
expect(mockAdapterExecute).not.toHaveBeenCalled();
|
||||
expect(countExecuteCallsForRun(runId)).toBe(0);
|
||||
});
|
||||
|
||||
it("cancels queued max-turn continuations when another continuation owns the issue lock", async () => {
|
||||
|
|
@ -497,7 +557,7 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
expect(wakeup?.status).toBe("skipped");
|
||||
expect(wakeup?.error).toContain("execution lock");
|
||||
expect(issue?.executionRunId).toBe(lockOwnerRunId);
|
||||
expect(mockAdapterExecute).not.toHaveBeenCalled();
|
||||
expect(countExecuteCallsForRun(runId)).toBe(0);
|
||||
});
|
||||
|
||||
it("cancels queued in_review runs when the current participant changes before the run starts", async () => {
|
||||
|
|
@ -577,7 +637,7 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
expect(run?.resultJson).toMatchObject({ stopReason: "issue_review_participant_changed" });
|
||||
expect(wakeup?.status).toBe("skipped");
|
||||
expect(wakeup?.error).toContain("in-review participant changed");
|
||||
expect(mockAdapterExecute).not.toHaveBeenCalled();
|
||||
expect(countExecuteCallsForRun(runId)).toBe(0);
|
||||
});
|
||||
|
||||
it("still runs comment-driven wakes on in_review issues even when the agent is no longer the current participant", async () => {
|
||||
|
|
@ -695,6 +755,77 @@ describeEmbeddedPostgres("heartbeat stale queued-run invalidation", () => {
|
|||
.then((rows) => rows[0] ?? null);
|
||||
expect(run?.status).toBe("succeeded");
|
||||
expect(run?.errorCode).toBeNull();
|
||||
expect(mockAdapterExecute).toHaveBeenCalledTimes(1);
|
||||
expect(countExecuteCallsForRun(runId)).toBe(1);
|
||||
});
|
||||
|
||||
// A queued continuation-recovery run must be cancelled (and its wakeup
// skipped) when the issue's continuation summary tells the executor to wait
// for review, and the adapter must never be invoked for that run.
it("cancels queued continuation recovery when the continuation summary parks executor work for review", async () => {
  const { companyId, agentId } = await seedCompanyAndAgent();
  const issueId = randomUUID();

  await db.insert(issues).values({
    id: issueId,
    companyId,
    title: "Implementation parked for review",
    status: "in_progress",
    priority: "medium",
    assigneeAgentId: agentId,
  });

  const summaryBody =
    "# Continuation Summary\n" +
    "\n" +
    "## Next Action\n" +
    "\n" +
    "- Wait for reviewer feedback or approval before continuing executor work.";
  await seedContinuationSummary({ companyId, issueId, agentId, body: summaryBody });

  const { runId, wakeupRequestId } = await seedQueuedRun({
    companyId,
    agentId,
    issueId,
    wakeReason: "issue_continuation_needed",
    invocationSource: "automation",
    contextExtras: {
      retryReason: "issue_continuation_needed",
    },
  });

  await heartbeat.resumeQueuedRuns();

  // Poll until the run has been marked cancelled.
  await waitForCondition(async () => {
    const [polled] = await db
      .select({ status: heartbeatRuns.status })
      .from(heartbeatRuns)
      .where(eq(heartbeatRuns.id, runId));
    return polled?.status === "cancelled";
  });

  // Load the final run and wakeup rows concurrently.
  const [runRows, wakeupRows] = await Promise.all([
    db
      .select({
        status: heartbeatRuns.status,
        errorCode: heartbeatRuns.errorCode,
        resultJson: heartbeatRuns.resultJson,
      })
      .from(heartbeatRuns)
      .where(eq(heartbeatRuns.id, runId)),
    db
      .select({ status: agentWakeupRequests.status, error: agentWakeupRequests.error })
      .from(agentWakeupRequests)
      .where(eq(agentWakeupRequests.id, wakeupRequestId)),
  ]);
  const run = runRows[0] ?? null;
  const wakeup = wakeupRows[0] ?? null;

  expect(run?.status).toBe("cancelled");
  expect(run?.errorCode).toBe("issue_continuation_waiting_on_review");
  expect(run?.resultJson).toMatchObject({ stopReason: "issue_continuation_waiting_on_review" });
  expect(wakeup?.status).toBe("skipped");
  expect(wakeup?.error).toContain("continuation summary says the executor should wait");
  expect(countExecuteCallsForRun(runId)).toBe(0);
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@ import { describe, expect, it } from "vitest";
|
|||
import {
|
||||
ISSUE_CONTINUATION_SUMMARY_MAX_BODY_CHARS,
|
||||
buildContinuationSummaryMarkdown,
|
||||
continuationSummaryParksExecutor,
|
||||
extractContinuationSummaryNextAction,
|
||||
} from "../services/issue-continuation-summary.js";
|
||||
|
||||
describe("issue continuation summaries", () => {
|
||||
|
|
@ -83,4 +85,31 @@ describe("issue continuation summaries", () => {
|
|||
expect(body).toContain("Latest run error (adapter_failed): adapter failed");
|
||||
expect(body).toContain("Inspect the failed run, fix the cause");
|
||||
});
|
||||
|
||||
// A "Next Action" that tells the executor to wait for review must be
// extracted verbatim (without the bullet) and classified as parking.
it("detects continuation summaries that explicitly park executor work for review", () => {
  const expectedNextAction =
    "Wait for reviewer feedback or approval before continuing executor work.";
  const summaryLines = [
    "# Continuation Summary",
    "",
    "## Next Action",
    "",
    `- ${expectedNextAction}`,
  ];
  const body = summaryLines.join("\n");

  expect(extractContinuationSummaryNextAction(body)).toBe(expectedNextAction);
  expect(continuationSummaryParksExecutor(body)).toBe(true);
});
|
||||
|
||||
// A "Next Action" that still describes work the executor can do must not be
// classified as parking, even though it mentions `in_review`.
it("does not park executor work when the next action is still runnable", () => {
  const body =
    "# Continuation Summary\n" +
    "\n" +
    "## Next Action\n" +
    "\n" +
    "- Re-check run `25145432006`, then move the issue to `in_review` if the final step is green.";

  const parksExecutor = continuationSummaryParksExecutor(body);

  expect(parksExecutor).toBe(false);
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue