paperclip/server/src/__tests__/productivity-review-service.test.ts
Dotta 454edfe81e
Add recovery handoff system notices (#5289)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - Agent runs can end productively while the source issue still lacks a
durable final disposition.
> - That leaves the control plane unsure whether to resume, escalate, or
close the work.
> - Issue comments also need a presentation contract so system-authored
recovery notices can render as first-class thread messages without
overloading normal comments.
> - This pull request adds successful-run handoff recovery, comment
presentation metadata, and system notice rendering.
> - The benefit is stricter task liveness with clearer operator-facing
recovery state.

## What Changed

- Added successful-run handoff decisions, wake payloads, escalation
behavior, and recovery tests.
- Added issue comment presentation metadata with migration
`0078_white_darwin.sql` and shared/server/company portability support.
- Rendered recovery/system notices in issue chat with dedicated UI
components, fixtures, tests, and storybook/lab coverage.
- Included the current recovery model-profile hint patch so automatic
recovery follow-ups use the cheap profile.

## Verification

- `pnpm install --frozen-lockfile`
- `pnpm exec vitest run
server/src/services/recovery/successful-run-handoff.test.ts
ui/src/components/SystemNotice.test.tsx
ui/src/lib/system-notice-comment.test.ts
ui/src/components/IssueChatThreadSystemNotice.test.tsx`

## Risks

- Migration-bearing PR: merge this before any other branch that might
later add a migration.
- The branch touches both recovery services and issue-thread rendering,
so review should pay attention to recovery wake idempotency and comment
metadata compatibility.

## Model Used

- OpenAI GPT-5 Codex via Paperclip `codex_local` adapter, with
shell/git/GitHub CLI tool use.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-05-06 06:05:58 -05:00

566 lines
19 KiB
TypeScript

import { randomUUID } from "node:crypto";
import { and, eq, sql } from "drizzle-orm";
import { afterAll, afterEach, beforeAll, describe, expect, it } from "vitest";
import {
activityLog,
agents,
companies,
createDb,
heartbeatRuns,
issueComments,
issues,
} from "@paperclipai/db";
import {
getEmbeddedPostgresTestSupport,
startEmbeddedPostgresTestDatabase,
} from "./helpers/embedded-postgres.js";
import { MAX_ISSUE_REQUEST_DEPTH } from "@paperclipai/shared";
import {
DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS,
DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS,
PRODUCTIVITY_REVIEW_REFRESH_COMMENT_PREFIX,
PRODUCTIVITY_REVIEW_ORIGIN_KIND,
productivityReviewService,
} from "../services/productivity-review.ts";
// Ask the test helper once, at module load, whether embedded Postgres is usable on this host.
const embeddedPostgresSupport = await getEmbeddedPostgresTestSupport();
if (!embeddedPostgresSupport.supported) {
  // Make the skip visible in the test output, falling back to a generic reason when none is given.
  const skipReason = embeddedPostgresSupport.reason ?? "unsupported environment";
  console.warn(`Skipping embedded Postgres productivity review tests on this host: ${skipReason}`);
}
// Register the suite normally when supported; otherwise register it as skipped.
const describeEmbeddedPostgres = embeddedPostgresSupport.supported ? describe : describe.skip;
describeEmbeddedPostgres("productivity review service", () => {
// Handle to the temporary database; remains null if startup never completed.
let tempDb: Awaited<ReturnType<typeof startEmbeddedPostgresTestDatabase>> | null = null;
// Database client shared by every helper and test in this suite; assigned in beforeAll.
let db: ReturnType<typeof createDb>;

// Start a single temporary database for the whole suite (hook timeout: 30 seconds).
beforeAll(async () => {
  const started = await startEmbeddedPostgresTestDatabase("paperclip-productivity-review-");
  tempDb = started;
  db = createDb(started.connectionString);
}, 30_000);

// Isolate tests from each other by truncating companies and everything cascading from it.
afterEach(async () => {
  const truncateCompanies = sql.raw(`TRUNCATE TABLE "companies" CASCADE`);
  await db.execute(truncateCompanies);
});

// Dispose of the temporary database, if one was started.
afterAll(async () => {
  if (tempDb !== null) {
    await tempDb.cleanup();
  }
});
/**
 * Seeds one company, a manager agent ("CTO"), an engineer agent ("Coder") that reports to
 * the manager, and a single issue assigned to the engineer.
 *
 * @param opts Optional overrides for the seeded issue. When omitted: status is
 *   "in_progress", startedAt equals the fixed creation timestamp, parentId is null and
 *   originKind is "manual".
 * @returns The generated ids together with the issue prefix and the fixed creation timestamp.
 */
async function seedAssignedIssue(opts?: {
  status?: "todo" | "in_progress";
  startedAt?: Date;
  parentId?: string | null;
  originKind?: string;
}) {
  const companyId = randomUUID();
  const managerId = randomUUID();
  const coderId = randomUUID();
  const issueId = randomUUID();

  // Prefix is "PR" plus the first six hex characters of the company id, upper-cased.
  const compactCompanyId = companyId.replace(/-/g, "");
  const issuePrefix = `PR${compactCompanyId.slice(0, 6).toUpperCase()}`;
  const createdAt = new Date("2026-04-28T10:00:00.000Z");

  // Resolve the optional overrides up front so the insert below reads as plain data.
  const issueStatus = opts?.status ?? "in_progress";
  const issueParentId = opts?.parentId ?? null;
  const issueOriginKind = opts?.originKind ?? "manual";
  const issueStartedAt = opts?.startedAt ?? createdAt;

  await db.insert(companies).values({
    id: companyId,
    name: "Productivity Review Co",
    issuePrefix,
    requireBoardApprovalForNewAgents: false,
  });

  // Manager first, then the engineer whose reportsTo points at the manager.
  await db.insert(agents).values([
    {
      id: managerId,
      companyId,
      name: "CTO",
      role: "cto",
      status: "idle",
      adapterType: "codex_local",
      adapterConfig: {},
      runtimeConfig: {},
      permissions: {},
    },
    {
      id: coderId,
      companyId,
      name: "Coder",
      role: "engineer",
      status: "idle",
      reportsTo: managerId,
      adapterType: "codex_local",
      adapterConfig: {},
      runtimeConfig: {},
      permissions: {},
    },
  ]);

  await db.insert(issues).values({
    id: issueId,
    companyId,
    title: "Implement data import",
    status: issueStatus,
    priority: "medium",
    assigneeAgentId: coderId,
    parentId: issueParentId,
    originKind: issueOriginKind,
    issueNumber: 1,
    identifier: `${issuePrefix}-1`,
    startedAt: issueStartedAt,
    createdAt,
    updatedAt: createdAt,
  });

  return { companyId, managerId, coderId, issueId, issuePrefix, createdAt };
}
/**
 * Inserts `input.count` succeeded heartbeat runs for one agent and issue. Runs are spaced
 * one minute apart walking backwards from `input.now`, so index 0 is the most recent run.
 *
 * When `input.withRunComments` is set, one issue comment authored by the same agent is
 * inserted per run, linked through `createdByRunId` and stamped with that run's creation time.
 *
 * A `count` of zero or less inserts nothing and returns an empty array, because Drizzle's
 * `values()` throws when it is handed an empty row list.
 *
 * @returns The inserted run rows, newest first.
 */
async function insertRuns(input: {
  companyId: string;
  agentId: string;
  issueId: string;
  count: number;
  now: Date;
  withRunComments?: boolean;
}) {
  const runs: Array<typeof heartbeatRuns.$inferInsert> = [];
  const comments: Array<typeof issueComments.$inferInsert> = [];
  for (let index = 0; index < input.count; index += 1) {
    const runId = randomUUID();
    const createdAt = new Date(input.now.getTime() - index * 60_000);
    runs.push({
      id: runId,
      companyId: input.companyId,
      agentId: input.agentId,
      status: "succeeded",
      invocationSource: "assignment",
      triggerDetail: "system",
      startedAt: createdAt,
      // Each run is recorded as finishing 30 seconds after it started.
      finishedAt: new Date(createdAt.getTime() + 30_000),
      contextSnapshot: { issueId: input.issueId, taskId: input.issueId },
      livenessState: "advanced",
      nextAction: "Continue processing the next batch.",
      createdAt,
      updatedAt: createdAt,
    });
    if (input.withRunComments) {
      // Built alongside the run so the typed id and timestamp are reused without assertions.
      comments.push({
        companyId: input.companyId,
        issueId: input.issueId,
        authorAgentId: input.agentId,
        createdByRunId: runId,
        body: `Progress update ${index}`,
        createdAt,
        updatedAt: createdAt,
      });
    }
  }
  // Nothing to insert: return early instead of letting values([]) throw.
  if (runs.length === 0) {
    return runs;
  }
  await db.insert(heartbeatRuns).values(runs);
  if (comments.length > 0) {
    await db.insert(issueComments).values(comments);
  }
  return runs;
}
/**
 * Lists the company's issues whose origin kind is the productivity-review kind,
 * ordered by creation time.
 */
async function listProductivityReviews(companyId: string) {
  const isReviewForCompany = and(
    eq(issues.companyId, companyId),
    eq(issues.originKind, PRODUCTIVITY_REVIEW_ORIGIN_KIND),
  );
  return db.select().from(issues).where(isReviewForCompany).orderBy(issues.createdAt);
}
/**
 * Lists the comments on the given review issue whose body starts with the refresh-comment
 * prefix, ordered by creation time.
 */
async function listRefreshComments(reviewIssueId: string) {
  // LIKE pattern: the refresh prefix followed by anything.
  const refreshBodyPattern = `${PRODUCTIVITY_REVIEW_REFRESH_COMMENT_PREFIX}%`;
  const isRefreshComment = and(
    eq(issueComments.issueId, reviewIssueId),
    sql`${issueComments.body} like ${refreshBodyPattern}`,
  );
  return db.select().from(issueComments).where(isRefreshComment).orderBy(issueComments.createdAt);
}
// Seeds exactly the default no-comment streak length of runs, reconciles twice at the same
// instant, and checks that only one review exists and that it carries the expected metadata.
it("creates exactly one manager-assigned review for a no-comment run streak and rate-limits immediate refresh", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  await insertRuns({
    companyId: seeded.companyId,
    agentId: seeded.coderId,
    issueId: seeded.issueId,
    count: DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
    now,
  });
  const service = productivityReviewService(db);

  const first = await service.reconcileProductivityReviews({ now, companyId: seeded.companyId });
  const second = await service.reconcileProductivityReviews({ now, companyId: seeded.companyId });

  // The first pass creates the review; the second pass sees it and leaves it untouched.
  expect(first.created).toBe(1);
  expect(second.updated).toBe(0);
  expect(second.existing).toBe(1);

  const reviews = await listProductivityReviews(seeded.companyId);
  expect(reviews).toHaveLength(1);
  const review = reviews[0];
  expect(review?.parentId).toBe(seeded.issueId);
  expect(review?.assigneeAgentId).toBe(seeded.managerId);
  expect(review?.assigneeAdapterOverrides).toEqual({ modelProfile: "cheap" });
  expect(review?.originId).toBe(seeded.issueId);
  expect(review?.originFingerprint).toBe(`productivity-review:${seeded.issueId}`);
  expect(review?.description).toContain("Primary trigger: `no_comment_streak`");
  // The expected streak is derived from the same constant that sized the seeded runs,
  // so the two cannot drift apart if the default changes.
  expect(review?.description).toContain(
    `No-comment completed-run streak: ${DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS}`,
  );
  expect(await listRefreshComments(review!.id)).toHaveLength(0);
});
// Creates a review, then reconciles at a series of later instants to exercise the refresh
// interval and the cap on the number of refresh comments.
it("refreshes open productivity reviews only once per interval and caps refresh comments", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  const { companyId } = seeded;
  await insertRuns({
    companyId,
    agentId: seeded.coderId,
    issueId: seeded.issueId,
    count: DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
    now,
  });
  const service = productivityReviewService(db);
  const reconcileAt = (at: Date) => service.reconcileProductivityReviews({ now: at, companyId });

  await reconcileAt(now);
  const [review] = await listProductivityReviews(companyId);

  const intervalMs = DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS;
  const firstRefreshAt = new Date(now.getTime() + intervalMs);
  const afterFirstRefresh = (offsetMs: number) => new Date(firstRefreshAt.getTime() + offsetMs);

  // One full interval after creation.
  const firstRefresh = await reconcileAt(firstRefreshAt);
  // Only thirty minutes after the first refresh.
  const tooSoonRefresh = await reconcileAt(afterFirstRefresh(30 * 60 * 1000));
  // One and two further intervals; results are not inspected individually.
  await reconcileAt(afterFirstRefresh(intervalMs));
  await reconcileAt(afterFirstRefresh(2 * intervalMs));
  // Three further intervals after the first refresh.
  const cappedRefresh = await reconcileAt(afterFirstRefresh(3 * intervalMs));

  expect(firstRefresh.updated).toBe(1);
  expect(tooSoonRefresh.updated).toBe(0);
  expect(tooSoonRefresh.existing).toBe(1);
  expect(cappedRefresh.updated).toBe(0);
  expect(cappedRefresh.existing).toBe(1);
  const refreshComments = await listRefreshComments(review!.id);
  expect(refreshComments).toHaveLength(DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS);
});
// Pre-seeds three "done" reviews for the same source issue (created 8, 9 and 10 hours before
// `now`) and checks that reconciliation reports a capped creation instead of a fourth review.
it("caps productivity review creation per source issue in the rolling creation window", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  await insertRuns({
    companyId: seeded.companyId,
    agentId: seeded.coderId,
    issueId: seeded.issueId,
    count: DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
    now,
  });

  const hourMs = 60 * 60 * 1000;
  const completedReviews: Array<typeof issues.$inferInsert> = [];
  [8, 9, 10].forEach((hoursAgo, index) => {
    const createdAt = new Date(now.getTime() - hoursAgo * hourMs);
    // Issue number 1 is taken by the seeded source issue, so numbering starts at 2.
    const issueNumber = index + 2;
    completedReviews.push({
      id: randomUUID(),
      companyId: seeded.companyId,
      title: `Completed productivity review ${index + 1}`,
      status: "done",
      priority: "high",
      originKind: PRODUCTIVITY_REVIEW_ORIGIN_KIND,
      originId: seeded.issueId,
      originFingerprint: `productivity-review:${seeded.issueId}`,
      parentId: seeded.issueId,
      issueNumber,
      identifier: `${seeded.issuePrefix}-${issueNumber}`,
      createdAt,
      updatedAt: createdAt,
    });
  });
  await db.insert(issues).values(completedReviews);

  const service = productivityReviewService(db);
  const result = await service.reconcileProductivityReviews({
    now,
    companyId: seeded.companyId,
  });

  expect(result.created).toBe(0);
  expect(result.creationCapped).toBe(1);
  const reviews = await listProductivityReviews(seeded.companyId);
  expect(reviews).toHaveLength(3);
});
// Pre-seeds three "cancelled" reviews for the same source issue (created 8, 9 and 10 hours
// before `now`) and checks that reconciliation still creates a new review.
it("does not count cancelled productivity reviews toward the creation cap", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  await insertRuns({
    companyId: seeded.companyId,
    agentId: seeded.coderId,
    issueId: seeded.issueId,
    count: DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
    now,
  });

  const hourMs = 60 * 60 * 1000;
  const cancelledReviews: Array<typeof issues.$inferInsert> = [];
  [8, 9, 10].forEach((hoursAgo, index) => {
    const createdAt = new Date(now.getTime() - hoursAgo * hourMs);
    // Issue number 1 is taken by the seeded source issue, so numbering starts at 2.
    const issueNumber = index + 2;
    cancelledReviews.push({
      id: randomUUID(),
      companyId: seeded.companyId,
      title: `Cancelled productivity review ${index + 1}`,
      status: "cancelled",
      priority: "high",
      originKind: PRODUCTIVITY_REVIEW_ORIGIN_KIND,
      originId: seeded.issueId,
      originFingerprint: `productivity-review:${seeded.issueId}`,
      parentId: seeded.issueId,
      issueNumber,
      identifier: `${seeded.issuePrefix}-${issueNumber}`,
      createdAt,
      updatedAt: createdAt,
    });
  });
  await db.insert(issues).values(cancelledReviews);

  const service = productivityReviewService(db);
  const result = await service.reconcileProductivityReviews({
    now,
    companyId: seeded.companyId,
  });

  expect(result.created).toBe(1);
  expect(result.creationCapped).toBe(0);
  // Three cancelled reviews plus the newly created one.
  const reviews = await listProductivityReviews(seeded.companyId);
  expect(reviews).toHaveLength(4);
});
// Seeds an in-progress issue that started seven hours before `now`, with no runs at all, and
// checks that a long-active review is created while the continuation hold stays off.
it("creates a long-active review without enabling a continuation hold", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const sevenHoursMs = 7 * 60 * 60 * 1000;
  const seeded = await seedAssignedIssue({
    status: "in_progress",
    startedAt: new Date(now.getTime() - sevenHoursMs),
  });
  const { companyId, issueId, coderId } = seeded;
  const service = productivityReviewService(db);

  const result = await service.reconcileProductivityReviews({ now, companyId });
  const hold = await service.isProductivityReviewContinuationHoldActive({
    companyId,
    issueId,
    agentId: coderId,
    now,
  });

  expect(result.created).toBe(1);
  const reviews = await listProductivityReviews(companyId);
  const review = reviews[0];
  expect(review?.description).toContain("Primary trigger: `long_active_duration`");
  expect(review?.priority).toBe("medium");
  expect(hold.held).toBe(false);
});
// Seeds ten runs that each have a progress comment and checks that a review is still
// created, this time with the high-churn trigger.
it("creates a high-churn review even when every sampled run has a progress comment", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  const { companyId } = seeded;
  await insertRuns({
    companyId,
    agentId: seeded.coderId,
    issueId: seeded.issueId,
    count: 10,
    now,
    withRunComments: true,
  });

  const service = productivityReviewService(db);
  const result = await service.reconcileProductivityReviews({ now, companyId });

  expect(result.created).toBe(1);
  const reviews = await listProductivityReviews(companyId);
  const description = reviews[0]?.description;
  expect(description).toContain("Primary trigger: `high_churn`");
  expect(description).toContain("Runs in rolling windows: 10/1h");
});
// Seeds nine uncommented runs by the assignee plus ten runs by the manager, each with a
// manager-authored comment on the same issue, and checks that no review is created.
it("ignores non-assignee comments when evaluating high-churn productivity reviews", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  const { companyId, issueId, managerId } = seeded;

  await insertRuns({
    companyId,
    agentId: seeded.coderId,
    issueId,
    count: 9,
    now,
  });
  const managerRuns = await insertRuns({
    companyId,
    agentId: managerId,
    issueId,
    count: 10,
    now,
  });

  // One manager note per manager run, stamped with that run's creation time.
  const managerNotes = managerRuns.map((run, index) => {
    const notedAt = run.createdAt as Date;
    return {
      companyId,
      issueId,
      authorAgentId: managerId,
      createdByRunId: run.id,
      body: `Manager note ${index}`,
      createdAt: notedAt,
      updatedAt: notedAt,
    };
  });
  await db.insert(issueComments).values(managerNotes);

  const service = productivityReviewService(db);
  const result = await service.reconcileProductivityReviews({ now, companyId });

  expect(result.created).toBe(0);
  const reviews = await listProductivityReviews(companyId);
  expect(reviews).toHaveLength(0);
});
// Seeds an existing review under the source issue, then a long-running, run-heavy child
// issue under that review, and checks that reconciliation creates no additional review.
it("skips productivity-review descendants so reviews cannot recursively spawn reviews", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  const { companyId, issuePrefix } = seeded;
  const reviewId = randomUUID();
  const childId = randomUUID();

  // The review must exist before the child, because the child points at it via parentId.
  await db.insert(issues).values({
    id: reviewId,
    companyId,
    title: "Existing productivity review",
    status: "todo",
    priority: "high",
    originKind: PRODUCTIVITY_REVIEW_ORIGIN_KIND,
    originId: seeded.issueId,
    originFingerprint: `productivity-review:${seeded.issueId}`,
    parentId: seeded.issueId,
    issueNumber: 2,
    identifier: `${issuePrefix}-2`,
  });

  const sevenHoursMs = 7 * 60 * 60 * 1000;
  await db.insert(issues).values({
    id: childId,
    companyId,
    title: "Review follow-up child",
    status: "in_progress",
    priority: "medium",
    assigneeAgentId: seeded.coderId,
    parentId: reviewId,
    issueNumber: 3,
    identifier: `${issuePrefix}-3`,
    startedAt: new Date(now.getTime() - sevenHoursMs),
  });

  // Ten uncommented runs against the child issue.
  await insertRuns({
    companyId,
    agentId: seeded.coderId,
    issueId: childId,
    count: 10,
    now,
  });

  const service = productivityReviewService(db);
  const result = await service.reconcileProductivityReviews({ now, companyId });
  const reviews = await listProductivityReviews(companyId);

  expect(result.created).toBe(0);
  // Only the pre-seeded review remains.
  expect(reviews).toHaveLength(1);
});
// Creates a review, marks it done at `now`, reconciles again thirty minutes later and checks
// that the pass is reported as snoozed without creating a second review.
it("treats a recently completed review as a snooze window", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  const { companyId } = seeded;
  await insertRuns({
    companyId,
    agentId: seeded.coderId,
    issueId: seeded.issueId,
    count: 10,
    now,
  });
  const service = productivityReviewService(db);

  await service.reconcileProductivityReviews({ now, companyId });
  const [review] = await listProductivityReviews(companyId);

  // Complete the freshly created review.
  const completion = { status: "done", updatedAt: now };
  await db.update(issues).set(completion).where(eq(issues.id, review!.id));

  const thirtyMinutesLater = new Date(now.getTime() + 30 * 60 * 1000);
  const result = await service.reconcileProductivityReviews({
    now: thirtyMinutesLater,
    companyId,
  });
  const reviews = await listProductivityReviews(companyId);

  expect(result.snoozed).toBe(1);
  expect(reviews).toHaveLength(1);
});
// Creates a review from ten uncommented runs, checks that the continuation hold is reported
// as active, records the hold, and checks that exactly one activity-log row was written.
it("reports and logs soft-stop holds for open no-comment reviews", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  const { companyId, issueId, coderId } = seeded;
  const insertedRuns = await insertRuns({
    companyId,
    agentId: coderId,
    issueId,
    count: 10,
    now,
  });
  // Index 0 is the run created at `now`, i.e. the most recent one.
  const latestRun = insertedRuns[0];
  const service = productivityReviewService(db);

  await service.reconcileProductivityReviews({ now, companyId });
  const [review] = await listProductivityReviews(companyId);
  const hold = await service.isProductivityReviewContinuationHoldActive({
    companyId,
    issueId,
    agentId: coderId,
    now,
  });

  expect(hold.held).toBe(true);
  // Unreachable when the expectation above passes; kept so the compiler narrows `hold`.
  if (!hold.held) return;

  await service.recordContinuationHold({
    companyId,
    issueId,
    runId: latestRun!.id as string,
    agentId: coderId,
    reviewIssueId: review!.id,
    trigger: hold.trigger,
    reason: hold.reason,
  });

  const isHoldActivity = eq(activityLog.action, "issue.productivity_review_continuation_held");
  const activities = await db.select().from(activityLog).where(isHoldActivity);
  expect(activities).toHaveLength(1);
  expect(activities[0]?.entityId).toBe(issueId);
});
// Sets the source issue's requestDepth to the largest 32-bit signed integer, reconciles, and
// checks that nothing failed and the created review's depth equals MAX_ISSUE_REQUEST_DEPTH.
it("clamps poisoned requestDepth metadata instead of aborting productivity reconciliation", async () => {
  const now = new Date("2026-04-28T12:00:00.000Z");
  const seeded = await seedAssignedIssue();
  const { companyId, issueId } = seeded;

  const poisonedDepth = { requestDepth: 2_147_483_647 };
  await db.update(issues).set(poisonedDepth).where(eq(issues.id, issueId));

  await insertRuns({
    companyId,
    agentId: seeded.coderId,
    issueId,
    count: DEFAULT_PRODUCTIVITY_REVIEW_NO_COMMENT_STREAK_RUNS,
    now,
  });

  const service = productivityReviewService(db);
  const result = await service.reconcileProductivityReviews({ now, companyId });

  expect(result.failed).toBe(0);
  const reviews = await listProductivityReviews(companyId);
  expect(reviews[0]?.requestDepth).toBe(MAX_ISSUE_REQUEST_DEPTH);
});
});