[codex] Bound productivity review recovery loops (#4948)

## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - The heartbeat/productivity review subsystem detects when assigned
work is likely stuck or churning.
> - Productivity reviews are useful, but repeated reconciliation can
create noisy refresh comments or repeated review issues around the same
source issue.
> - That makes manager follow-up harder because the signal can get
buried under duplicate review activity.
> - This pull request bounds productivity review refreshes and creation
loops while preserving the existing escalation path.
> - The benefit is a quieter recovery loop that still surfaces stuck or
high-churn work for manager attention.

## What Changed

- Added refresh throttling for open productivity review issues,
including a one-hour default interval and a maximum of three refresh
comments per open review.
- Added a rolling 24-hour creation cap so completed/closed reviews
cannot immediately recreate review issues indefinitely for the same
source issue.
- Excluded cancelled productivity reviews from the creation cap so
manager cancellations do not silently suppress future legitimate
reviews.
- Preserved productivity review timestamps in deterministic test paths
and added targeted coverage for immediate refresh suppression, refresh
caps, creation caps, and cancelled-review exclusion.

## Verification

- `pnpm run preflight:workspace-links && pnpm exec vitest run
server/src/__tests__/productivity-review-service.test.ts`
- `pnpm exec vitest run
server/src/__tests__/productivity-review-service.test.ts`
- Greptile Review: 5/5 on commit
`bcf25832d0ffae25890b2ee7eed112d1c2d114fe` with review threads resolved.
- GitHub PR checks passed on the latest head: `policy`, `verify`, `e2e`,
`Greptile Review`, and `security/snyk (cryppadotta)`.
- Verified the branch is rebased onto `public-gh/master` with no
conflicts.
- Verified the diff does not include `pnpm-lock.yaml`, database schema
changes, or migrations.

## Risks

- Low-to-medium risk: this changes automation cadence for productivity
reviews. A truly stuck issue may receive fewer repeated refresh
comments, but the original review issue remains open and assigned for
manager action.
- No migration risk: this is server logic and tests only.

> Checked [`ROADMAP.md`](ROADMAP.md) for overlapping planned core work;
this is a targeted recovery-loop fix and does not add a new roadmap
feature.

## Model Used

- OpenAI Codex coding agent, GPT-5 model family, tool-using software
engineering mode. Exact context window is not exposed in this runtime.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots (not applicable; server-only change)
- [x] I have updated relevant documentation to reflect my changes (not
applicable; no user-facing docs or commands changed)
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
Dotta 2026-05-01 08:32:04 -05:00 committed by GitHub
parent d2dd759caa
commit 42a299fb9d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 266 additions and 11 deletions

View file

@ -22,12 +22,17 @@ export const DEFAULT_PRODUCTIVITY_REVIEW_LONG_ACTIVE_HOURS = 6;
export const DEFAULT_PRODUCTIVITY_REVIEW_HIGH_CHURN_HOURLY = 10;
export const DEFAULT_PRODUCTIVITY_REVIEW_HIGH_CHURN_SIX_HOURS = 30;
export const DEFAULT_PRODUCTIVITY_REVIEW_RESOLVED_SNOOZE_MS = 6 * 60 * 60 * 1000;
export const DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS = 60 * 60 * 1000;
export const DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS = 3;
export const DEFAULT_PRODUCTIVITY_REVIEW_CREATION_WINDOW_MS = 24 * 60 * 60 * 1000;
export const DEFAULT_PRODUCTIVITY_REVIEW_MAX_CREATIONS_PER_WINDOW = 3;
const TERMINAL_RUN_STATUSES = ["succeeded", "failed", "cancelled", "timed_out"] as const;
const ACTIVE_RUN_STATUSES = ["queued", "running", "scheduled_retry"] as const;
const MAX_CANDIDATE_ISSUES = 250;
const MAX_RUNS_FOR_STREAK = 100;
const MAX_PARENT_WALK_DEPTH = 25;
export const PRODUCTIVITY_REVIEW_REFRESH_COMMENT_PREFIX = "Productivity review evidence refreshed.";
type IssueRow = typeof issues.$inferSelect;
type AgentRow = typeof agents.$inferSelect;
@ -40,6 +45,10 @@ type ProductivityReviewThresholds = {
highChurnHourly: number;
highChurnSixHours: number;
resolvedSnoozeMs: number;
refreshIntervalMs: number;
maxRefreshComments: number;
creationWindowMs: number;
maxCreationsPerWindow: number;
};
type ProductivityReviewEvidence = {
@ -120,6 +129,11 @@ function readPositiveInteger(value: number, fallback: number) {
return Number.isFinite(value) && value > 0 ? Math.floor(value) : fallback;
}
function coerceDate(value: Date | string | null | undefined) {
if (!value) return null;
return value instanceof Date ? value : new Date(value);
}
function buildThresholds(overrides?: Partial<ProductivityReviewThresholds>): ProductivityReviewThresholds {
return {
noCommentStreakRuns: readPositiveInteger(
@ -142,6 +156,22 @@ function buildThresholds(overrides?: Partial<ProductivityReviewThresholds>): Pro
overrides?.resolvedSnoozeMs ?? DEFAULT_PRODUCTIVITY_REVIEW_RESOLVED_SNOOZE_MS,
DEFAULT_PRODUCTIVITY_REVIEW_RESOLVED_SNOOZE_MS,
),
refreshIntervalMs: readPositiveInteger(
overrides?.refreshIntervalMs ?? DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS,
DEFAULT_PRODUCTIVITY_REVIEW_REFRESH_INTERVAL_MS,
),
maxRefreshComments: readPositiveInteger(
overrides?.maxRefreshComments ?? DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS,
DEFAULT_PRODUCTIVITY_REVIEW_MAX_REFRESH_COMMENTS,
),
creationWindowMs: readPositiveInteger(
overrides?.creationWindowMs ?? DEFAULT_PRODUCTIVITY_REVIEW_CREATION_WINDOW_MS,
DEFAULT_PRODUCTIVITY_REVIEW_CREATION_WINDOW_MS,
),
maxCreationsPerWindow: readPositiveInteger(
overrides?.maxCreationsPerWindow ?? DEFAULT_PRODUCTIVITY_REVIEW_MAX_CREATIONS_PER_WINDOW,
DEFAULT_PRODUCTIVITY_REVIEW_MAX_CREATIONS_PER_WINDOW,
),
};
}
@ -249,6 +279,69 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
.then((rows) => rows[0] ?? null);
}
async function countRecentProductivityReviews(
companyId: string,
sourceIssueId: string,
thresholds: ProductivityReviewThresholds,
now: Date,
) {
const cutoff = new Date(now.getTime() - thresholds.creationWindowMs);
return db
.select({ count: sql<number>`count(*)::int` })
.from(issues)
.where(
and(
eq(issues.companyId, companyId),
eq(issues.originKind, PRODUCTIVITY_REVIEW_ORIGIN_KIND),
eq(issues.originId, sourceIssueId),
isNull(issues.hiddenAt),
sql`${issues.status} <> 'cancelled'`,
sql`${issues.createdAt} >= ${cutoff.toISOString()}::timestamptz`,
),
)
.then((rows) => Number(rows[0]?.count ?? 0));
}
async function getRefreshCommentState(companyId: string, reviewIssueId: string) {
return db
.select({
count: sql<number>`count(*)::int`,
latestCreatedAt: sql<Date | null>`max(${issueComments.createdAt})`,
})
.from(issueComments)
.where(
and(
eq(issueComments.companyId, companyId),
eq(issueComments.issueId, reviewIssueId),
sql`${issueComments.body} like ${`${PRODUCTIVITY_REVIEW_REFRESH_COMMENT_PREFIX}%`}`,
),
)
.then((rows) => {
const row = rows[0];
return {
count: Number(row?.count ?? 0),
latestCreatedAt: coerceDate(row?.latestCreatedAt),
};
});
}
async function addRefreshComment(
reviewIssueId: string,
body: string,
generatedAt: Date,
) {
const comment = await issuesSvc.addComment(reviewIssueId, body, {});
await db
.update(issueComments)
.set({ createdAt: generatedAt, updatedAt: generatedAt })
.where(eq(issueComments.id, comment.id));
await db
.update(issues)
.set({ updatedAt: generatedAt })
.where(eq(issues.id, reviewIssueId));
return comment;
}
async function countIssueRunsSince(companyId: string, agentId: string, issueId: string, since: Date) {
return db
.select({ count: sql<number>`count(*)::int` })
@ -538,11 +631,19 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
async function createOrUpdateReview(
evidence: ProductivityReviewEvidence,
opts: { prefix: string },
opts: { prefix: string; thresholds: ProductivityReviewThresholds },
) {
const existing = await findOpenProductivityReview(evidence.sourceIssue.companyId, evidence.sourceIssue.id);
if (existing) {
await issuesSvc.addComment(existing.id, buildRefreshComment(evidence, opts.prefix), {});
const refreshState = await getRefreshCommentState(evidence.sourceIssue.companyId, existing.id);
const lastRefreshOrCreationAt = refreshState.latestCreatedAt ?? existing.createdAt;
if (
refreshState.count >= opts.thresholds.maxRefreshComments ||
evidence.generatedAt.getTime() - lastRefreshOrCreationAt.getTime() < opts.thresholds.refreshIntervalMs
) {
return { kind: "existing" as const, reviewIssueId: existing.id };
}
await addRefreshComment(existing.id, buildRefreshComment(evidence, opts.prefix), evidence.generatedAt);
await logActivity(db, {
companyId: evidence.sourceIssue.companyId,
actorType: "system",
@ -563,6 +664,16 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
return { kind: "updated" as const, reviewIssueId: existing.id };
}
const recentCreationCount = await countRecentProductivityReviews(
evidence.sourceIssue.companyId,
evidence.sourceIssue.id,
opts.thresholds,
evidence.generatedAt,
);
if (recentCreationCount >= opts.thresholds.maxCreationsPerWindow) {
return { kind: "creation_capped" as const, reviewIssueId: null };
}
const ownerAgentId = await resolveReviewOwnerAgentId(evidence.sourceIssue, evidence.sourceAgent);
let review: Awaited<ReturnType<typeof issuesSvc.create>>;
try {
@ -593,6 +704,10 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
if (!raced) throw error;
return { kind: "existing" as const, reviewIssueId: raced.id };
}
await db
.update(issues)
.set({ createdAt: evidence.generatedAt, updatedAt: evidence.generatedAt })
.where(eq(issues.id, review.id));
await logActivity(db, {
companyId: evidence.sourceIssue.companyId,
@ -667,6 +782,7 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
updated: 0,
existing: 0,
snoozed: 0,
creationCapped: 0,
skipped: 0,
failed: 0,
reviewIssueIds: [] as string[],
@ -703,11 +819,12 @@ export function productivityReviewService(db: Db, deps?: { enqueueWakeup?: Enque
prefixCache.set(candidate.companyId, prefix);
}
try {
const outcome = await createOrUpdateReview(evidence, { prefix });
const outcome = await createOrUpdateReview(evidence, { prefix, thresholds });
if (outcome.kind === "created") result.created += 1;
else if (outcome.kind === "updated") result.updated += 1;
else if (outcome.kind === "creation_capped") result.creationCapped += 1;
else result.existing += 1;
result.reviewIssueIds.push(outcome.reviewIssueId);
if (outcome.reviewIssueId) result.reviewIssueIds.push(outcome.reviewIssueId);
} catch (err) {
result.failed += 1;
result.failedIssueIds.push(candidate.id);