[codex] Add runtime lifecycle recovery and live issue visibility (#4419)

This commit is contained in:
Dotta 2026-04-24 15:50:32 -05:00 committed by GitHub
parent 9a8d219949
commit 5a0c1979cf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
121 changed files with 9625 additions and 2044 deletions

View file

@ -92,6 +92,7 @@ export const DEFAULT_PAPERCLIP_AGENT_PROMPT_TEMPLATE = [
"- If woken by a human comment on a dependency-blocked issue, respond or triage the comment without treating the blocked deliverable work as unblocked.",
"- Create child issues directly when you know what needs to be done; use issue-thread interactions when the board/user must choose suggested tasks, answer structured questions, or confirm a proposal.",
"- To ask for that input, create an interaction on the current issue with POST /api/issues/{issueId}/interactions using kind suggest_tasks, ask_user_questions, or request_confirmation. Use continuationPolicy wake_assignee when you need to resume after a response; for request_confirmation this resumes only after acceptance.",
"- When you intentionally restart follow-up work on a completed assigned issue, include structured `resume: true` with the POST /api/issues/{issueId}/comments or PATCH /api/issues/{issueId} comment payload. Generic agent comments on closed issues are inert by default.",
"- For plan approval, update the plan document first, then create request_confirmation targeting the latest plan revision with idempotencyKey confirmation:{issueId}:plan:{revisionId}. Wait for acceptance before creating implementation subtasks, and create a fresh confirmation after superseding board/user comments if approval is still needed.",
"- If blocked, mark the issue blocked and name the unblock owner and action.",
"- Respect budget, pause/cancel, approval gates, and company boundaries.",

View file

@ -0,0 +1,13 @@
CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_liveness_recovery_incident_uq"
ON "issues" USING btree ("company_id","origin_kind","origin_id")
WHERE "origin_kind" = 'harness_liveness_escalation'
AND "origin_id" IS NOT NULL
AND "hidden_at" IS NULL
AND "status" NOT IN ('done', 'cancelled');
--> statement-breakpoint
CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_liveness_recovery_leaf_uq"
ON "issues" USING btree ("company_id","origin_kind","origin_fingerprint")
WHERE "origin_kind" = 'harness_liveness_escalation'
AND "origin_fingerprint" <> 'default'
AND "hidden_at" IS NULL
AND "status" NOT IN ('done', 'cancelled');

View file

@ -0,0 +1,70 @@
ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_at" timestamp with time zone;
--> statement-breakpoint
ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_seq" integer DEFAULT 0 NOT NULL;
--> statement-breakpoint
ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_stream" text;
--> statement-breakpoint
ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_bytes" bigint;
--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "heartbeat_runs_company_status_last_output_idx"
ON "heartbeat_runs" USING btree ("company_id","status","last_output_at");
--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "heartbeat_runs_company_status_process_started_idx"
ON "heartbeat_runs" USING btree ("company_id","status","process_started_at");
--> statement-breakpoint
CREATE TABLE IF NOT EXISTS "heartbeat_run_watchdog_decisions" (
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
"company_id" uuid NOT NULL,
"run_id" uuid NOT NULL,
"evaluation_issue_id" uuid,
"decision" text NOT NULL,
"snoozed_until" timestamp with time zone,
"reason" text,
"created_by_agent_id" uuid,
"created_by_user_id" text,
"created_by_run_id" uuid,
"created_at" timestamp with time zone DEFAULT now() NOT NULL
);
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_company_id_companies_id_fk" FOREIGN KEY ("company_id") REFERENCES "public"."companies"("id") ON DELETE no action ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_run_id_heartbeat_runs_id_fk" FOREIGN KEY ("run_id") REFERENCES "public"."heartbeat_runs"("id") ON DELETE cascade ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_evaluation_issue_id_issues_id_fk" FOREIGN KEY ("evaluation_issue_id") REFERENCES "public"."issues"("id") ON DELETE set null ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_created_by_agent_id_agents_id_fk" FOREIGN KEY ("created_by_agent_id") REFERENCES "public"."agents"("id") ON DELETE set null ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
DO $$ BEGIN
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_created_by_run_id_heartbeat_runs_id_fk" FOREIGN KEY ("created_by_run_id") REFERENCES "public"."heartbeat_runs"("id") ON DELETE set null ON UPDATE no action;
EXCEPTION
WHEN duplicate_object THEN null;
END $$;
--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "heartbeat_run_watchdog_decisions_company_run_created_idx"
ON "heartbeat_run_watchdog_decisions" USING btree ("company_id","run_id","created_at");
--> statement-breakpoint
CREATE INDEX IF NOT EXISTS "heartbeat_run_watchdog_decisions_company_run_snooze_idx"
ON "heartbeat_run_watchdog_decisions" USING btree ("company_id","run_id","snoozed_until");
--> statement-breakpoint
CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_stale_run_evaluation_uq"
ON "issues" USING btree ("company_id","origin_kind","origin_id")
WHERE "origin_kind" = 'stale_active_run_evaluation'
AND "origin_id" IS NOT NULL
AND "hidden_at" IS NULL
AND "status" NOT IN ('done', 'cancelled');

View file

@ -484,6 +484,20 @@
"when": 1776959400000,
"tag": "0068_environment_local_driver_unique",
"breakpoints": true
},
{
"idx": 69,
"version": "7",
"when": 1776780003000,
"tag": "0069_liveness_recovery_dedupe",
"breakpoints": true
},
{
"idx": 70,
"version": "7",
"when": 1776780004000,
"tag": "0070_active_run_output_watchdog",
"breakpoints": true
}
]
}

View file

@ -0,0 +1,34 @@
import { index, pgTable, text, timestamp, uuid } from "drizzle-orm/pg-core";
import { agents } from "./agents.js";
import { companies } from "./companies.js";
import { heartbeatRuns } from "./heartbeat_runs.js";
import { issues } from "./issues.js";
export const heartbeatRunWatchdogDecisions = pgTable(
"heartbeat_run_watchdog_decisions",
{
id: uuid("id").primaryKey().defaultRandom(),
companyId: uuid("company_id").notNull().references(() => companies.id),
runId: uuid("run_id").notNull().references(() => heartbeatRuns.id, { onDelete: "cascade" }),
evaluationIssueId: uuid("evaluation_issue_id").references(() => issues.id, { onDelete: "set null" }),
decision: text("decision").notNull(),
snoozedUntil: timestamp("snoozed_until", { withTimezone: true }),
reason: text("reason"),
createdByAgentId: uuid("created_by_agent_id").references(() => agents.id, { onDelete: "set null" }),
createdByUserId: text("created_by_user_id"),
createdByRunId: uuid("created_by_run_id").references(() => heartbeatRuns.id, { onDelete: "set null" }),
createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
},
(table) => ({
companyRunCreatedIdx: index("heartbeat_run_watchdog_decisions_company_run_created_idx").on(
table.companyId,
table.runId,
table.createdAt,
),
companyRunSnoozeIdx: index("heartbeat_run_watchdog_decisions_company_run_snooze_idx").on(
table.companyId,
table.runId,
table.snoozedUntil,
),
}),
);

View file

@ -34,6 +34,10 @@ export const heartbeatRuns = pgTable(
processPid: integer("process_pid"),
processGroupId: integer("process_group_id"),
processStartedAt: timestamp("process_started_at", { withTimezone: true }),
lastOutputAt: timestamp("last_output_at", { withTimezone: true }),
lastOutputSeq: integer("last_output_seq").notNull().default(0),
lastOutputStream: text("last_output_stream"),
lastOutputBytes: bigint("last_output_bytes", { mode: "number" }),
retryOfRunId: uuid("retry_of_run_id").references((): AnyPgColumn => heartbeatRuns.id, {
onDelete: "set null",
}),
@ -64,5 +68,15 @@ export const heartbeatRuns = pgTable(
table.livenessState,
table.createdAt,
),
companyStatusLastOutputIdx: index("heartbeat_runs_company_status_last_output_idx").on(
table.companyId,
table.status,
table.lastOutputAt,
),
companyStatusProcessStartedIdx: index("heartbeat_runs_company_status_process_started_idx").on(
table.companyId,
table.status,
table.processStartedAt,
),
}),
);

View file

@ -53,6 +53,7 @@ export { documentRevisions } from "./document_revisions.js";
export { issueDocuments } from "./issue_documents.js";
export { heartbeatRuns } from "./heartbeat_runs.js";
export { heartbeatRunEvents } from "./heartbeat_run_events.js";
export { heartbeatRunWatchdogDecisions } from "./heartbeat_run_watchdog_decisions.js";
export { costEvents } from "./cost_events.js";
export { financeEvents } from "./finance_events.js";
export { approvals } from "./approvals.js";

View file

@ -91,5 +91,29 @@ export const issues = pgTable(
and ${table.executionRunId} is not null
and ${table.status} in ('backlog', 'todo', 'in_progress', 'in_review', 'blocked')`,
),
activeLivenessRecoveryIncidentIdx: uniqueIndex("issues_active_liveness_recovery_incident_uq")
.on(table.companyId, table.originKind, table.originId)
.where(
sql`${table.originKind} = 'harness_liveness_escalation'
and ${table.originId} is not null
and ${table.hiddenAt} is null
and ${table.status} not in ('done', 'cancelled')`,
),
activeLivenessRecoveryLeafIdx: uniqueIndex("issues_active_liveness_recovery_leaf_uq")
.on(table.companyId, table.originKind, table.originFingerprint)
.where(
sql`${table.originKind} = 'harness_liveness_escalation'
and ${table.originFingerprint} <> 'default'
and ${table.hiddenAt} is null
and ${table.status} not in ('done', 'cancelled')`,
),
activeStaleRunEvaluationIdx: uniqueIndex("issues_active_stale_run_evaluation_uq")
.on(table.companyId, table.originKind, table.originId)
.where(
sql`${table.originKind} = 'stale_active_run_evaluation'
and ${table.originId} is not null
and ${table.hiddenAt} is null
and ${table.status} not in ('done', 'cancelled')`,
),
}),
);

View file

@ -33,77 +33,56 @@ export type EmbeddedPostgresTestDatabase = {
let embeddedPostgresSupportPromise: Promise<EmbeddedPostgresTestSupport> | null = null;
const DEFAULT_PAPERCLIP_EMBEDDED_POSTGRES_PORT = 54329;
function getReservedTestPorts(): Set<number> {
const configuredPorts = [
DEFAULT_PAPERCLIP_EMBEDDED_POSTGRES_PORT,
Number.parseInt(process.env.PAPERCLIP_EMBEDDED_POSTGRES_PORT ?? "", 10),
...String(process.env.PAPERCLIP_TEST_POSTGRES_RESERVED_PORTS ?? "")
.split(",")
.map((value) => Number.parseInt(value.trim(), 10)),
];
return new Set(configuredPorts.filter((port) => Number.isInteger(port) && port > 0 && port <= 65535));
}
async function getEmbeddedPostgresCtor(): Promise<EmbeddedPostgresCtor> {
const mod = await import("embedded-postgres");
return mod.default as EmbeddedPostgresCtor;
}
async function getAvailablePort(): Promise<number> {
return await new Promise((resolve, reject) => {
const server = net.createServer();
server.unref();
server.on("error", reject);
server.listen(0, "127.0.0.1", () => {
const address = server.address();
if (!address || typeof address === "string") {
server.close(() => reject(new Error("Failed to allocate test port")));
return;
}
const { port } = address;
server.close((error) => {
if (error) reject(error);
else resolve(port);
const reservedPorts = getReservedTestPorts();
for (let attempt = 0; attempt < 20; attempt += 1) {
const port = await new Promise<number>((resolve, reject) => {
const server = net.createServer();
server.unref();
server.on("error", reject);
server.listen(0, "127.0.0.1", () => {
const address = server.address();
if (!address || typeof address === "string") {
server.close(() => reject(new Error("Failed to allocate test port")));
return;
}
const { port } = address;
server.close((error) => {
if (error) reject(error);
else resolve(port);
});
});
});
});
}
function formatEmbeddedPostgresError(error: unknown): string {
if (error instanceof Error && error.message.length > 0) return error.message;
if (typeof error === "string" && error.length > 0) return error;
return "embedded Postgres startup failed";
}
async function probeEmbeddedPostgresSupport(): Promise<EmbeddedPostgresTestSupport> {
const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-embedded-postgres-probe-"));
const port = await getAvailablePort();
const EmbeddedPostgres = await getEmbeddedPostgresCtor();
const instance = new EmbeddedPostgres({
databaseDir: dataDir,
user: "paperclip",
password: "paperclip",
port,
persistent: true,
initdbFlags: ["--encoding=UTF8", "--locale=C", "--lc-messages=C"],
onLog: () => {},
onError: () => {},
});
try {
await instance.initialise();
await instance.start();
return { supported: true };
} catch (error) {
return {
supported: false,
reason: formatEmbeddedPostgresError(error),
};
} finally {
await instance.stop().catch(() => {});
fs.rmSync(dataDir, { recursive: true, force: true });
if (!reservedPorts.has(port)) return port;
}
throw new Error(
`Failed to allocate embedded Postgres test port outside reserved Paperclip ports: ${[
...reservedPorts,
].join(", ")}`,
);
}
export async function getEmbeddedPostgresTestSupport(): Promise<EmbeddedPostgresTestSupport> {
if (!embeddedPostgresSupportPromise) {
embeddedPostgresSupportPromise = probeEmbeddedPostgresSupport();
}
return await embeddedPostgresSupportPromise;
}
export async function startEmbeddedPostgresTestDatabase(
tempDirPrefix: string,
): Promise<EmbeddedPostgresTestDatabase> {
async function createEmbeddedPostgresTestInstance(tempDirPrefix: string) {
const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), tempDirPrefix));
const port = await getAvailablePort();
const EmbeddedPostgres = await getEmbeddedPostgresCtor();
@ -118,6 +97,51 @@ export async function startEmbeddedPostgresTestDatabase(
onError: () => {},
});
return { dataDir, port, instance };
}
function cleanupEmbeddedPostgresTestDirs(dataDir: string) {
fs.rmSync(dataDir, { recursive: true, force: true });
}
function formatEmbeddedPostgresError(error: unknown): string {
if (error instanceof Error && error.message.length > 0) return error.message;
if (typeof error === "string" && error.length > 0) return error;
return "embedded Postgres startup failed";
}
async function probeEmbeddedPostgresSupport(): Promise<EmbeddedPostgresTestSupport> {
const { dataDir, instance } = await createEmbeddedPostgresTestInstance(
"paperclip-embedded-postgres-probe-",
);
try {
await instance.initialise();
await instance.start();
return { supported: true };
} catch (error) {
return {
supported: false,
reason: formatEmbeddedPostgresError(error),
};
} finally {
await instance.stop().catch(() => {});
cleanupEmbeddedPostgresTestDirs(dataDir);
}
}
export async function getEmbeddedPostgresTestSupport(): Promise<EmbeddedPostgresTestSupport> {
if (!embeddedPostgresSupportPromise) {
embeddedPostgresSupportPromise = probeEmbeddedPostgresSupport();
}
return await embeddedPostgresSupportPromise;
}
export async function startEmbeddedPostgresTestDatabase(
tempDirPrefix: string,
): Promise<EmbeddedPostgresTestDatabase> {
const { dataDir, port, instance } = await createEmbeddedPostgresTestInstance(tempDirPrefix);
try {
await instance.initialise();
await instance.start();
@ -131,12 +155,12 @@ export async function startEmbeddedPostgresTestDatabase(
connectionString,
cleanup: async () => {
await instance.stop().catch(() => {});
fs.rmSync(dataDir, { recursive: true, force: true });
cleanupEmbeddedPostgresTestDirs(dataDir);
},
};
} catch (error) {
await instance.stop().catch(() => {});
fs.rmSync(dataDir, { recursive: true, force: true });
cleanupEmbeddedPostgresTestDirs(dataDir);
throw new Error(
`Failed to start embedded PostgreSQL test database: ${formatEmbeddedPostgresError(error)}`,
);

View file

@ -450,7 +450,7 @@ export function createToolDefinitions(client: PaperclipApiClient): ToolDefinitio
),
makeTool(
"paperclipUpdateIssue",
"Patch an issue, optionally including a comment",
"Patch an issue, optionally including a comment; include resume=true when intentionally requesting follow-up on resumable closed work",
updateIssueToolSchema,
async ({ issueId, ...body }) =>
client.requestJson("PATCH", `/issues/${encodeURIComponent(issueId)}`, { body }),
@ -475,7 +475,7 @@ export function createToolDefinitions(client: PaperclipApiClient): ToolDefinitio
),
makeTool(
"paperclipAddComment",
"Add a comment to an issue",
"Add a comment to an issue; include resume=true when intentionally requesting follow-up on resumable closed work",
addCommentToolSchema,
async ({ issueId, ...body }) =>
client.requestJson("POST", `/issues/${encodeURIComponent(issueId)}/comments`, { body }),

View file

@ -162,7 +162,7 @@ export const ISSUE_THREAD_INTERACTION_CONTINUATION_POLICIES = [
export type IssueThreadInteractionContinuationPolicy =
(typeof ISSUE_THREAD_INTERACTION_CONTINUATION_POLICIES)[number];
export const ISSUE_ORIGIN_KINDS = ["manual", "routine_execution"] as const;
export const ISSUE_ORIGIN_KINDS = ["manual", "routine_execution", "stale_active_run_evaluation"] as const;
export type BuiltInIssueOriginKind = (typeof ISSUE_ORIGIN_KINDS)[number];
export type PluginIssueOriginKind = `plugin:${string}`;
export type IssueOriginKind = BuiltInIssueOriginKind | PluginIssueOriginKind;

View file

@ -324,6 +324,9 @@ export type {
IssueWorkProductReviewState,
Issue,
IssueAssigneeAdapterOverrides,
IssueBlockerAttention,
IssueBlockerAttentionReason,
IssueBlockerAttentionState,
IssueReferenceSource,
IssueRelatedWorkItem,
IssueRelatedWorkSummary,

View file

@ -37,6 +37,10 @@ export interface HeartbeatRun {
processPid: number | null;
processGroupId?: number | null;
processStartedAt: Date | null;
lastOutputAt: Date | null;
lastOutputSeq: number;
lastOutputStream: "stdout" | "stderr" | null;
lastOutputBytes: number | null;
retryOfRunId: string | null;
processLossRetryCount: number;
scheduledRetryAt?: Date | null;
@ -51,6 +55,28 @@ export interface HeartbeatRun {
contextSnapshot: Record<string, unknown> | null;
createdAt: Date;
updatedAt: Date;
outputSilence?: HeartbeatRunOutputSilence;
}
export type HeartbeatRunOutputSilenceLevel =
| "not_applicable"
| "ok"
| "suspicious"
| "critical"
| "snoozed";
export interface HeartbeatRunOutputSilence {
lastOutputAt: Date | string | null;
lastOutputSeq: number;
lastOutputStream: "stdout" | "stderr" | null;
silenceStartedAt: Date | string | null;
silenceAgeMs: number | null;
level: HeartbeatRunOutputSilenceLevel;
suspicionThresholdMs: number;
criticalThresholdMs: number;
snoozedUntil: Date | string | null;
evaluationIssueId: string | null;
evaluationIssueIdentifier: string | null;
}
export interface AgentWakeupSkipped {

View file

@ -118,6 +118,9 @@ export type {
export type {
Issue,
IssueAssigneeAdapterOverrides,
IssueBlockerAttention,
IssueBlockerAttentionReason,
IssueBlockerAttentionState,
IssueReferenceSource,
IssueRelatedWorkItem,
IssueRelatedWorkSummary,

View file

@ -27,6 +27,7 @@ export interface InstanceExperimentalSettings {
enableEnvironments: boolean;
enableIsolatedWorkspaces: boolean;
autoRestartDevServerWhenIdle: boolean;
enableIssueGraphLivenessAutoRecovery: boolean;
}
export interface InstanceSettings {

View file

@ -116,6 +116,24 @@ export interface IssueRelationIssueSummary {
priority: IssuePriority;
assigneeAgentId: string | null;
assigneeUserId: string | null;
terminalBlockers?: IssueRelationIssueSummary[];
}
export type IssueBlockerAttentionState = "none" | "covered" | "needs_attention";
export type IssueBlockerAttentionReason =
| "active_child"
| "active_dependency"
| "attention_required"
| null;
export interface IssueBlockerAttention {
state: IssueBlockerAttentionState;
reason: IssueBlockerAttentionReason;
unresolvedBlockerCount: number;
coveredBlockerCount: number;
attentionBlockerCount: number;
sampleBlockerIdentifier: string | null;
}
export interface IssueRelation {
@ -242,6 +260,7 @@ export interface Issue {
labels?: IssueLabel[];
blockedBy?: IssueRelationIssueSummary[];
blocks?: IssueRelationIssueSummary[];
blockerAttention?: IssueBlockerAttention;
relatedWork?: IssueRelatedWorkSummary;
referencedIssueIdentifiers?: string[];
planDocument?: IssueDocument | null;
@ -267,6 +286,7 @@ export interface IssueComment {
authorAgentId: string | null;
authorUserId: string | null;
body: string;
followUpRequested?: boolean;
createdAt: Date;
updatedAt: Date;
}

View file

@ -36,6 +36,7 @@ export const instanceExperimentalSettingsSchema = z.object({
enableEnvironments: z.boolean().default(false),
enableIsolatedWorkspaces: z.boolean().default(false),
autoRestartDevServerWhenIdle: z.boolean().default(false),
enableIssueGraphLivenessAutoRecovery: z.boolean().default(false),
}).strict();
export const patchInstanceExperimentalSettingsSchema = instanceExperimentalSettingsSchema.partial();

View file

@ -171,6 +171,7 @@ export const updateIssueSchema = createIssueSchema.partial().extend({
comment: z.string().min(1).optional(),
reviewRequest: issueReviewRequestSchema.optional().nullable(),
reopen: z.boolean().optional(),
resume: z.boolean().optional(),
interrupt: z.boolean().optional(),
hiddenAt: z.string().datetime().nullable().optional(),
});
@ -188,6 +189,7 @@ export type CheckoutIssue = z.infer<typeof checkoutIssueSchema>;
export const addIssueCommentSchema = z.object({
body: z.string().min(1),
reopen: z.boolean().optional(),
resume: z.boolean().optional(),
interrupt: z.boolean().optional(),
});