mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-14 01:50:39 +09:00
[codex] Add runtime lifecycle recovery and live issue visibility (#4419)
This commit is contained in:
parent
9a8d219949
commit
5a0c1979cf
121 changed files with 9625 additions and 2044 deletions
13
packages/db/src/migrations/0069_liveness_recovery_dedupe.sql
Normal file
13
packages/db/src/migrations/0069_liveness_recovery_dedupe.sql
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_liveness_recovery_incident_uq"
|
||||
ON "issues" USING btree ("company_id","origin_kind","origin_id")
|
||||
WHERE "origin_kind" = 'harness_liveness_escalation'
|
||||
AND "origin_id" IS NOT NULL
|
||||
AND "hidden_at" IS NULL
|
||||
AND "status" NOT IN ('done', 'cancelled');
|
||||
--> statement-breakpoint
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_liveness_recovery_leaf_uq"
|
||||
ON "issues" USING btree ("company_id","origin_kind","origin_fingerprint")
|
||||
WHERE "origin_kind" = 'harness_liveness_escalation'
|
||||
AND "origin_fingerprint" <> 'default'
|
||||
AND "hidden_at" IS NULL
|
||||
AND "status" NOT IN ('done', 'cancelled');
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_at" timestamp with time zone;
|
||||
--> statement-breakpoint
|
||||
ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_seq" integer DEFAULT 0 NOT NULL;
|
||||
--> statement-breakpoint
|
||||
ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_stream" text;
|
||||
--> statement-breakpoint
|
||||
ALTER TABLE "heartbeat_runs" ADD COLUMN IF NOT EXISTS "last_output_bytes" bigint;
|
||||
--> statement-breakpoint
|
||||
CREATE INDEX IF NOT EXISTS "heartbeat_runs_company_status_last_output_idx"
|
||||
ON "heartbeat_runs" USING btree ("company_id","status","last_output_at");
|
||||
--> statement-breakpoint
|
||||
CREATE INDEX IF NOT EXISTS "heartbeat_runs_company_status_process_started_idx"
|
||||
ON "heartbeat_runs" USING btree ("company_id","status","process_started_at");
|
||||
--> statement-breakpoint
|
||||
CREATE TABLE IF NOT EXISTS "heartbeat_run_watchdog_decisions" (
|
||||
"id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL,
|
||||
"company_id" uuid NOT NULL,
|
||||
"run_id" uuid NOT NULL,
|
||||
"evaluation_issue_id" uuid,
|
||||
"decision" text NOT NULL,
|
||||
"snoozed_until" timestamp with time zone,
|
||||
"reason" text,
|
||||
"created_by_agent_id" uuid,
|
||||
"created_by_user_id" text,
|
||||
"created_by_run_id" uuid,
|
||||
"created_at" timestamp with time zone DEFAULT now() NOT NULL
|
||||
);
|
||||
--> statement-breakpoint
|
||||
DO $$ BEGIN
|
||||
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_company_id_companies_id_fk" FOREIGN KEY ("company_id") REFERENCES "public"."companies"("id") ON DELETE no action ON UPDATE no action;
|
||||
EXCEPTION
|
||||
WHEN duplicate_object THEN null;
|
||||
END $$;
|
||||
--> statement-breakpoint
|
||||
DO $$ BEGIN
|
||||
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_run_id_heartbeat_runs_id_fk" FOREIGN KEY ("run_id") REFERENCES "public"."heartbeat_runs"("id") ON DELETE cascade ON UPDATE no action;
|
||||
EXCEPTION
|
||||
WHEN duplicate_object THEN null;
|
||||
END $$;
|
||||
--> statement-breakpoint
|
||||
DO $$ BEGIN
|
||||
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_evaluation_issue_id_issues_id_fk" FOREIGN KEY ("evaluation_issue_id") REFERENCES "public"."issues"("id") ON DELETE set null ON UPDATE no action;
|
||||
EXCEPTION
|
||||
WHEN duplicate_object THEN null;
|
||||
END $$;
|
||||
--> statement-breakpoint
|
||||
DO $$ BEGIN
|
||||
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_created_by_agent_id_agents_id_fk" FOREIGN KEY ("created_by_agent_id") REFERENCES "public"."agents"("id") ON DELETE set null ON UPDATE no action;
|
||||
EXCEPTION
|
||||
WHEN duplicate_object THEN null;
|
||||
END $$;
|
||||
--> statement-breakpoint
|
||||
DO $$ BEGIN
|
||||
ALTER TABLE "heartbeat_run_watchdog_decisions" ADD CONSTRAINT "heartbeat_run_watchdog_decisions_created_by_run_id_heartbeat_runs_id_fk" FOREIGN KEY ("created_by_run_id") REFERENCES "public"."heartbeat_runs"("id") ON DELETE set null ON UPDATE no action;
|
||||
EXCEPTION
|
||||
WHEN duplicate_object THEN null;
|
||||
END $$;
|
||||
--> statement-breakpoint
|
||||
CREATE INDEX IF NOT EXISTS "heartbeat_run_watchdog_decisions_company_run_created_idx"
|
||||
ON "heartbeat_run_watchdog_decisions" USING btree ("company_id","run_id","created_at");
|
||||
--> statement-breakpoint
|
||||
CREATE INDEX IF NOT EXISTS "heartbeat_run_watchdog_decisions_company_run_snooze_idx"
|
||||
ON "heartbeat_run_watchdog_decisions" USING btree ("company_id","run_id","snoozed_until");
|
||||
--> statement-breakpoint
|
||||
CREATE UNIQUE INDEX IF NOT EXISTS "issues_active_stale_run_evaluation_uq"
|
||||
ON "issues" USING btree ("company_id","origin_kind","origin_id")
|
||||
WHERE "origin_kind" = 'stale_active_run_evaluation'
|
||||
AND "origin_id" IS NOT NULL
|
||||
AND "hidden_at" IS NULL
|
||||
AND "status" NOT IN ('done', 'cancelled');
|
||||
|
|
@ -484,6 +484,20 @@
|
|||
"when": 1776959400000,
|
||||
"tag": "0068_environment_local_driver_unique",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 69,
|
||||
"version": "7",
|
||||
"when": 1776780003000,
|
||||
"tag": "0069_liveness_recovery_dedupe",
|
||||
"breakpoints": true
|
||||
},
|
||||
{
|
||||
"idx": 70,
|
||||
"version": "7",
|
||||
"when": 1776780004000,
|
||||
"tag": "0070_active_run_output_watchdog",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
|
|||
34
packages/db/src/schema/heartbeat_run_watchdog_decisions.ts
Normal file
34
packages/db/src/schema/heartbeat_run_watchdog_decisions.ts
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import { index, pgTable, text, timestamp, uuid } from "drizzle-orm/pg-core";
|
||||
import { agents } from "./agents.js";
|
||||
import { companies } from "./companies.js";
|
||||
import { heartbeatRuns } from "./heartbeat_runs.js";
|
||||
import { issues } from "./issues.js";
|
||||
|
||||
export const heartbeatRunWatchdogDecisions = pgTable(
|
||||
"heartbeat_run_watchdog_decisions",
|
||||
{
|
||||
id: uuid("id").primaryKey().defaultRandom(),
|
||||
companyId: uuid("company_id").notNull().references(() => companies.id),
|
||||
runId: uuid("run_id").notNull().references(() => heartbeatRuns.id, { onDelete: "cascade" }),
|
||||
evaluationIssueId: uuid("evaluation_issue_id").references(() => issues.id, { onDelete: "set null" }),
|
||||
decision: text("decision").notNull(),
|
||||
snoozedUntil: timestamp("snoozed_until", { withTimezone: true }),
|
||||
reason: text("reason"),
|
||||
createdByAgentId: uuid("created_by_agent_id").references(() => agents.id, { onDelete: "set null" }),
|
||||
createdByUserId: text("created_by_user_id"),
|
||||
createdByRunId: uuid("created_by_run_id").references(() => heartbeatRuns.id, { onDelete: "set null" }),
|
||||
createdAt: timestamp("created_at", { withTimezone: true }).notNull().defaultNow(),
|
||||
},
|
||||
(table) => ({
|
||||
companyRunCreatedIdx: index("heartbeat_run_watchdog_decisions_company_run_created_idx").on(
|
||||
table.companyId,
|
||||
table.runId,
|
||||
table.createdAt,
|
||||
),
|
||||
companyRunSnoozeIdx: index("heartbeat_run_watchdog_decisions_company_run_snooze_idx").on(
|
||||
table.companyId,
|
||||
table.runId,
|
||||
table.snoozedUntil,
|
||||
),
|
||||
}),
|
||||
);
|
||||
|
|
@ -34,6 +34,10 @@ export const heartbeatRuns = pgTable(
|
|||
processPid: integer("process_pid"),
|
||||
processGroupId: integer("process_group_id"),
|
||||
processStartedAt: timestamp("process_started_at", { withTimezone: true }),
|
||||
lastOutputAt: timestamp("last_output_at", { withTimezone: true }),
|
||||
lastOutputSeq: integer("last_output_seq").notNull().default(0),
|
||||
lastOutputStream: text("last_output_stream"),
|
||||
lastOutputBytes: bigint("last_output_bytes", { mode: "number" }),
|
||||
retryOfRunId: uuid("retry_of_run_id").references((): AnyPgColumn => heartbeatRuns.id, {
|
||||
onDelete: "set null",
|
||||
}),
|
||||
|
|
@ -64,5 +68,15 @@ export const heartbeatRuns = pgTable(
|
|||
table.livenessState,
|
||||
table.createdAt,
|
||||
),
|
||||
companyStatusLastOutputIdx: index("heartbeat_runs_company_status_last_output_idx").on(
|
||||
table.companyId,
|
||||
table.status,
|
||||
table.lastOutputAt,
|
||||
),
|
||||
companyStatusProcessStartedIdx: index("heartbeat_runs_company_status_process_started_idx").on(
|
||||
table.companyId,
|
||||
table.status,
|
||||
table.processStartedAt,
|
||||
),
|
||||
}),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -53,6 +53,7 @@ export { documentRevisions } from "./document_revisions.js";
|
|||
export { issueDocuments } from "./issue_documents.js";
|
||||
export { heartbeatRuns } from "./heartbeat_runs.js";
|
||||
export { heartbeatRunEvents } from "./heartbeat_run_events.js";
|
||||
export { heartbeatRunWatchdogDecisions } from "./heartbeat_run_watchdog_decisions.js";
|
||||
export { costEvents } from "./cost_events.js";
|
||||
export { financeEvents } from "./finance_events.js";
|
||||
export { approvals } from "./approvals.js";
|
||||
|
|
|
|||
|
|
@ -91,5 +91,29 @@ export const issues = pgTable(
|
|||
and ${table.executionRunId} is not null
|
||||
and ${table.status} in ('backlog', 'todo', 'in_progress', 'in_review', 'blocked')`,
|
||||
),
|
||||
activeLivenessRecoveryIncidentIdx: uniqueIndex("issues_active_liveness_recovery_incident_uq")
|
||||
.on(table.companyId, table.originKind, table.originId)
|
||||
.where(
|
||||
sql`${table.originKind} = 'harness_liveness_escalation'
|
||||
and ${table.originId} is not null
|
||||
and ${table.hiddenAt} is null
|
||||
and ${table.status} not in ('done', 'cancelled')`,
|
||||
),
|
||||
activeLivenessRecoveryLeafIdx: uniqueIndex("issues_active_liveness_recovery_leaf_uq")
|
||||
.on(table.companyId, table.originKind, table.originFingerprint)
|
||||
.where(
|
||||
sql`${table.originKind} = 'harness_liveness_escalation'
|
||||
and ${table.originFingerprint} <> 'default'
|
||||
and ${table.hiddenAt} is null
|
||||
and ${table.status} not in ('done', 'cancelled')`,
|
||||
),
|
||||
activeStaleRunEvaluationIdx: uniqueIndex("issues_active_stale_run_evaluation_uq")
|
||||
.on(table.companyId, table.originKind, table.originId)
|
||||
.where(
|
||||
sql`${table.originKind} = 'stale_active_run_evaluation'
|
||||
and ${table.originId} is not null
|
||||
and ${table.hiddenAt} is null
|
||||
and ${table.status} not in ('done', 'cancelled')`,
|
||||
),
|
||||
}),
|
||||
);
|
||||
|
|
|
|||
|
|
@ -33,77 +33,56 @@ export type EmbeddedPostgresTestDatabase = {
|
|||
|
||||
let embeddedPostgresSupportPromise: Promise<EmbeddedPostgresTestSupport> | null = null;
|
||||
|
||||
const DEFAULT_PAPERCLIP_EMBEDDED_POSTGRES_PORT = 54329;
|
||||
|
||||
function getReservedTestPorts(): Set<number> {
|
||||
const configuredPorts = [
|
||||
DEFAULT_PAPERCLIP_EMBEDDED_POSTGRES_PORT,
|
||||
Number.parseInt(process.env.PAPERCLIP_EMBEDDED_POSTGRES_PORT ?? "", 10),
|
||||
...String(process.env.PAPERCLIP_TEST_POSTGRES_RESERVED_PORTS ?? "")
|
||||
.split(",")
|
||||
.map((value) => Number.parseInt(value.trim(), 10)),
|
||||
];
|
||||
return new Set(configuredPorts.filter((port) => Number.isInteger(port) && port > 0 && port <= 65535));
|
||||
}
|
||||
|
||||
async function getEmbeddedPostgresCtor(): Promise<EmbeddedPostgresCtor> {
|
||||
const mod = await import("embedded-postgres");
|
||||
return mod.default as EmbeddedPostgresCtor;
|
||||
}
|
||||
|
||||
async function getAvailablePort(): Promise<number> {
|
||||
return await new Promise((resolve, reject) => {
|
||||
const server = net.createServer();
|
||||
server.unref();
|
||||
server.on("error", reject);
|
||||
server.listen(0, "127.0.0.1", () => {
|
||||
const address = server.address();
|
||||
if (!address || typeof address === "string") {
|
||||
server.close(() => reject(new Error("Failed to allocate test port")));
|
||||
return;
|
||||
}
|
||||
const { port } = address;
|
||||
server.close((error) => {
|
||||
if (error) reject(error);
|
||||
else resolve(port);
|
||||
const reservedPorts = getReservedTestPorts();
|
||||
for (let attempt = 0; attempt < 20; attempt += 1) {
|
||||
const port = await new Promise<number>((resolve, reject) => {
|
||||
const server = net.createServer();
|
||||
server.unref();
|
||||
server.on("error", reject);
|
||||
server.listen(0, "127.0.0.1", () => {
|
||||
const address = server.address();
|
||||
if (!address || typeof address === "string") {
|
||||
server.close(() => reject(new Error("Failed to allocate test port")));
|
||||
return;
|
||||
}
|
||||
const { port } = address;
|
||||
server.close((error) => {
|
||||
if (error) reject(error);
|
||||
else resolve(port);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function formatEmbeddedPostgresError(error: unknown): string {
|
||||
if (error instanceof Error && error.message.length > 0) return error.message;
|
||||
if (typeof error === "string" && error.length > 0) return error;
|
||||
return "embedded Postgres startup failed";
|
||||
}
|
||||
|
||||
async function probeEmbeddedPostgresSupport(): Promise<EmbeddedPostgresTestSupport> {
|
||||
const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), "paperclip-embedded-postgres-probe-"));
|
||||
const port = await getAvailablePort();
|
||||
const EmbeddedPostgres = await getEmbeddedPostgresCtor();
|
||||
const instance = new EmbeddedPostgres({
|
||||
databaseDir: dataDir,
|
||||
user: "paperclip",
|
||||
password: "paperclip",
|
||||
port,
|
||||
persistent: true,
|
||||
initdbFlags: ["--encoding=UTF8", "--locale=C", "--lc-messages=C"],
|
||||
onLog: () => {},
|
||||
onError: () => {},
|
||||
});
|
||||
|
||||
try {
|
||||
await instance.initialise();
|
||||
await instance.start();
|
||||
return { supported: true };
|
||||
} catch (error) {
|
||||
return {
|
||||
supported: false,
|
||||
reason: formatEmbeddedPostgresError(error),
|
||||
};
|
||||
} finally {
|
||||
await instance.stop().catch(() => {});
|
||||
fs.rmSync(dataDir, { recursive: true, force: true });
|
||||
if (!reservedPorts.has(port)) return port;
|
||||
}
|
||||
|
||||
throw new Error(
|
||||
`Failed to allocate embedded Postgres test port outside reserved Paperclip ports: ${[
|
||||
...reservedPorts,
|
||||
].join(", ")}`,
|
||||
);
|
||||
}
|
||||
|
||||
export async function getEmbeddedPostgresTestSupport(): Promise<EmbeddedPostgresTestSupport> {
|
||||
if (!embeddedPostgresSupportPromise) {
|
||||
embeddedPostgresSupportPromise = probeEmbeddedPostgresSupport();
|
||||
}
|
||||
return await embeddedPostgresSupportPromise;
|
||||
}
|
||||
|
||||
export async function startEmbeddedPostgresTestDatabase(
|
||||
tempDirPrefix: string,
|
||||
): Promise<EmbeddedPostgresTestDatabase> {
|
||||
async function createEmbeddedPostgresTestInstance(tempDirPrefix: string) {
|
||||
const dataDir = fs.mkdtempSync(path.join(os.tmpdir(), tempDirPrefix));
|
||||
const port = await getAvailablePort();
|
||||
const EmbeddedPostgres = await getEmbeddedPostgresCtor();
|
||||
|
|
@ -118,6 +97,51 @@ export async function startEmbeddedPostgresTestDatabase(
|
|||
onError: () => {},
|
||||
});
|
||||
|
||||
return { dataDir, port, instance };
|
||||
}
|
||||
|
||||
function cleanupEmbeddedPostgresTestDirs(dataDir: string) {
|
||||
fs.rmSync(dataDir, { recursive: true, force: true });
|
||||
}
|
||||
|
||||
function formatEmbeddedPostgresError(error: unknown): string {
|
||||
if (error instanceof Error && error.message.length > 0) return error.message;
|
||||
if (typeof error === "string" && error.length > 0) return error;
|
||||
return "embedded Postgres startup failed";
|
||||
}
|
||||
|
||||
async function probeEmbeddedPostgresSupport(): Promise<EmbeddedPostgresTestSupport> {
|
||||
const { dataDir, instance } = await createEmbeddedPostgresTestInstance(
|
||||
"paperclip-embedded-postgres-probe-",
|
||||
);
|
||||
|
||||
try {
|
||||
await instance.initialise();
|
||||
await instance.start();
|
||||
return { supported: true };
|
||||
} catch (error) {
|
||||
return {
|
||||
supported: false,
|
||||
reason: formatEmbeddedPostgresError(error),
|
||||
};
|
||||
} finally {
|
||||
await instance.stop().catch(() => {});
|
||||
cleanupEmbeddedPostgresTestDirs(dataDir);
|
||||
}
|
||||
}
|
||||
|
||||
export async function getEmbeddedPostgresTestSupport(): Promise<EmbeddedPostgresTestSupport> {
|
||||
if (!embeddedPostgresSupportPromise) {
|
||||
embeddedPostgresSupportPromise = probeEmbeddedPostgresSupport();
|
||||
}
|
||||
return await embeddedPostgresSupportPromise;
|
||||
}
|
||||
|
||||
export async function startEmbeddedPostgresTestDatabase(
|
||||
tempDirPrefix: string,
|
||||
): Promise<EmbeddedPostgresTestDatabase> {
|
||||
const { dataDir, port, instance } = await createEmbeddedPostgresTestInstance(tempDirPrefix);
|
||||
|
||||
try {
|
||||
await instance.initialise();
|
||||
await instance.start();
|
||||
|
|
@ -131,12 +155,12 @@ export async function startEmbeddedPostgresTestDatabase(
|
|||
connectionString,
|
||||
cleanup: async () => {
|
||||
await instance.stop().catch(() => {});
|
||||
fs.rmSync(dataDir, { recursive: true, force: true });
|
||||
cleanupEmbeddedPostgresTestDirs(dataDir);
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
await instance.stop().catch(() => {});
|
||||
fs.rmSync(dataDir, { recursive: true, force: true });
|
||||
cleanupEmbeddedPostgresTestDirs(dataDir);
|
||||
throw new Error(
|
||||
`Failed to start embedded PostgreSQL test database: ${formatEmbeddedPostgresError(error)}`,
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue