mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-17 11:20:37 +09:00
[codex] Add run liveness continuations (#4083)
## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies. > - Heartbeat runs are the control-plane record of each agent execution window. > - Long-running local agents can exhaust context or stop while still holding useful next-step state. > - Operators need that stop reason, next action, and continuation path to be durable and visible. > - This pull request adds run liveness metadata, continuation summaries, and UI surfaces for issue run ledgers. > - The benefit is that interrupted or long-running work can resume with clearer context instead of losing the agent's last useful handoff. ## What Changed - Added heartbeat-run liveness fields, continuation attempt tracking, and an idempotent `0058` migration. - Added server services and tests for run liveness, continuation summaries, stop metadata, and activity backfill. - Wired local and HTTP adapters to surface continuation/liveness context through shared adapter utilities. - Added shared constants, validators, and heartbeat types for liveness continuation state. - Added issue-detail UI surfaces for continuation handoffs and the run ledger, with component tests. - Updated agent runtime docs, heartbeat protocol docs, prompt guidance, onboarding assets, and skills instructions to explain continuation behavior. - Addressed Greptile feedback by scoping document evidence by run, excluding system continuation-summary documents from liveness evidence, importing shared liveness types, surfacing hidden ledger run counts, documenting bounded retry behavior, and moving run-ledger liveness backfill off the request path. 
## Verification - `pnpm exec vitest run packages/adapter-utils/src/server-utils.test.ts server/src/__tests__/run-continuations.test.ts server/src/__tests__/run-liveness.test.ts server/src/__tests__/activity-service.test.ts server/src/__tests__/documents-service.test.ts server/src/__tests__/issue-continuation-summary.test.ts server/src/services/heartbeat-stop-metadata.test.ts ui/src/components/IssueRunLedger.test.tsx ui/src/components/IssueContinuationHandoff.test.tsx ui/src/components/IssueDocumentsSection.test.tsx` - `pnpm --filter @paperclipai/db build` - `pnpm exec vitest run server/src/__tests__/activity-service.test.ts ui/src/components/IssueRunLedger.test.tsx` - `pnpm --filter @paperclipai/ui typecheck` - `pnpm --filter @paperclipai/server typecheck` - `pnpm exec vitest run server/src/__tests__/activity-service.test.ts server/src/__tests__/run-continuations.test.ts ui/src/components/IssueRunLedger.test.tsx` - `pnpm exec vitest run server/src/__tests__/heartbeat-process-recovery.test.ts -t "treats a plan document update"` - `pnpm exec vitest run server/src/__tests__/activity-service.test.ts server/src/__tests__/heartbeat-process-recovery.test.ts -t "activity service|treats a plan document update"` - Remote PR checks on head `e53b1a1d`: `verify`, `e2e`, `policy`, and Snyk all passed. - Confirmed `public-gh/master` is an ancestor of this branch after fetching `public-gh master`. - Confirmed `pnpm-lock.yaml` is not included in the branch diff. - Confirmed migration `0058_wealthy_starbolt.sql` is ordered after `0057` and uses `IF NOT EXISTS` guards for repeat application. - Greptile inline review threads are resolved. ## Risks - Medium risk: this touches heartbeat execution, liveness recovery, activity rendering, issue routes, shared contracts, docs, and UI. - Migration risk is mitigated by additive columns/indexes and idempotent guards. 
- Run-ledger liveness backfill is now asynchronous, so the first ledger response can briefly show historical missing liveness until the background backfill completes. - UI screenshot coverage is not included in this packaging pass; validation is currently through focused component tests. > For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`. ## Model Used - OpenAI Codex, GPT-5.4, local tool-use coding agent with terminal, git, GitHub connector, GitHub CLI, and Paperclip API access. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge Screenshot note: no before/after screenshots were captured in this PR packaging pass; the UI changes are covered by focused component tests listed above. --------- Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
parent
b9a80dcf22
commit
236d11d36f
71 changed files with 18254 additions and 85 deletions
|
|
@ -4,19 +4,25 @@ import { execFile as execFileCallback } from "node:child_process";
|
|||
import { promisify } from "node:util";
|
||||
import { and, asc, desc, eq, getTableColumns, gt, inArray, isNull, or, sql } from "drizzle-orm";
|
||||
import type { Db } from "@paperclipai/db";
|
||||
import type { BillingType, ExecutionWorkspace, ExecutionWorkspaceConfig } from "@paperclipai/shared";
|
||||
import { ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY } from "@paperclipai/shared";
|
||||
import type { BillingType, ExecutionWorkspace, ExecutionWorkspaceConfig, RunLivenessState } from "@paperclipai/shared";
|
||||
import {
|
||||
agents,
|
||||
agentRuntimeState,
|
||||
agentTaskSessions,
|
||||
agentWakeupRequests,
|
||||
activityLog,
|
||||
companySkills as companySkillsTable,
|
||||
documentRevisions,
|
||||
issueDocuments,
|
||||
heartbeatRunEvents,
|
||||
heartbeatRuns,
|
||||
issueComments,
|
||||
issues,
|
||||
issueWorkProducts,
|
||||
projects,
|
||||
projectWorkspaces,
|
||||
workspaceOperations,
|
||||
} from "@paperclipai/db";
|
||||
import { conflict, HttpError, notFound } from "../errors.js";
|
||||
import { logger } from "../middleware/logger.js";
|
||||
|
|
@ -40,6 +46,14 @@ import {
|
|||
HEARTBEAT_RUN_SAFE_RESULT_JSON_MAX_BYTES,
|
||||
mergeHeartbeatRunResultJson,
|
||||
} from "./heartbeat-run-summary.js";
|
||||
import {
|
||||
buildHeartbeatRunStopMetadata,
|
||||
mergeHeartbeatRunStopMetadata,
|
||||
} from "./heartbeat-stop-metadata.js";
|
||||
import {
|
||||
classifyRunLiveness,
|
||||
type RunLivenessClassificationInput,
|
||||
} from "./run-liveness.js";
|
||||
import { logActivity, type LogActivityInput } from "./activity-log.js";
|
||||
import {
|
||||
buildWorkspaceReadyComment,
|
||||
|
|
@ -53,6 +67,10 @@ import {
|
|||
sanitizeRuntimeServiceBaseEnv,
|
||||
} from "./workspace-runtime.js";
|
||||
import { issueService } from "./issues.js";
|
||||
import {
|
||||
getIssueContinuationSummaryDocument,
|
||||
refreshIssueContinuationSummary,
|
||||
} from "./issue-continuation-summary.js";
|
||||
import { executionWorkspaceService, mergeExecutionWorkspaceConfig } from "./execution-workspaces.js";
|
||||
import { workspaceOperationService } from "./workspace-operations.js";
|
||||
import { isProcessGroupAlive, terminateLocalService } from "./local-service-supervisor.js";
|
||||
|
|
@ -65,6 +83,13 @@ import {
|
|||
resolveExecutionWorkspaceMode,
|
||||
} from "./execution-workspace-policy.js";
|
||||
import { instanceSettingsService } from "./instance-settings.js";
|
||||
import {
|
||||
RUN_LIVENESS_CONTINUATION_REASON,
|
||||
buildRunLivenessContinuationIdempotencyKey,
|
||||
decideRunLivenessContinuation,
|
||||
findExistingRunLivenessContinuationWake,
|
||||
readContinuationAttempt,
|
||||
} from "./run-continuations.js";
|
||||
import { redactCurrentUserText, redactCurrentUserValue } from "../log-redaction.js";
|
||||
import {
|
||||
hasSessionCompactionThresholds,
|
||||
|
|
@ -397,6 +422,11 @@ const heartbeatRunListColumns = {
|
|||
processStartedAt: heartbeatRuns.processStartedAt,
|
||||
retryOfRunId: heartbeatRuns.retryOfRunId,
|
||||
processLossRetryCount: heartbeatRuns.processLossRetryCount,
|
||||
livenessState: heartbeatRuns.livenessState,
|
||||
livenessReason: heartbeatRuns.livenessReason,
|
||||
continuationAttempt: heartbeatRuns.continuationAttempt,
|
||||
lastUsefulActionAt: heartbeatRuns.lastUsefulActionAt,
|
||||
nextAction: heartbeatRuns.nextAction,
|
||||
createdAt: heartbeatRuns.createdAt,
|
||||
updatedAt: heartbeatRuns.updatedAt,
|
||||
} as const;
|
||||
|
|
@ -490,6 +520,11 @@ const heartbeatRunIssueSummaryColumns = {
|
|||
finishedAt: heartbeatRuns.finishedAt,
|
||||
createdAt: heartbeatRuns.createdAt,
|
||||
agentId: heartbeatRuns.agentId,
|
||||
livenessState: heartbeatRuns.livenessState,
|
||||
livenessReason: heartbeatRuns.livenessReason,
|
||||
continuationAttempt: heartbeatRuns.continuationAttempt,
|
||||
lastUsefulActionAt: heartbeatRuns.lastUsefulActionAt,
|
||||
nextAction: heartbeatRuns.nextAction,
|
||||
issueId: sql<string | null>`${heartbeatRuns.contextSnapshot} ->> 'issueId'`.as("issueId"),
|
||||
} as const;
|
||||
|
||||
|
|
@ -1204,6 +1239,14 @@ async function buildPaperclipWakePayload(input: {
|
|||
db: Db;
|
||||
companyId: string;
|
||||
contextSnapshot: Record<string, unknown>;
|
||||
continuationSummary?:
|
||||
| {
|
||||
key: string;
|
||||
title: string | null;
|
||||
body: string;
|
||||
updatedAt: Date;
|
||||
}
|
||||
| null;
|
||||
issueSummary?:
|
||||
| {
|
||||
id: string;
|
||||
|
|
@ -1217,6 +1260,7 @@ async function buildPaperclipWakePayload(input: {
|
|||
const executionStage = parseObject(input.contextSnapshot.executionStage);
|
||||
const commentIds = extractWakeCommentIds(input.contextSnapshot);
|
||||
const issueId = readNonEmptyString(input.contextSnapshot.issueId);
|
||||
const continuationSummary = input.continuationSummary ?? null;
|
||||
const issueSummary =
|
||||
input.issueSummary ??
|
||||
(issueId
|
||||
|
|
@ -1309,8 +1353,37 @@ async function buildPaperclipWakePayload(input: {
|
|||
priority: issueSummary.priority,
|
||||
}
|
||||
: null,
|
||||
childIssueSummaries: Array.isArray(input.contextSnapshot.childIssueSummaries)
|
||||
? input.contextSnapshot.childIssueSummaries
|
||||
: [],
|
||||
childIssueSummaryTruncated: input.contextSnapshot.childIssueSummaryTruncated === true,
|
||||
livenessContinuation: readNonEmptyString(input.contextSnapshot.livenessContinuationState) ||
|
||||
readNonEmptyString(input.contextSnapshot.livenessContinuationInstruction) ||
|
||||
readNonEmptyString(input.contextSnapshot.livenessContinuationSourceRunId) ||
|
||||
typeof input.contextSnapshot.livenessContinuationAttempt === "number"
|
||||
? {
|
||||
attempt: input.contextSnapshot.livenessContinuationAttempt,
|
||||
maxAttempts: input.contextSnapshot.livenessContinuationMaxAttempts,
|
||||
sourceRunId: readNonEmptyString(input.contextSnapshot.livenessContinuationSourceRunId),
|
||||
state: readNonEmptyString(input.contextSnapshot.livenessContinuationState),
|
||||
reason: readNonEmptyString(input.contextSnapshot.livenessContinuationReason),
|
||||
instruction: readNonEmptyString(input.contextSnapshot.livenessContinuationInstruction),
|
||||
}
|
||||
: null,
|
||||
checkedOutByHarness: input.contextSnapshot[PAPERCLIP_HARNESS_CHECKOUT_KEY] === true,
|
||||
executionStage: Object.keys(executionStage).length > 0 ? executionStage : null,
|
||||
continuationSummary: continuationSummary
|
||||
? {
|
||||
key: continuationSummary.key,
|
||||
title: continuationSummary.title,
|
||||
body:
|
||||
continuationSummary.body.length > 4_000
|
||||
? continuationSummary.body.slice(0, 4_000)
|
||||
: continuationSummary.body,
|
||||
bodyTruncated: continuationSummary.body.length > 4_000,
|
||||
updatedAt: continuationSummary.updatedAt.toISOString(),
|
||||
}
|
||||
: null,
|
||||
commentIds,
|
||||
latestCommentId: commentIds[commentIds.length - 1] ?? null,
|
||||
comments,
|
||||
|
|
@ -1643,6 +1716,7 @@ export function heartbeatService(db: Db) {
|
|||
agent: typeof agents.$inferSelect;
|
||||
sessionId: string | null;
|
||||
issueId: string | null;
|
||||
continuationSummaryBody?: string | null;
|
||||
}): Promise<SessionCompactionDecision> {
|
||||
const { agent, sessionId, issueId } = input;
|
||||
if (!sessionId) {
|
||||
|
|
@ -1746,6 +1820,9 @@ export function heartbeatService(db: Db) {
|
|||
issueId ? `- Issue: ${issueId}` : "",
|
||||
`- Rotation reason: ${reason}`,
|
||||
latestTextSummary ? `- Last run summary: ${latestTextSummary}` : "",
|
||||
input.continuationSummaryBody
|
||||
? `- Issue continuation summary: ${input.continuationSummaryBody.slice(0, 1_500)}`
|
||||
: "",
|
||||
"Continue from the current task state. Rebuild only the minimum context you need.",
|
||||
]
|
||||
.filter(Boolean)
|
||||
|
|
@ -2170,6 +2247,136 @@ export function heartbeatService(db: Db) {
|
|||
.where(eq(agentWakeupRequests.id, wakeupRequestId));
|
||||
}
|
||||
|
||||
/**
 * Adds the "continuation exhausted" notice to an issue at most once per run.
 *
 * A comment already created by this run on the issue whose body starts with
 * `Bounded liveness continuation exhausted` counts as the notice; when one is
 * found nothing is written. Otherwise `input.comment` is added, attributed to
 * the run's agent and run id.
 */
async function addContinuationExhaustedCommentOnce(input: {
  run: typeof heartbeatRuns.$inferSelect;
  issueId: string;
  comment: string;
}) {
  const { run, issueId, comment } = input;

  // One matching row is enough to know the notice was already posted.
  const priorNotices = await db
    .select({ id: issueComments.id })
    .from(issueComments)
    .where(
      and(
        eq(issueComments.companyId, run.companyId),
        eq(issueComments.issueId, issueId),
        eq(issueComments.createdByRunId, run.id),
        sql`${issueComments.body} like 'Bounded liveness continuation exhausted%'`,
      ),
    )
    .limit(1);
  if (priorNotices.length > 0) return;

  await issuesSvc.addComment(issueId, comment, {
    agentId: run.agentId,
    runId: run.id,
  });
}
|
||||
|
||||
/**
 * Follows up on a finished run whose liveness state is `plan_only` or
 * `empty_response`; every other state is ignored.
 *
 * Steps:
 * 1. Read the issue id from the run's context snapshot (no issue id, no action).
 * 2. Load the issue and the agent, both scoped to the run's company.
 * 3. Ask the budget service whether invocation is blocked and look up any wake
 *    request already recorded under this continuation's idempotency key.
 * 4. Hand everything to `decideRunLivenessContinuation` and act on the result:
 *    - `exhausted`: annotate the run's liveness reason and post the exhausted
 *      comment (once per run).
 *    - `enqueue`: enqueue a system wakeup and stamp the attempt number on the
 *      resulting run.
 *    - anything else: do nothing.
 */
async function handleRunLivenessContinuation(run: typeof heartbeatRuns.$inferSelect) {
  const livenessState = run.livenessState as RunLivenessState | null;
  if (livenessState !== "plan_only" && livenessState !== "empty_response") return;

  const context = parseObject(run.contextSnapshot);
  const issueId = readNonEmptyString(context.issueId);
  if (!issueId) return;

  const [issue, agent] = await Promise.all([
    db
      .select({
        id: issues.id,
        companyId: issues.companyId,
        identifier: issues.identifier,
        title: issues.title,
        status: issues.status,
        assigneeAgentId: issues.assigneeAgentId,
        executionState: issues.executionState,
        projectId: issues.projectId,
      })
      .from(issues)
      .where(and(eq(issues.id, issueId), eq(issues.companyId, run.companyId)))
      .then((rows) => rows[0] ?? null),
    db
      .select({
        id: agents.id,
        companyId: agents.companyId,
        status: agents.status,
      })
      .from(agents)
      // Scope by company as well as id, matching the issue lookup above.
      .where(and(eq(agents.id, run.agentId), eq(agents.companyId, run.companyId)))
      .then((rows) => rows[0] ?? null),
  ]);

  // The budget check needs both records; without them the decision step receives nulls.
  const budgetBlock =
    issue && agent
      ? await budgets.getInvocationBlock(issue.companyId, agent.id, {
          issueId: issue.id,
          projectId: issue.projectId,
        })
      : null;

  const nextAttempt = readContinuationAttempt(run.continuationAttempt) + 1;
  const idempotencyKey = issue
    ? buildRunLivenessContinuationIdempotencyKey({
        issueId: issue.id,
        sourceRunId: run.id,
        livenessState,
        nextAttempt,
      })
    : null;
  const existingWake = idempotencyKey
    ? await findExistingRunLivenessContinuationWake(db, {
        companyId: run.companyId,
        idempotencyKey,
      })
    : null;

  const decision = decideRunLivenessContinuation({
    run,
    issue,
    agent,
    livenessState,
    livenessReason: run.livenessReason,
    nextAction: run.nextAction,
    budgetBlocked: Boolean(budgetBlock),
    idempotentWakeExists: Boolean(existingWake),
  });

  if (decision.kind === "exhausted") {
    // Append the marker only when it is not already present, so handling the
    // same run again does not keep growing the stored reason.
    const exhaustedSuffix = "; continuation attempts exhausted";
    const baseReason = run.livenessReason ?? "Run ended without concrete progress";
    await setRunStatus(run.id, run.status, {
      livenessReason: baseReason.endsWith(exhaustedSuffix) ? baseReason : `${baseReason}${exhaustedSuffix}`,
    });
    await addContinuationExhaustedCommentOnce({
      run,
      issueId,
      comment: decision.comment,
    });
    return;
  }

  if (decision.kind !== "enqueue") return;

  const continuationRun = await enqueueWakeup(run.agentId, {
    source: "automation",
    triggerDetail: "system",
    reason: RUN_LIVENESS_CONTINUATION_REASON,
    payload: decision.payload,
    contextSnapshot: decision.contextSnapshot,
    idempotencyKey: decision.idempotencyKey,
    requestedByActorType: "system",
    requestedByActorId: "heartbeat",
  });

  // Record the attempt number on the run created for the continuation, if one was returned.
  if (continuationRun) {
    await db
      .update(heartbeatRuns)
      .set({
        continuationAttempt: decision.nextAttempt,
        updatedAt: new Date(),
      })
      .where(eq(heartbeatRuns.id, continuationRun.id));
  }
}
|
||||
|
||||
async function appendRunEvent(
|
||||
run: typeof heartbeatRuns.$inferSelect,
|
||||
seq: number,
|
||||
|
|
@ -2298,6 +2505,47 @@ export function heartbeatService(db: Db) {
|
|||
.then((rows) => rows[0] ?? null);
|
||||
}
|
||||
|
||||
/**
 * Best-effort refresh of the continuation summary for the issue a run worked on.
 *
 * Returns `null` when the run's context snapshot carries no issue id, or when
 * the refresh throws (the failure is logged as a warning and not rethrown).
 * Otherwise returns whatever `refreshIssueContinuationSummary` returns.
 */
async function refreshContinuationSummaryForRun(
  run: typeof heartbeatRuns.$inferSelect,
  agent: typeof agents.$inferSelect,
) {
  const issueId = readNonEmptyString(parseObject(run.contextSnapshot).issueId);
  if (!issueId) return null;

  // Pass only the run and agent fields the summary refresh is given.
  const runDigest = {
    id: run.id,
    status: run.status,
    error: run.error,
    errorCode: run.errorCode,
    resultJson: run.resultJson as Record<string, unknown> | null,
    stdoutExcerpt: run.stdoutExcerpt,
    stderrExcerpt: run.stderrExcerpt,
    finishedAt: run.finishedAt,
  };
  const agentDigest = {
    id: agent.id,
    name: agent.name,
    adapterType: agent.adapterType,
  };

  try {
    return await refreshIssueContinuationSummary({
      db,
      issueId,
      run: runDigest,
      agent: agentDigest,
    });
  } catch (cause) {
    logger.warn(
      { err: cause, runId: run.id, issueId, agentId: agent.id },
      "failed to refresh issue continuation summary",
    );
    return null;
  }
}
|
||||
|
||||
async function enqueueMissingIssueCommentRetry(
|
||||
run: typeof heartbeatRuns.$inferSelect,
|
||||
agent: typeof agents.$inferSelect,
|
||||
|
|
@ -2737,6 +2985,194 @@ export function heartbeatService(db: Db) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Builds stop metadata for a run outcome from the agent's adapter settings and
 * merges it into the given result JSON.
 *
 * @param agent   Supplies `adapterType` and `adapterConfig` (parsed as an object).
 * @param outcome Terminal outcome of the run.
 * @param options Optional existing result JSON plus error code/message; each
 *                missing value is passed on as `null`.
 * @returns The value produced by `mergeHeartbeatRunStopMetadata`.
 */
function mergeRunStopMetadataForAgent(
  agent: Pick<typeof agents.$inferSelect, "adapterType" | "adapterConfig">,
  outcome: "succeeded" | "failed" | "cancelled" | "timed_out",
  options?: {
    resultJson?: Record<string, unknown> | null;
    errorCode?: string | null;
    errorMessage?: string | null;
  },
) {
  const { resultJson = null, errorCode = null, errorMessage = null } = options ?? {};
  return mergeHeartbeatRunStopMetadata(
    resultJson,
    buildHeartbeatRunStopMetadata({
      adapterType: agent.adapterType,
      adapterConfig: parseObject(agent.adapterConfig),
      outcome,
      errorCode,
      errorMessage,
    }),
  );
}
|
||||
|
||||
/**
 * Coerces an arbitrary value to a non-negative whole number.
 *
 * `null`/`undefined` count as 0, non-finite results (NaN, ±Infinity) become 0,
 * fractions are rounded down, and negatives are clamped to 0.
 */
function countValue(value: unknown) {
  const numeric = Number(value ?? 0);
  if (!Number.isFinite(numeric)) return 0;
  const whole = Math.floor(numeric);
  return whole > 0 ? whole : 0;
}
|
||||
|
||||
/**
 * Normalizes a value to a valid `Date`, or `null` when that is not possible.
 *
 * `Date` instances are returned as-is (same object) when valid; strings and
 * numbers are passed to the `Date` constructor. Any other input, or any
 * invalid date, yields `null`.
 */
function dateValue(value: unknown) {
  let candidate: Date;
  if (value instanceof Date) {
    candidate = value;
  } else if (typeof value === "string" || typeof value === "number") {
    candidate = new Date(value);
  } else {
    return null;
  }
  return Number.isNaN(candidate.getTime()) ? null : candidate;
}
|
||||
|
||||
/**
 * Returns the most recent valid date among the arguments, or `null` when none
 * of them converts to a valid date via `dateValue`. On ties the earlier
 * argument is kept.
 */
function latestDate(...values: unknown[]) {
  return values.reduce<Date | null>((newest, raw) => {
    const candidate = dateValue(raw);
    if (!candidate) return newest;
    return !newest || candidate.getTime() > newest.getTime() ? candidate : newest;
  }, null);
}
|
||||
|
||||
/**
 * Gathers everything `classifyRunLiveness` needs for one run.
 *
 * Besides the run's own status/output fields, this collects per-run "evidence"
 * counters and the latest timestamp across them:
 * - issue comments, document revisions and work products created by this run
 *   on the context issue (document revisions exclude the continuation-summary
 *   document; plan-document revisions are also counted separately);
 * - workspace operations and activity-log rows tied to this run;
 * - run events other than `lifecycle`, `adapter.invoke` and `error`.
 *
 * Issue-scoped lookups are skipped (treated as zero) when the context snapshot
 * has no issue id. The queries are independent reads, so they are issued
 * together rather than one after another.
 *
 * @param run        The heartbeat run row being classified.
 * @param resultJson Result JSON to classify; falls back to `run.resultJson`.
 */
async function buildRunLivenessInput(
  run: typeof heartbeatRuns.$inferSelect,
  resultJson: Record<string, unknown> | null | undefined,
): Promise<RunLivenessClassificationInput> {
  const context = parseObject(run.contextSnapshot);
  const contextIssueId = readNonEmptyString(context.issueId);
  const continuationAttempt = asNumber(context.continuationAttempt, run.continuationAttempt ?? 0);

  const [
    issue,
    [commentStats],
    [documentStats],
    [workProductStats],
    [workspaceOperationStats],
    [activityStats],
    [eventStats],
  ] = await Promise.all([
    contextIssueId
      ? db
          .select({
            status: issues.status,
            title: issues.title,
            description: issues.description,
          })
          .from(issues)
          .where(and(eq(issues.companyId, run.companyId), eq(issues.id, contextIssueId)))
          .then((rows) => rows[0] ?? null)
      : null,
    contextIssueId
      ? db
          .select({
            count: sql<number>`count(*)::int`,
            latestAt: sql<Date | null>`max(${issueComments.createdAt})`,
          })
          .from(issueComments)
          .where(
            and(
              eq(issueComments.companyId, run.companyId),
              eq(issueComments.issueId, contextIssueId),
              eq(issueComments.createdByRunId, run.id),
            ),
          )
      : [{ count: 0, latestAt: null }],
    contextIssueId
      ? db
          .select({
            count: sql<number>`count(*)::int`,
            planCount: sql<number>`count(*) filter (where ${issueDocuments.key} = 'plan')::int`,
            latestAt: sql<Date | null>`max(${documentRevisions.createdAt})`,
          })
          .from(documentRevisions)
          .innerJoin(issueDocuments, eq(documentRevisions.documentId, issueDocuments.documentId))
          .where(
            and(
              eq(documentRevisions.companyId, run.companyId),
              eq(documentRevisions.createdByRunId, run.id),
              eq(issueDocuments.companyId, run.companyId),
              eq(issueDocuments.issueId, contextIssueId),
              // The continuation-summary document is excluded from the evidence count.
              sql`${issueDocuments.key} != ${ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY}`,
            ),
          )
      : [{ count: 0, planCount: 0, latestAt: null }],
    contextIssueId
      ? db
          .select({
            count: sql<number>`count(*)::int`,
            latestAt: sql<Date | null>`max(${issueWorkProducts.createdAt})`,
          })
          .from(issueWorkProducts)
          .where(
            and(
              eq(issueWorkProducts.companyId, run.companyId),
              eq(issueWorkProducts.issueId, contextIssueId),
              eq(issueWorkProducts.createdByRunId, run.id),
            ),
          )
      : [{ count: 0, latestAt: null }],
    db
      .select({
        count: sql<number>`count(*)::int`,
        latestAt: sql<Date | null>`max(${workspaceOperations.startedAt})`,
      })
      .from(workspaceOperations)
      .where(and(eq(workspaceOperations.companyId, run.companyId), eq(workspaceOperations.heartbeatRunId, run.id))),
    db
      .select({
        count: sql<number>`count(*)::int`,
        latestAt: sql<Date | null>`max(${activityLog.createdAt})`,
      })
      .from(activityLog)
      .where(and(eq(activityLog.companyId, run.companyId), eq(activityLog.runId, run.id))),
    db
      .select({
        count: sql<number>`count(*) filter (where ${heartbeatRunEvents.eventType} not in ('lifecycle', 'adapter.invoke', 'error'))::int`,
        latestAt: sql<Date | null>`max(${heartbeatRunEvents.createdAt}) filter (where ${heartbeatRunEvents.eventType} not in ('lifecycle', 'adapter.invoke', 'error'))`,
      })
      .from(heartbeatRunEvents)
      .where(and(eq(heartbeatRunEvents.companyId, run.companyId), eq(heartbeatRunEvents.runId, run.id))),
  ]);

  return {
    runStatus: run.status,
    issue,
    resultJson: resultJson ?? run.resultJson ?? null,
    stdoutExcerpt: run.stdoutExcerpt ?? null,
    stderrExcerpt: run.stderrExcerpt ?? null,
    error: run.error ?? null,
    errorCode: run.errorCode ?? null,
    continuationAttempt,
    evidence: {
      issueCommentsCreated: countValue(commentStats?.count),
      documentRevisionsCreated: countValue(documentStats?.count),
      planDocumentRevisionsCreated: countValue(documentStats?.planCount),
      workProductsCreated: countValue(workProductStats?.count),
      workspaceOperationsCreated: countValue(workspaceOperationStats?.count),
      activityEventsCreated: countValue(activityStats?.count),
      toolOrActionEventsCreated: countValue(eventStats?.count),
      latestEvidenceAt: latestDate(
        commentStats?.latestAt,
        documentStats?.latestAt,
        workProductStats?.latestAt,
        workspaceOperationStats?.latestAt,
        activityStats?.latestAt,
        eventStats?.latestAt,
      ),
    },
  };
}
|
||||
|
||||
/**
 * Classifies a run's liveness and writes the classification onto the run row.
 *
 * @param run        The heartbeat run to classify.
 * @param resultJson Optional result JSON forwarded to `buildRunLivenessInput`.
 * @returns The updated run row, or `null` when the update returned no row.
 */
async function classifyAndPersistRunLiveness(
  run: typeof heartbeatRuns.$inferSelect,
  resultJson?: Record<string, unknown> | null,
) {
  const livenessInput = await buildRunLivenessInput(run, resultJson);
  const verdict = classifyRunLiveness(livenessInput);

  const [updatedRun] = await db
    .update(heartbeatRuns)
    .set({
      livenessState: verdict.livenessState,
      livenessReason: verdict.livenessReason,
      continuationAttempt: verdict.continuationAttempt,
      lastUsefulActionAt: verdict.lastUsefulActionAt,
      nextAction: verdict.nextAction,
      updatedAt: new Date(),
    })
    .where(eq(heartbeatRuns.id, run.id))
    .returning();

  return updatedRun ?? null;
}
|
||||
|
||||
async function reapOrphanedRuns(opts?: { staleThresholdMs?: number }) {
|
||||
const staleThresholdMs = opts?.staleThresholdMs ?? 0;
|
||||
const now = new Date();
|
||||
|
|
@ -2746,6 +3182,7 @@ export function heartbeatService(db: Db) {
|
|||
.select({
|
||||
run: heartbeatRuns,
|
||||
adapterType: agents.adapterType,
|
||||
adapterConfig: agents.adapterConfig,
|
||||
})
|
||||
.from(heartbeatRuns)
|
||||
.innerJoin(agents, eq(heartbeatRuns.agentId, agents.id))
|
||||
|
|
@ -2753,7 +3190,7 @@ export function heartbeatService(db: Db) {
|
|||
|
||||
const reaped: string[] = [];
|
||||
|
||||
for (const { run, adapterType } of activeRuns) {
|
||||
for (const { run, adapterType, adapterConfig } of activeRuns) {
|
||||
if (runningProcesses.has(run.id) || activeRunExecutions.has(run.id)) continue;
|
||||
|
||||
// Apply staleness threshold to avoid false positives
|
||||
|
|
@ -2803,6 +3240,15 @@ export function heartbeatService(db: Db) {
|
|||
error: shouldRetry ? `${baseMessage}; retrying once` : baseMessage,
|
||||
errorCode: "process_lost",
|
||||
finishedAt: now,
|
||||
resultJson: mergeRunStopMetadataForAgent(
|
||||
{ adapterType, adapterConfig },
|
||||
"failed",
|
||||
{
|
||||
resultJson: parseObject(run.resultJson),
|
||||
errorCode: "process_lost",
|
||||
errorMessage: shouldRetry ? `${baseMessage}; retrying once` : baseMessage,
|
||||
},
|
||||
),
|
||||
});
|
||||
await setWakeupStatus(run.wakeupRequestId, "failed", {
|
||||
finishedAt: now,
|
||||
|
|
@ -2810,6 +3256,7 @@ export function heartbeatService(db: Db) {
|
|||
});
|
||||
if (!finalizedRun) finalizedRun = await getRun(run.id);
|
||||
if (!finalizedRun) continue;
|
||||
finalizedRun = await classifyAndPersistRunLiveness(finalizedRun, parseObject(finalizedRun.resultJson)) ?? finalizedRun;
|
||||
|
||||
let retriedRun: typeof heartbeatRuns.$inferSelect | null = null;
|
||||
if (shouldRetry) {
|
||||
|
|
@ -3340,10 +3787,24 @@ export function heartbeatService(db: Db) {
|
|||
executionWorkspacePreference: issueContext.executionWorkspacePreference,
|
||||
}
|
||||
: null;
|
||||
const continuationSummary = issueRef
|
||||
? await getIssueContinuationSummaryDocument(db, issueRef.id)
|
||||
: null;
|
||||
if (continuationSummary) {
|
||||
context.paperclipContinuationSummary = {
|
||||
key: continuationSummary.key,
|
||||
title: continuationSummary.title,
|
||||
body: continuationSummary.body,
|
||||
updatedAt: continuationSummary.updatedAt.toISOString(),
|
||||
};
|
||||
} else {
|
||||
delete context.paperclipContinuationSummary;
|
||||
}
|
||||
const paperclipWakePayload = await buildPaperclipWakePayload({
|
||||
db,
|
||||
companyId: agent.companyId,
|
||||
contextSnapshot: context,
|
||||
continuationSummary,
|
||||
issueSummary: issueRef
|
||||
? {
|
||||
id: issueRef.id,
|
||||
|
|
@ -3656,6 +4117,7 @@ export function heartbeatService(db: Db) {
|
|||
agent,
|
||||
sessionId: previousSessionDisplayId ?? runtimeSessionIdForAdapter,
|
||||
issueId,
|
||||
continuationSummaryBody: continuationSummary?.body ?? null,
|
||||
});
|
||||
if (sessionCompaction.rotate) {
|
||||
context.paperclipSessionHandoffMarkdown = sessionCompaction.handoffMarkdown;
|
||||
|
|
@ -3962,6 +4424,23 @@ export function heartbeatService(db: Db) {
|
|||
} else {
|
||||
outcome = "failed";
|
||||
}
|
||||
const runErrorMessage =
|
||||
outcome === "cancelled"
|
||||
? (latestRun?.error ?? adapterResult.errorMessage ?? "Cancelled")
|
||||
: outcome === "succeeded"
|
||||
? null
|
||||
: redactCurrentUserText(
|
||||
adapterResult.errorMessage ?? (outcome === "timed_out" ? "Timed out" : "Adapter failed"),
|
||||
currentUserRedactionOptions,
|
||||
);
|
||||
const runErrorCode =
|
||||
outcome === "timed_out"
|
||||
? "timeout"
|
||||
: outcome === "cancelled"
|
||||
? (latestRun?.errorCode ?? "cancelled")
|
||||
: outcome === "failed"
|
||||
? (adapterResult.errorCode ?? "adapter_failed")
|
||||
: null;
|
||||
|
||||
let logSummary: { bytes: number; sha256?: string; compressed: boolean } | null = null;
|
||||
if (handle) {
|
||||
|
|
@ -4004,27 +4483,18 @@ export function heartbeatService(db: Db) {
|
|||
: null;
|
||||
|
||||
const persistedResultJson = mergeHeartbeatRunResultJson(
|
||||
adapterResult.resultJson ?? null,
|
||||
mergeRunStopMetadataForAgent(agent, outcome, {
|
||||
resultJson: adapterResult.resultJson ?? null,
|
||||
errorCode: runErrorCode,
|
||||
errorMessage: runErrorMessage,
|
||||
}),
|
||||
adapterResult.summary ?? null,
|
||||
);
|
||||
|
||||
await setRunStatus(run.id, status, {
|
||||
let persistedRun = await setRunStatus(run.id, status, {
|
||||
finishedAt: new Date(),
|
||||
error:
|
||||
outcome === "succeeded"
|
||||
? null
|
||||
: redactCurrentUserText(
|
||||
adapterResult.errorMessage ?? (outcome === "timed_out" ? "Timed out" : "Adapter failed"),
|
||||
currentUserRedactionOptions,
|
||||
),
|
||||
errorCode:
|
||||
outcome === "timed_out"
|
||||
? "timeout"
|
||||
: outcome === "cancelled"
|
||||
? "cancelled"
|
||||
: outcome === "failed"
|
||||
? (adapterResult.errorCode ?? "adapter_failed")
|
||||
: null,
|
||||
error: runErrorMessage,
|
||||
errorCode: runErrorCode,
|
||||
exitCode: adapterResult.exitCode,
|
||||
signal: adapterResult.signal,
|
||||
usageJson,
|
||||
|
|
@ -4036,13 +4506,16 @@ export function heartbeatService(db: Db) {
|
|||
logSha256: logSummary?.sha256,
|
||||
logCompressed: logSummary?.compressed ?? false,
|
||||
});
|
||||
if (persistedRun) {
|
||||
persistedRun = await classifyAndPersistRunLiveness(persistedRun, persistedResultJson) ?? persistedRun;
|
||||
}
|
||||
|
||||
await setWakeupStatus(run.wakeupRequestId, outcome === "succeeded" ? "completed" : status, {
|
||||
finishedAt: new Date(),
|
||||
error: adapterResult.errorMessage ?? null,
|
||||
error: runErrorMessage,
|
||||
});
|
||||
|
||||
const finalizedRun = await getRun(run.id);
|
||||
const finalizedRun = persistedRun ?? (await getRun(run.id));
|
||||
if (finalizedRun) {
|
||||
await appendRunEvent(finalizedRun, seq++, {
|
||||
eventType: "lifecycle",
|
||||
|
|
@ -4054,13 +4527,15 @@ export function heartbeatService(db: Db) {
|
|||
exitCode: adapterResult.exitCode,
|
||||
},
|
||||
});
|
||||
const livenessRun = finalizedRun;
|
||||
await refreshContinuationSummaryForRun(livenessRun, agent);
|
||||
if (issueId && outcome === "succeeded") {
|
||||
try {
|
||||
const existingRunComment = await findRunIssueComment(finalizedRun.id, finalizedRun.companyId, issueId);
|
||||
const existingRunComment = await findRunIssueComment(livenessRun.id, livenessRun.companyId, issueId);
|
||||
if (!existingRunComment) {
|
||||
const issueComment = buildHeartbeatRunIssueComment(persistedResultJson);
|
||||
if (issueComment) {
|
||||
await issuesSvc.addComment(issueId, issueComment, { agentId: agent.id, runId: finalizedRun.id });
|
||||
await issuesSvc.addComment(issueId, issueComment, { agentId: agent.id, runId: livenessRun.id });
|
||||
}
|
||||
}
|
||||
} catch (err) {
|
||||
|
|
@ -4070,8 +4545,9 @@ export function heartbeatService(db: Db) {
|
|||
);
|
||||
}
|
||||
}
|
||||
await finalizeIssueCommentPolicy(finalizedRun, agent);
|
||||
await releaseIssueExecutionAndPromote(finalizedRun);
|
||||
await finalizeIssueCommentPolicy(livenessRun, agent);
|
||||
await releaseIssueExecutionAndPromote(livenessRun);
|
||||
await handleRunLivenessContinuation(livenessRun);
|
||||
}
|
||||
|
||||
if (finalizedRun) {
|
||||
|
|
@ -4119,6 +4595,10 @@ export function heartbeatService(db: Db) {
|
|||
error: message,
|
||||
errorCode: "adapter_failed",
|
||||
finishedAt: new Date(),
|
||||
resultJson: mergeRunStopMetadataForAgent(agent, "failed", {
|
||||
errorCode: "adapter_failed",
|
||||
errorMessage: message,
|
||||
}),
|
||||
stdoutExcerpt,
|
||||
stderrExcerpt,
|
||||
logBytes: logSummary?.bytes,
|
||||
|
|
@ -4137,10 +4617,12 @@ export function heartbeatService(db: Db) {
|
|||
level: "error",
|
||||
message,
|
||||
});
|
||||
await finalizeIssueCommentPolicy(failedRun, agent);
|
||||
await releaseIssueExecutionAndPromote(failedRun);
|
||||
const livenessRun = await classifyAndPersistRunLiveness(failedRun) ?? failedRun;
|
||||
await refreshContinuationSummaryForRun(livenessRun, agent);
|
||||
await finalizeIssueCommentPolicy(livenessRun, agent);
|
||||
await releaseIssueExecutionAndPromote(livenessRun);
|
||||
|
||||
await updateRuntimeState(agent, failedRun, {
|
||||
await updateRuntimeState(agent, livenessRun, {
|
||||
exitCode: null,
|
||||
signal: null,
|
||||
timedOut: false,
|
||||
|
|
@ -4170,10 +4652,17 @@ export function heartbeatService(db: Db) {
|
|||
// The inner catch did not fire, so we must record the failure here.
|
||||
const message = outerErr instanceof Error ? outerErr.message : "Unknown setup failure";
|
||||
logger.error({ err: outerErr, runId }, "heartbeat execution setup failed");
|
||||
const setupFailureAgent = await getAgent(run.agentId).catch(() => null);
|
||||
await setRunStatus(runId, "failed", {
|
||||
error: message,
|
||||
errorCode: "adapter_failed",
|
||||
finishedAt: new Date(),
|
||||
...(setupFailureAgent ? {
|
||||
resultJson: mergeRunStopMetadataForAgent(setupFailureAgent, "failed", {
|
||||
errorCode: "adapter_failed",
|
||||
errorMessage: message,
|
||||
}),
|
||||
} : {}),
|
||||
}).catch(() => undefined);
|
||||
await setWakeupStatus(run.wakeupRequestId, "failed", {
|
||||
finishedAt: new Date(),
|
||||
|
|
@ -4189,11 +4678,13 @@ export function heartbeatService(db: Db) {
|
|||
level: "error",
|
||||
message,
|
||||
}).catch(() => undefined);
|
||||
const failedAgent = await getAgent(run.agentId).catch(() => null);
|
||||
const livenessRun = await classifyAndPersistRunLiveness(failedRun).catch(() => failedRun);
|
||||
const failedAgent = setupFailureAgent ?? await getAgent(run.agentId).catch(() => null);
|
||||
if (failedAgent) {
|
||||
await finalizeIssueCommentPolicy(failedRun, failedAgent).catch(() => undefined);
|
||||
await refreshContinuationSummaryForRun(livenessRun, failedAgent).catch(() => undefined);
|
||||
await finalizeIssueCommentPolicy(livenessRun, failedAgent).catch(() => undefined);
|
||||
}
|
||||
await releaseIssueExecutionAndPromote(failedRun).catch(() => undefined);
|
||||
await releaseIssueExecutionAndPromote(livenessRun).catch(() => undefined);
|
||||
}
|
||||
// Ensure the agent is not left stuck in "running" if the inner catch handler's
|
||||
// DB calls threw (e.g. a transient DB error in finalizeAgentStatus).
|
||||
|
|
@ -4363,6 +4854,9 @@ export function heartbeatService(db: Db) {
|
|||
const sessionBefore =
|
||||
readNonEmptyString(promotedContextSnapshot.resumeSessionDisplayId) ??
|
||||
await resolveSessionBeforeForWakeup(deferredAgent, promotedTaskKey);
|
||||
const promotedContinuationAttempt = readContinuationAttempt(
|
||||
promotedContextSnapshot.livenessContinuationAttempt,
|
||||
);
|
||||
const now = new Date();
|
||||
const newRun = await tx
|
||||
.insert(heartbeatRuns)
|
||||
|
|
@ -4375,6 +4869,7 @@ export function heartbeatService(db: Db) {
|
|||
wakeupRequestId: deferred.id,
|
||||
contextSnapshot: promotedContextSnapshot,
|
||||
sessionIdBefore: sessionBefore,
|
||||
continuationAttempt: promotedContinuationAttempt,
|
||||
})
|
||||
.returning()
|
||||
.then((rows) => rows[0]);
|
||||
|
|
@ -4473,6 +4968,7 @@ export function heartbeatService(db: Db) {
|
|||
const sessionBefore =
|
||||
explicitResumeSession?.sessionDisplayId ??
|
||||
await resolveSessionBeforeForWakeup(agent, effectiveTaskKey);
|
||||
const continuationAttempt = readContinuationAttempt(enrichedContextSnapshot.livenessContinuationAttempt);
|
||||
|
||||
const writeSkippedRequest = async (skipReason: string) => {
|
||||
await db.insert(agentWakeupRequests).values({
|
||||
|
|
@ -4771,6 +5267,7 @@ export function heartbeatService(db: Db) {
|
|||
wakeupRequestId: wakeupRequest.id,
|
||||
contextSnapshot: enrichedContextSnapshot,
|
||||
sessionIdBefore: sessionBefore,
|
||||
continuationAttempt,
|
||||
})
|
||||
.returning()
|
||||
.then((rows) => rows[0]);
|
||||
|
|
@ -4890,6 +5387,7 @@ export function heartbeatService(db: Db) {
|
|||
wakeupRequestId: wakeupRequest.id,
|
||||
contextSnapshot: enrichedContextSnapshot,
|
||||
sessionIdBefore: sessionBefore,
|
||||
continuationAttempt,
|
||||
})
|
||||
.returning()
|
||||
.then((rows) => rows[0]);
|
||||
|
|
@ -5022,6 +5520,7 @@ export function heartbeatService(db: Db) {
|
|||
const run = await getRun(runId);
|
||||
if (!run) throw notFound("Heartbeat run not found");
|
||||
if (run.status !== "running" && run.status !== "queued") return run;
|
||||
const agent = await getAgent(run.agentId);
|
||||
|
||||
const running = runningProcesses.get(run.id);
|
||||
if (running) {
|
||||
|
|
@ -5041,6 +5540,13 @@ export function heartbeatService(db: Db) {
|
|||
finishedAt: new Date(),
|
||||
error: reason,
|
||||
errorCode: "cancelled",
|
||||
...(agent ? {
|
||||
resultJson: mergeRunStopMetadataForAgent(agent, "cancelled", {
|
||||
resultJson: parseObject(run.resultJson),
|
||||
errorCode: "cancelled",
|
||||
errorMessage: reason,
|
||||
}),
|
||||
} : {}),
|
||||
});
|
||||
|
||||
await setWakeupStatus(run.wakeupRequestId, "cancelled", {
|
||||
|
|
@ -5065,6 +5571,7 @@ export function heartbeatService(db: Db) {
|
|||
}
|
||||
|
||||
async function cancelActiveForAgentInternal(agentId: string, reason = "Cancelled due to agent pause") {
|
||||
const agent = await getAgent(agentId);
|
||||
const runs = await db
|
||||
.select()
|
||||
.from(heartbeatRuns)
|
||||
|
|
@ -5075,6 +5582,13 @@ export function heartbeatService(db: Db) {
|
|||
finishedAt: new Date(),
|
||||
error: reason,
|
||||
errorCode: "cancelled",
|
||||
...(agent ? {
|
||||
resultJson: mergeRunStopMetadataForAgent(agent, "cancelled", {
|
||||
resultJson: parseObject(run.resultJson),
|
||||
errorCode: "cancelled",
|
||||
errorMessage: reason,
|
||||
}),
|
||||
} : {}),
|
||||
});
|
||||
|
||||
await setWakeupStatus(run.wakeupRequestId, "cancelled", {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue