mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-17 19:20:39 +09:00
[codex] Add runtime lifecycle recovery and live issue visibility (#4419)
This commit is contained in:
parent
9a8d219949
commit
5a0c1979cf
121 changed files with 9625 additions and 2044 deletions
43
server/src/services/recovery/index.ts
Normal file
43
server/src/services/recovery/index.ts
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
export {
|
||||
RECOVERY_KEY_PREFIXES,
|
||||
RECOVERY_ORIGIN_KINDS,
|
||||
RECOVERY_REASON_KINDS,
|
||||
buildIssueGraphLivenessIncidentKey,
|
||||
buildIssueGraphLivenessLeafKey,
|
||||
parseIssueGraphLivenessIncidentKey,
|
||||
} from "./origins.js";
|
||||
export type {
|
||||
RecoveryKeyPrefix,
|
||||
RecoveryOriginKind,
|
||||
RecoveryReasonKind,
|
||||
} from "./origins.js";
|
||||
export {
|
||||
classifyIssueGraphLiveness,
|
||||
} from "./issue-graph-liveness.js";
|
||||
export type {
|
||||
IssueGraphLivenessInput,
|
||||
IssueLivenessAgentInput,
|
||||
IssueLivenessDependencyPathEntry,
|
||||
IssueLivenessExecutionPathInput,
|
||||
IssueLivenessFinding,
|
||||
IssueLivenessIssueInput,
|
||||
IssueLivenessOwnerCandidate,
|
||||
IssueLivenessOwnerCandidateReason,
|
||||
IssueLivenessRelationInput,
|
||||
IssueLivenessSeverity,
|
||||
IssueLivenessState,
|
||||
} from "./issue-graph-liveness.js";
|
||||
export {
|
||||
recoveryService,
|
||||
} from "./service.js";
|
||||
export {
|
||||
DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS,
|
||||
RUN_LIVENESS_CONTINUATION_REASON,
|
||||
buildRunLivenessContinuationIdempotencyKey,
|
||||
decideRunLivenessContinuation,
|
||||
findExistingRunLivenessContinuationWake,
|
||||
readContinuationAttempt,
|
||||
} from "./run-liveness-continuations.js";
|
||||
export type {
|
||||
RunContinuationDecision,
|
||||
} from "./run-liveness-continuations.js";
|
||||
414
server/src/services/recovery/issue-graph-liveness.ts
Normal file
414
server/src/services/recovery/issue-graph-liveness.ts
Normal file
|
|
@ -0,0 +1,414 @@
|
|||
import { buildIssueGraphLivenessIncidentKey } from "./origins.js";
|
||||
|
||||
/** Severity level attached to an {@link IssueLivenessFinding}. */
export type IssueLivenessSeverity = "warning" | "critical";
|
||||
|
||||
/**
 * Liveness problems the issue-graph classifier in this file can report.
 *
 * The three `blocked_by_*` states are emitted for issues whose status is
 * `"blocked"`; `invalid_review_participant` is emitted for issues whose status
 * is `"in_review"`.
 */
export type IssueLivenessState =
  | "blocked_by_unassigned_issue"
  | "blocked_by_uninvokable_assignee"
  | "blocked_by_cancelled_issue"
  | "invalid_review_participant";
|
||||
|
||||
/**
 * Issue record consumed by the liveness classifier.
 *
 * NOTE(review): `projectId`, `goalId`, `parentId` and `createdByUserId` are not
 * read anywhere in this file; presumably callers rely on them — confirm before
 * removing.
 */
export interface IssueLivenessIssueInput {
  id: string;
  companyId: string;
  /** Display identifier; labels fall back to `id` when this is null. */
  identifier: string | null;
  title: string;
  /**
   * Status string. The classifier in this file compares it against
   * `"blocked"`, `"in_review"`, `"done"` and `"cancelled"`.
   */
  status: string;
  projectId?: string | null;
  goalId?: string | null;
  parentId?: string | null;
  /** Assigned agent, if any; also the start of the assignee reporting chain. */
  assigneeAgentId?: string | null;
  /** Assigned user, if any; a blocker with a user assignee is not flagged as unassigned. */
  assigneeUserId?: string | null;
  /** Creating agent, if any; the start of the creator reporting chain. */
  createdByAgentId?: string | null;
  createdByUserId?: string | null;
  /**
   * Untyped execution-state bag. For issues in review the classifier reads
   * `currentParticipant` from it.
   */
  executionState?: Record<string, unknown> | null;
}
|
||||
|
||||
/**
 * Directed "blocks" edge between two issues: `blockerIssueId` blocks
 * `blockedIssueId`. The classifier skips edges whose `companyId` differs from
 * the blocked issue's company.
 */
export interface IssueLivenessRelationInput {
  companyId: string;
  blockerIssueId: string;
  blockedIssueId: string;
}
|
||||
|
||||
/**
 * Agent record consumed by the liveness classifier.
 *
 * NOTE(review): `name`, `role` and `title` are not read in this file.
 */
export interface IssueLivenessAgentInput {
  id: string;
  companyId: string;
  name: string;
  role: string;
  title?: string | null;
  /**
   * Agent status string, checked against the invokable and blocking status
   * sets declared in this file.
   */
  status: string;
  /**
   * Id of the agent this agent reports to. Agents without one are offered as
   * `root_agent` owner candidates.
   */
  reportsTo?: string | null;
}
|
||||
|
||||
/**
 * An active run or queued wake request that may already be working on an
 * issue. Within this file entries are matched on `companyId` + `issueId` only;
 * `agentId` and `status` are not inspected here.
 */
export interface IssueLivenessExecutionPathInput {
  companyId: string;
  issueId: string | null;
  agentId?: string | null;
  status: string;
}
|
||||
|
||||
/** Snapshot of one issue along the dependency path reported in a finding. */
export interface IssueLivenessDependencyPathEntry {
  issueId: string;
  identifier: string | null;
  title: string;
  status: string;
}
|
||||
|
||||
/**
 * Why an agent was offered as a recovery owner. Listed in the order the
 * candidate builder in this file tries them:
 *
 * - `stalled_blocker_assignee`: the recovery issue's own assignee
 * - `assignee_reporting_chain`: a manager above the assignee
 * - `creator_reporting_chain`: a manager above the creating agent
 * - `root_agent`: an invokable agent with no `reportsTo`
 * - `ordered_invokable_fallback`: any remaining invokable agent, ordered by id
 */
export type IssueLivenessOwnerCandidateReason =
  | "stalled_blocker_assignee"
  | "assignee_reporting_chain"
  | "creator_reporting_chain"
  | "root_agent"
  | "ordered_invokable_fallback";
|
||||
|
||||
/** One suggested recovery owner, with the rule that selected it. */
export interface IssueLivenessOwnerCandidate {
  agentId: string;
  reason: IssueLivenessOwnerCandidateReason;
  /** Id of the issue the candidate was derived from. */
  sourceIssueId: string;
}
|
||||
|
||||
/** A single liveness problem reported by the classifier for one issue. */
export interface IssueLivenessFinding {
  /** Id of the issue the problem was detected on. */
  issueId: string;
  companyId: string;
  identifier: string | null;
  state: IssueLivenessState;
  severity: IssueLivenessSeverity;
  /** Human-readable explanation of the problem. */
  reason: string;
  /** Issues involved, starting with the affected issue. */
  dependencyPath: IssueLivenessDependencyPathEntry[];
  /** Issue recovery should act on: the blocker for blocked issues, the issue itself for review problems. */
  recoveryIssueId: string;
  /** First entry of `recommendedOwnerCandidateAgentIds`, or null when there is none. */
  recommendedOwnerAgentId: string | null;
  recommendedOwnerCandidateAgentIds: string[];
  recommendedOwnerCandidates: IssueLivenessOwnerCandidate[];
  /** Human-readable suggestion for resolving the problem. */
  recommendedAction: string;
  /** Key built by `buildIssueGraphLivenessIncidentKey` for this finding. */
  incidentKey: string;
}
|
||||
|
||||
/**
 * Everything `classifyIssueGraphLiveness` needs. `activeRuns` and
 * `queuedWakeRequests` default to empty lists when omitted.
 */
export interface IssueGraphLivenessInput {
  issues: IssueLivenessIssueInput[];
  relations: IssueLivenessRelationInput[];
  agents: IssueLivenessAgentInput[];
  activeRuns?: IssueLivenessExecutionPathInput[];
  queuedWakeRequests?: IssueLivenessExecutionPathInput[];
}
|
||||
|
||||
/** Agent statuses for which an agent counts as invokable (see `isInvokableAgent`). */
const INVOKABLE_AGENT_STATUSES = new Set([
  "active",
  "idle",
  "running",
  "error",
]);

/** Assignee statuses that make a blocker count as held by an uninvokable assignee. */
const BLOCKING_AGENT_STATUSES = new Set([
  "paused",
  "terminated",
  "pending_approval",
]);
|
||||
|
||||
/**
 * Label used for an issue in human-readable messages: its `identifier` when
 * that is not null/undefined, otherwise its `id`.
 */
function issueLabel(issue: IssueLivenessIssueInput) {
  const label = issue.identifier;
  return label ?? issue.id;
}
|
||||
|
||||
/** Projects an issue onto the four fields stored in a finding's dependency path. */
function pathEntry(issue: IssueLivenessIssueInput): IssueLivenessDependencyPathEntry {
  const { id: issueId, identifier, title, status } = issue;
  return { issueId, identifier, title, status };
}
|
||||
|
||||
/**
 * True when `agent` is present and its status is one of
 * `INVOKABLE_AGENT_STATUSES`; false for null/undefined.
 */
function isInvokableAgent(agent: IssueLivenessAgentInput | null | undefined) {
  if (!agent) return false;
  return INVOKABLE_AGENT_STATUSES.has(agent.status);
}
|
||||
|
||||
/**
 * Reports whether any active run or queued wake request targets the given
 * issue within the given company.
 *
 * @param companyId Company the issue belongs to.
 * @param issueId Issue to look for.
 * @param activeRuns Active-run entries to search.
 * @param queuedWakeRequests Queued wake-request entries to search.
 * @returns True when at least one entry matches both ids.
 */
function hasActiveExecutionPath(
  companyId: string,
  issueId: string,
  activeRuns: IssueLivenessExecutionPathInput[],
  queuedWakeRequests: IssueLivenessExecutionPathInput[],
) {
  const targetsIssue = (entry: IssueLivenessExecutionPathInput) =>
    entry.companyId === companyId && entry.issueId === issueId;
  return activeRuns.some(targetsIssue) || queuedWakeRequests.some(targetsIssue);
}
|
||||
|
||||
/**
 * Extracts the agent id from an untyped principal value.
 *
 * @returns `agentId` when `principal` is an object with `type === "agent"` and
 *   a non-empty string `agentId`; otherwise null.
 */
function readPrincipalAgentId(principal: unknown): string | null {
  if (typeof principal !== "object" || principal === null) return null;
  const record = principal as Record<string, unknown>;
  if (record.type !== "agent") return null;
  const candidate = record.agentId;
  if (typeof candidate !== "string" || candidate.length === 0) return null;
  return candidate;
}
|
||||
|
||||
/**
 * True when `principal` is an object with `type === "user"` and a non-empty
 * string `userId`. Only the shape is checked; no user lookup is performed.
 */
function principalIsResolvableUser(principal: unknown): boolean {
  if (typeof principal !== "object" || principal === null) return false;
  const record = principal as Record<string, unknown>;
  if (record.type !== "user") return false;
  const candidate = record.userId;
  return typeof candidate === "string" && candidate.length > 0;
}
|
||||
|
||||
/**
 * Appends `agentId` to `candidates` if it is eligible and not already listed.
 *
 * An agent is eligible when it exists in `agentsById`, belongs to `companyId`
 * and is invokable. Accepted ids are also recorded in `seen`, so later calls
 * with a different `reason` do not add the same agent twice.
 *
 * @param candidates Output list, mutated in place.
 * @param seen Agent ids already accepted, mutated in place.
 */
function addOwnerCandidate(
  candidates: IssueLivenessOwnerCandidate[],
  seen: Set<string>,
  agentsById: Map<string, IssueLivenessAgentInput>,
  companyId: string,
  agentId: string | null | undefined,
  reason: IssueLivenessOwnerCandidateReason,
  sourceIssueId: string,
) {
  if (!agentId) return;
  if (seen.has(agentId)) return;

  const agent = agentsById.get(agentId);
  const eligible = Boolean(agent) && agent?.companyId === companyId && isInvokableAgent(agent);
  if (!eligible) return;

  seen.add(agentId);
  candidates.push({ agentId, reason, sourceIssueId });
}
|
||||
|
||||
/**
 * Walks the `reportsTo` chain above `startAgentId` and offers each manager as
 * an owner candidate via `addOwnerCandidate`.
 *
 * The start agent itself is not offered. The walk stops when a manager id
 * repeats (cycle), when a manager is missing from `agentsById`, or when a
 * manager belongs to a different company.
 *
 * @param candidates Output list, mutated in place.
 * @param seen Agent ids already accepted, mutated in place.
 */
function addAgentChainCandidates(
  candidates: IssueLivenessOwnerCandidate[],
  seen: Set<string>,
  startAgentId: string | null | undefined,
  agentsById: Map<string, IssueLivenessAgentInput>,
  companyId: string,
  reason: IssueLivenessOwnerCandidateReason,
  sourceIssueId: string,
) {
  if (!startAgentId) return;

  const visitedManagerIds = new Set<string>();
  let managerId = agentsById.get(startAgentId)?.reportsTo;

  while (managerId && !visitedManagerIds.has(managerId)) {
    visitedManagerIds.add(managerId);
    const manager = agentsById.get(managerId);
    if (!manager || manager.companyId !== companyId) return;
    addOwnerCandidate(candidates, seen, agentsById, companyId, manager.id, reason, sourceIssueId);
    managerId = manager.reportsTo;
  }
}
|
||||
|
||||
/**
 * Returns a new array of the invokable agents of `companyId`, sorted by id
 * with `localeCompare`. The input array is not modified.
 */
function orderedInvokableAgents(agents: IssueLivenessAgentInput[], companyId: string) {
  const eligible: IssueLivenessAgentInput[] = [];
  for (const agent of agents) {
    if (agent.companyId === companyId && isInvokableAgent(agent)) {
      eligible.push(agent);
    }
  }
  eligible.sort((a, b) => a.id.localeCompare(b.id));
  return eligible;
}
|
||||
|
||||
/**
 * Builds the ordered, de-duplicated list of agents that could own recovery of
 * `issue`. Every candidate must be an invokable agent of the issue's company.
 *
 * Candidates are offered in this order:
 * 1. the issue's assignee, only when `includeStalledAssignee` is set and the
 *    issue is neither cancelled nor done
 * 2. managers above the assignee
 * 3. managers above the creating agent
 * 4. invokable agents without a `reportsTo`, ordered by id
 * 5. all remaining invokable agents, ordered by id
 *
 * @param issue Issue recovery would act on.
 * @param agents All known agents.
 * @param agentsById The same agents indexed by id.
 * @param options.includeStalledAssignee Offer the issue's own assignee first.
 */
function ownerCandidatesForRecoveryIssue(
  issue: IssueLivenessIssueInput,
  agents: IssueLivenessAgentInput[],
  agentsById: Map<string, IssueLivenessAgentInput>,
  options: {
    includeStalledAssignee?: boolean;
  } = {},
) {
  const { companyId, id: sourceIssueId } = issue;
  const candidates: IssueLivenessOwnerCandidate[] = [];
  const seen = new Set<string>();

  const offer = (agentId: string | null | undefined, reason: IssueLivenessOwnerCandidateReason) =>
    addOwnerCandidate(candidates, seen, agentsById, companyId, agentId, reason, sourceIssueId);
  const offerManagersOf = (agentId: string | null | undefined, reason: IssueLivenessOwnerCandidateReason) =>
    addAgentChainCandidates(candidates, seen, agentId, agentsById, companyId, reason, sourceIssueId);

  const issueIsOpen = issue.status !== "cancelled" && issue.status !== "done";
  if (options.includeStalledAssignee && issueIsOpen) {
    offer(issue.assigneeAgentId, "stalled_blocker_assignee");
  }

  offerManagersOf(issue.assigneeAgentId, "assignee_reporting_chain");
  offerManagersOf(issue.createdByAgentId, "creator_reporting_chain");

  const invokableAgents = orderedInvokableAgents(agents, companyId);
  // All root agents are offered before the general fallback so they rank higher.
  for (const agent of invokableAgents) {
    if (agent.reportsTo) continue;
    offer(agent.id, "root_agent");
  }
  for (const agent of invokableAgents) {
    offer(agent.id, "ordered_invokable_fallback");
  }

  return candidates;
}
|
||||
|
||||
/**
 * Local wrapper around `buildIssueGraphLivenessIncidentKey` that narrows
 * `state` to `IssueLivenessState`.
 */
function incidentKey(input: {
  companyId: string;
  issueId: string;
  state: IssueLivenessState;
  blockerIssueId?: string | null;
  participantAgentId?: string | null;
}) {
  const key = buildIssueGraphLivenessIncidentKey(input);
  return key;
}
|
||||
|
||||
/**
 * Assembles an `IssueLivenessFinding` from classifier inputs.
 *
 * - `severity` defaults to `"critical"` when omitted.
 * - `dependencyPath` issues are reduced to path entries.
 * - `recommendedOwnerAgentId` is the first candidate id, or null when the list
 *   is empty.
 * - `incidentKey` is derived from the issue, state and the optional
 *   blocker/participant id.
 */
function finding(input: {
  issue: IssueLivenessIssueInput;
  state: IssueLivenessState;
  severity?: IssueLivenessSeverity;
  reason: string;
  dependencyPath: IssueLivenessIssueInput[];
  recoveryIssue: IssueLivenessIssueInput;
  recommendedOwnerCandidateAgentIds: string[];
  recommendedOwnerCandidates: IssueLivenessOwnerCandidate[];
  recommendedAction: string;
  blockerIssueId?: string | null;
  participantAgentId?: string | null;
}): IssueLivenessFinding {
  const { issue, state, recommendedOwnerCandidateAgentIds: candidateAgentIds } = input;
  const dependencyPath = input.dependencyPath.map((entry) => pathEntry(entry));
  const key = incidentKey({
    companyId: issue.companyId,
    issueId: issue.id,
    state,
    blockerIssueId: input.blockerIssueId,
    participantAgentId: input.participantAgentId,
  });

  return {
    issueId: issue.id,
    companyId: issue.companyId,
    identifier: issue.identifier,
    state,
    severity: input.severity ?? "critical",
    reason: input.reason,
    dependencyPath,
    recoveryIssueId: input.recoveryIssue.id,
    recommendedOwnerAgentId: candidateAgentIds[0] ?? null,
    recommendedOwnerCandidateAgentIds: candidateAgentIds,
    recommendedOwnerCandidates: input.recommendedOwnerCandidates,
    recommendedAction: input.recommendedAction,
    incidentKey: key,
  };
}
|
||||
|
||||
/** Lookup tables shared by the per-issue classification helpers below. */
interface LivenessClassificationContext {
  agents: IssueLivenessAgentInput[];
  issuesById: Map<string, IssueLivenessIssueInput>;
  agentsById: Map<string, IssueLivenessAgentInput>;
  blockersByBlockedIssueId: Map<string, IssueLivenessRelationInput[]>;
  activeRuns: IssueLivenessExecutionPathInput[];
  queuedWakeRequests: IssueLivenessExecutionPathInput[];
}

/**
 * Builds a finding for `issue` caused by `blocker`. Owner candidates are
 * computed here, i.e. only once a finding is actually being emitted.
 */
function blockedIssueFinding(
  issue: IssueLivenessIssueInput,
  blocker: IssueLivenessIssueInput,
  context: LivenessClassificationContext,
  details: { state: IssueLivenessState; reason: string; recommendedAction: string },
): IssueLivenessFinding {
  const ownerCandidates = ownerCandidatesForRecoveryIssue(blocker, context.agents, context.agentsById, {
    includeStalledAssignee: true,
  });
  return finding({
    issue,
    state: details.state,
    reason: details.reason,
    dependencyPath: [issue, blocker],
    recoveryIssue: blocker,
    recommendedOwnerCandidateAgentIds: ownerCandidates.map((candidate) => candidate.agentId),
    recommendedOwnerCandidates: ownerCandidates,
    recommendedAction: details.recommendedAction,
    blockerIssueId: blocker.id,
  });
}

/**
 * Inspects every blocker of a `"blocked"` issue and returns one finding per
 * blocker that cannot make progress. Blockers that are done, missing, or in a
 * different company are ignored.
 */
function classifyBlockedIssue(
  issue: IssueLivenessIssueInput,
  context: LivenessClassificationContext,
): IssueLivenessFinding[] {
  const results: IssueLivenessFinding[] = [];
  const relations = context.blockersByBlockedIssueId.get(issue.id) ?? [];

  for (const relation of relations) {
    if (relation.companyId !== issue.companyId) continue;
    const blocker = context.issuesById.get(relation.blockerIssueId);
    if (!blocker || blocker.companyId !== issue.companyId || blocker.status === "done") continue;

    if (blocker.status === "cancelled") {
      results.push(blockedIssueFinding(issue, blocker, context, {
        state: "blocked_by_cancelled_issue",
        reason: `${issueLabel(issue)} is still blocked by cancelled issue ${issueLabel(blocker)}.`,
        recommendedAction:
          `Inspect ${issueLabel(blocker)} and either remove it from ${issueLabel(issue)}'s blockers or replace it with an actionable unblock issue.`,
      }));
      continue;
    }

    // A blocker owned only by a user is not flagged.
    if (!blocker.assigneeAgentId && blocker.assigneeUserId) continue;
    // Something is already running or queued for the blocker, so it is not stalled.
    if (hasActiveExecutionPath(issue.companyId, blocker.id, context.activeRuns, context.queuedWakeRequests)) continue;

    if (!blocker.assigneeAgentId) {
      results.push(blockedIssueFinding(issue, blocker, context, {
        state: "blocked_by_unassigned_issue",
        reason: `${issueLabel(issue)} is blocked by unassigned issue ${issueLabel(blocker)} with no user owner.`,
        recommendedAction:
          `Assign ${issueLabel(blocker)} to an owner who can complete it, or remove it from ${issueLabel(issue)}'s blockers if it is no longer required.`,
      }));
      continue;
    }

    // An agent from another company is treated as unresolved, so the reason
    // text never reports a foreign agent's status (e.g. "its assignee is active").
    const foundAssignee = context.agentsById.get(blocker.assigneeAgentId);
    const assignee = foundAssignee?.companyId === issue.companyId ? foundAssignee : undefined;
    if (assignee && !BLOCKING_AGENT_STATUSES.has(assignee.status)) continue;

    results.push(blockedIssueFinding(issue, blocker, context, {
      state: "blocked_by_uninvokable_assignee",
      reason: assignee
        ? `${issueLabel(issue)} is blocked by ${issueLabel(blocker)}, but its assignee is ${assignee.status}.`
        : `${issueLabel(issue)} is blocked by ${issueLabel(blocker)}, but its assignee no longer exists.`,
      recommendedAction:
        `Review ${issueLabel(blocker)} and assign it to an active owner or replace the blocker with an actionable issue.`,
    }));
  }

  return results;
}

/**
 * Checks the current review participant of an `"in_review"` issue that has an
 * execution state. Returns a finding when the participant is an agent that is
 * missing, in another company, or not invokable, or when it is neither an
 * agent principal nor a well-formed user principal. Returns null otherwise.
 */
function classifyReviewParticipant(
  issue: IssueLivenessIssueInput,
  context: LivenessClassificationContext,
): IssueLivenessFinding | null {
  if (issue.status !== "in_review" || !issue.executionState) return null;

  const participant = issue.executionState.currentParticipant;
  const participantAgentId = readPrincipalAgentId(participant);

  let reason: string;
  if (participantAgentId) {
    // As for blockers, an agent from another company counts as unresolved.
    const foundAgent = context.agentsById.get(participantAgentId);
    const participantAgent = foundAgent?.companyId === issue.companyId ? foundAgent : undefined;
    if (isInvokableAgent(participantAgent)) return null;
    reason = participantAgent
      ? `${issueLabel(issue)} is in review, but current participant agent is ${participantAgent.status}.`
      : `${issueLabel(issue)} is in review, but current participant agent cannot be resolved.`;
  } else {
    if (principalIsResolvableUser(participant)) return null;
    reason = `${issueLabel(issue)} is in review, but its current participant cannot be resolved.`;
  }

  const ownerCandidates = ownerCandidatesForRecoveryIssue(issue, context.agents, context.agentsById);
  return finding({
    issue,
    state: "invalid_review_participant",
    reason,
    dependencyPath: [issue],
    recoveryIssue: issue,
    recommendedOwnerCandidateAgentIds: ownerCandidates.map((candidate) => candidate.agentId),
    recommendedOwnerCandidates: ownerCandidates,
    recommendedAction:
      `Repair ${issueLabel(issue)}'s review participant or return the issue to an active assignee with a clear change request.`,
    // Null when the participant is not an agent; the incident key then uses "none".
    participantAgentId,
  });
}

/**
 * Scans an issue graph for issues that cannot make progress on their own.
 *
 * For every `"blocked"` issue, each blocker in the same company is checked:
 * cancelled blockers, blockers with no assignee at all, and blockers whose
 * agent assignee is missing, in another company, or in a blocking status are
 * reported. Blockers with an active run or queued wake request are skipped,
 * except cancelled ones, which are always reported.
 *
 * For every `"in_review"` issue with an execution state, the current review
 * participant is validated.
 *
 * @param input Issues, blocker relations, agents and optional execution paths.
 * @returns Findings in input-issue order; blocker findings follow relation order.
 */
export function classifyIssueGraphLiveness(input: IssueGraphLivenessInput): IssueLivenessFinding[] {
  const blockersByBlockedIssueId = new Map<string, IssueLivenessRelationInput[]>();
  for (const relation of input.relations) {
    const existing = blockersByBlockedIssueId.get(relation.blockedIssueId);
    if (existing) {
      existing.push(relation);
    } else {
      blockersByBlockedIssueId.set(relation.blockedIssueId, [relation]);
    }
  }

  const context: LivenessClassificationContext = {
    agents: input.agents,
    issuesById: new Map(input.issues.map((issue) => [issue.id, issue])),
    agentsById: new Map(input.agents.map((agent) => [agent.id, agent])),
    blockersByBlockedIssueId,
    activeRuns: input.activeRuns ?? [],
    queuedWakeRequests: input.queuedWakeRequests ?? [],
  };

  const findings: IssueLivenessFinding[] = [];
  for (const issue of input.issues) {
    if (issue.status === "blocked") {
      for (const blockedFinding of classifyBlockedIssue(issue, context)) {
        findings.push(blockedFinding);
      }
    }
    const reviewFinding = classifyReviewParticipant(issue, context);
    if (reviewFinding) findings.push(reviewFinding);
  }

  return findings;
}
|
||||
56
server/src/services/recovery/origins.ts
Normal file
56
server/src/services/recovery/origins.ts
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
/**
 * String tags naming the recovery flows that can originate work.
 *
 * NOTE(review): where these values are stored or compared is not visible in
 * this file — treat the string values as a persisted contract until confirmed.
 */
export const RECOVERY_ORIGIN_KINDS = {
  issueGraphLivenessEscalation: "harness_liveness_escalation",
  strandedIssueRecovery: "stranded_issue_recovery",
  staleActiveRunEvaluation: "stale_active_run_evaluation",
} as const;
|
||||
|
||||
/** String tags for recovery wake reasons. */
export const RECOVERY_REASON_KINDS = {
  runLivenessContinuation: "run_liveness_continuation",
} as const;
|
||||
|
||||
/**
 * First segment of the colon-separated keys built in this file: incident keys
 * and leaf keys respectively.
 */
export const RECOVERY_KEY_PREFIXES = {
  issueGraphLivenessIncident: "harness_liveness",
  issueGraphLivenessLeaf: "harness_liveness_leaf",
} as const;
|
||||
|
||||
/** Union of the string values of `RECOVERY_ORIGIN_KINDS`. */
export type RecoveryOriginKind =
  (typeof RECOVERY_ORIGIN_KINDS)[keyof typeof RECOVERY_ORIGIN_KINDS];

/** Union of the string values of `RECOVERY_REASON_KINDS`. */
export type RecoveryReasonKind =
  (typeof RECOVERY_REASON_KINDS)[keyof typeof RECOVERY_REASON_KINDS];

/** Union of the string values of `RECOVERY_KEY_PREFIXES`. */
export type RecoveryKeyPrefix =
  (typeof RECOVERY_KEY_PREFIXES)[keyof typeof RECOVERY_KEY_PREFIXES];
|
||||
|
||||
/**
 * Builds the colon-separated incident key
 * `<prefix>:<companyId>:<issueId>:<state>:<discriminator>`.
 *
 * The discriminator is `blockerIssueId` when set, otherwise
 * `participantAgentId` when set, otherwise the literal `"none"`.
 */
export function buildIssueGraphLivenessIncidentKey(input: {
  companyId: string;
  issueId: string;
  state: string;
  blockerIssueId?: string | null;
  participantAgentId?: string | null;
}) {
  const { companyId, issueId, state } = input;
  const discriminator = input.blockerIssueId ?? input.participantAgentId ?? "none";
  const segments = [
    RECOVERY_KEY_PREFIXES.issueGraphLivenessIncident,
    companyId,
    issueId,
    state,
    discriminator,
  ];
  return segments.join(":");
}
|
||||
|
||||
/**
 * Splits an incident key back into its parts.
 *
 * @returns The parsed parts, or null when the key is empty, does not have
 *   exactly five colon-separated segments, does not start with the incident
 *   prefix, or has an empty segment.
 *
 * NOTE(review): `leafIssueId` is simply the fifth segment. Keys produced by
 * `buildIssueGraphLivenessIncidentKey` may carry a participant agent id or the
 * literal `"none"` there instead of an issue id — callers should confirm
 * before treating it as an issue id.
 */
export function parseIssueGraphLivenessIncidentKey(incidentKey: string | null | undefined) {
  if (!incidentKey) return null;

  const segments = incidentKey.split(":");
  if (segments.length !== 5) return null;

  const [prefix, companyId, issueId, state, leafIssueId] = segments;
  if (prefix !== RECOVERY_KEY_PREFIXES.issueGraphLivenessIncident) return null;
  if (!companyId || !issueId || !state || !leafIssueId) return null;

  return { companyId, issueId, state, leafIssueId };
}
|
||||
|
||||
/**
 * Builds the colon-separated leaf key
 * `<prefix>:<companyId>:<state>:<leafIssueId>`.
 */
export function buildIssueGraphLivenessLeafKey(input: {
  companyId: string;
  state: string;
  leafIssueId: string;
}) {
  const { companyId, state, leafIssueId } = input;
  const segments = [RECOVERY_KEY_PREFIXES.issueGraphLivenessLeaf, companyId, state, leafIssueId];
  return segments.join(":");
}
|
||||
14
server/src/services/recovery/pause-hold-guard.ts
Normal file
14
server/src/services/recovery/pause-hold-guard.ts
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
import type { Db } from "@paperclipai/db";
|
||||
import { issueTreeControlService } from "../issue-tree-control.js";
|
||||
|
||||
/** Type of the object returned by the `issueTreeControlService` factory. */
type IssueTreeControlService = ReturnType<typeof issueTreeControlService>;
|
||||
|
||||
/**
 * Resolves to true when `getActivePauseHoldGate` returns a truthy value for
 * the given issue, false otherwise.
 *
 * @param db Database handle, used only to build the default service.
 * @param companyId Company the issue belongs to.
 * @param issueId Issue to check.
 * @param treeControlSvc Service to query; defaults to a new
 *   `issueTreeControlService(db)`.
 */
export async function isAutomaticRecoverySuppressedByPauseHold(
  db: Db,
  companyId: string,
  issueId: string,
  treeControlSvc: IssueTreeControlService = issueTreeControlService(db),
) {
  const gate = await treeControlSvc.getActivePauseHoldGate(companyId, issueId);
  return gate ? true : false;
}
|
||||
189
server/src/services/recovery/run-liveness-continuations.ts
Normal file
189
server/src/services/recovery/run-liveness-continuations.ts
Normal file
|
|
@ -0,0 +1,189 @@
|
|||
import { and, eq, inArray } from "drizzle-orm";
|
||||
import type { Db } from "@paperclipai/db";
|
||||
import { agentWakeupRequests, agents, heartbeatRuns, issues } from "@paperclipai/db";
|
||||
import type { RunLivenessState } from "@paperclipai/shared";
|
||||
import { RECOVERY_REASON_KINDS } from "./origins.js";
|
||||
|
||||
/** Wake reason recorded on continuation wakes; also the idempotency-key prefix. */
export const RUN_LIVENESS_CONTINUATION_REASON = RECOVERY_REASON_KINDS.runLivenessContinuation;

/** Attempt limit used when the caller does not supply `maxAttempts`. */
export const DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS = 2;

/** Liveness states for which a continuation is considered at all. */
const ACTIONABLE_LIVENESS_STATES = new Set<RunLivenessState>([
  "plan_only",
  "empty_response",
]);

/** Issue statuses in which a continuation may still be enqueued. */
const CONTINUATION_ACTIVE_ISSUE_STATUSES = new Set(["todo", "in_progress"]);

// "error" is included on purpose: an earlier adapter error must not suppress
// bounded liveness continuations forever. The max-attempt and idempotency
// guards are what keep retries from becoming unbounded.
const CONTINUATION_AGENT_STATUSES = new Set([
  "active",
  "idle",
  "running",
  "error",
]);

/** Wake-request statuses that count as "a continuation wake already exists". */
const IDEMPOTENT_WAKE_STATUSES = [
  "queued",
  "deferred_issue_execution",
  "completed",
];
|
||||
|
||||
/** Selected row shape of the `heartbeatRuns` table. */
type HeartbeatRunRow = typeof heartbeatRuns.$inferSelect;

/** Subset of the `issues` row that the continuation decision reads. */
type IssueRow = Pick<
  typeof issues.$inferSelect,
  | "id"
  | "companyId"
  | "identifier"
  | "title"
  | "status"
  | "assigneeAgentId"
  | "executionState"
  | "projectId"
>;

/** Subset of the `agents` row that the continuation decision reads. */
type AgentRow = Pick<typeof agents.$inferSelect, "id" | "companyId" | "status">;
|
||||
|
||||
/**
 * Outcome of `decideRunLivenessContinuation`, discriminated by `kind`:
 *
 * - `enqueue`: a continuation wake should be created with the given key,
 *   payload and context snapshot
 * - `exhausted`: the attempt limit was reached; `comment` is a multi-line
 *   summary of the situation
 * - `skip`: no continuation; `reason` says why
 */
export type RunContinuationDecision =
  | {
      kind: "enqueue";
      nextAttempt: number;
      idempotencyKey: string;
      payload: Record<string, unknown>;
      contextSnapshot: Record<string, unknown>;
    }
  | {
      kind: "exhausted";
      attempt: number;
      maxAttempts: number;
      comment: string;
    }
  | {
      kind: "skip";
      reason: string;
    };
|
||||
|
||||
/**
 * Normalizes an untyped attempt counter to a non-negative integer.
 *
 * Numbers are used as-is; anything else is stringified and parsed as a
 * base-10 integer (null/undefined parse as the empty string). The result is
 * floored; non-finite or non-positive values yield 0.
 */
export function readContinuationAttempt(value: unknown): number {
  let parsed: number;
  if (typeof value === "number") {
    parsed = value;
  } else {
    parsed = Number.parseInt(String(value ?? ""), 10);
  }

  if (!Number.isFinite(parsed) || parsed <= 0) return 0;
  return Math.floor(parsed);
}
|
||||
|
||||
/**
 * Builds the colon-separated idempotency key
 * `<reason>:<issueId>:<sourceRunId>:<livenessState>:<nextAttempt>` for a
 * continuation wake.
 */
export function buildRunLivenessContinuationIdempotencyKey(input: {
  issueId: string;
  sourceRunId: string;
  livenessState: RunLivenessState;
  nextAttempt: number;
}) {
  const { issueId, sourceRunId, livenessState, nextAttempt } = input;
  const segments = [
    RUN_LIVENESS_CONTINUATION_REASON,
    issueId,
    sourceRunId,
    livenessState,
    String(nextAttempt),
  ];
  return segments.join(":");
}
|
||||
|
||||
/**
 * Looks up a wake request of the given company that has the given idempotency
 * key and a status listed in `IDEMPOTENT_WAKE_STATUSES`.
 *
 * @returns The first matching row's `id` and `status`, or null when none exists.
 */
export async function findExistingRunLivenessContinuationWake(
  db: Db,
  input: {
    companyId: string;
    idempotencyKey: string;
  },
) {
  const matchesWake = and(
    eq(agentWakeupRequests.companyId, input.companyId),
    eq(agentWakeupRequests.idempotencyKey, input.idempotencyKey),
    inArray(agentWakeupRequests.status, IDEMPOTENT_WAKE_STATUSES),
  );

  const rows = await db
    .select({ id: agentWakeupRequests.id, status: agentWakeupRequests.status })
    .from(agentWakeupRequests)
    .where(matchesWake)
    .limit(1);

  return rows[0] ?? null;
}
|
||||
|
||||
/**
 * Decides whether a run that ended without progress should get a follow-up
 * continuation wake. The function is pure: it only inspects its input.
 *
 * Guards are applied in order, each producing a `skip` decision:
 * actionable liveness state, issue and agent present, same company as the run,
 * issue still assigned to the run's agent, continuable issue status, no
 * execution state on the issue, invokable agent status, no budget block.
 * After that, reaching the attempt limit yields `exhausted`, an existing wake
 * for this attempt yields `skip`, and otherwise the result is `enqueue`.
 *
 * @param input.run Source run; its `continuationAttempt` is the current attempt count.
 * @param input.nextAction Instruction for the next run; a default text is used when null.
 * @param input.budgetBlocked True when a budget hard stop forbids continuation.
 * @param input.idempotentWakeExists True when a wake for the next attempt already exists.
 * @param input.maxAttempts Attempt limit; defaults to `DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS`.
 */
export function decideRunLivenessContinuation(input: {
  run: HeartbeatRunRow;
  issue: IssueRow | null;
  agent: AgentRow | null;
  livenessState: RunLivenessState | null;
  livenessReason: string | null;
  nextAction: string | null;
  budgetBlocked: boolean;
  idempotentWakeExists: boolean;
  maxAttempts?: number;
}): RunContinuationDecision {
  const { run, issue, agent, livenessState, livenessReason } = input;
  const maxAttempts = input.maxAttempts ?? DEFAULT_MAX_LIVENESS_CONTINUATION_ATTEMPTS;
  const skip = (reason: string): RunContinuationDecision => ({ kind: "skip", reason });

  if (!livenessState || !ACTIONABLE_LIVENESS_STATES.has(livenessState)) {
    return skip("liveness state is not actionable for continuation");
  }
  if (!issue) return skip("issue not found");
  if (!agent) return skip("agent not found");

  const sameCompany = issue.companyId === run.companyId && agent.companyId === run.companyId;
  if (!sameCompany) return skip("company scope mismatch");

  if (issue.assigneeAgentId !== run.agentId) {
    return skip("issue is no longer assigned to the source run agent");
  }
  if (!CONTINUATION_ACTIVE_ISSUE_STATUSES.has(issue.status)) {
    return skip(`issue status ${issue.status} is not continuable`);
  }
  if (issue.executionState) {
    return skip("issue is blocked by execution policy state");
  }
  if (!CONTINUATION_AGENT_STATUSES.has(agent.status)) {
    return skip(`agent status ${agent.status} is not invokable`);
  }
  if (input.budgetBlocked) {
    return skip("budget hard stop blocks continuation");
  }

  const currentAttempt = readContinuationAttempt(run.continuationAttempt);
  if (currentAttempt >= maxAttempts) {
    const commentLines = [
      "Bounded liveness continuation exhausted",
      "",
      `- Last liveness state: \`${livenessState}\``,
      `- Attempts used: ${currentAttempt}/${maxAttempts}`,
      `- Reason: ${livenessReason ?? "Run ended without concrete progress"}`,
      "- Next action: a human or manager should inspect the run and either clarify the task, mark it blocked, or assign a concrete follow-up.",
    ];
    return {
      kind: "exhausted",
      attempt: currentAttempt,
      maxAttempts,
      comment: commentLines.join("\n"),
    };
  }

  if (input.idempotentWakeExists) {
    return skip("continuation wake already exists for this source run and attempt");
  }

  const nextAttempt = currentAttempt + 1;
  const instruction =
    input.nextAction ??
    "The previous run ended without concrete progress. Take the first concrete action now or mark the issue blocked with a specific unblock request.";

  return {
    kind: "enqueue",
    nextAttempt,
    idempotencyKey: buildRunLivenessContinuationIdempotencyKey({
      issueId: issue.id,
      sourceRunId: run.id,
      livenessState,
      nextAttempt,
    }),
    payload: {
      issueId: issue.id,
      sourceRunId: run.id,
      livenessState,
      livenessReason,
      continuationAttempt: nextAttempt,
      maxContinuationAttempts: maxAttempts,
      instruction,
    },
    contextSnapshot: {
      issueId: issue.id,
      taskId: issue.id,
      taskKey: issue.id,
      wakeReason: RUN_LIVENESS_CONTINUATION_REASON,
      livenessContinuationAttempt: nextAttempt,
      livenessContinuationMaxAttempts: maxAttempts,
      livenessContinuationSourceRunId: run.id,
      livenessContinuationState: livenessState,
      livenessContinuationReason: livenessReason,
      livenessContinuationInstruction: instruction,
    },
  };
}
|
||||
2143
server/src/services/recovery/service.ts
Normal file
2143
server/src/services/recovery/service.ts
Normal file
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue