mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-16 10:50:38 +09:00
[codex] Roll up May 17 branch changes (#6210)
## Thinking Path > - Paperclip is the control plane for autonomous AI companies, so agent work needs visible ownership, recovery, and operator controls. > - This local branch had accumulated several related control-plane reliability and operator-experience fixes across recovery actions, watchdog folding, model-profile defaults, mentions, markdown editing, plugin launchers, and small UI polish. > - The branch needed to be converted into a PR against the current `origin/master` without losing dirty work or including lockfile/workflow churn. > - The safest standalone shape is a single rollup PR because the recovery/server/UI files overlap heavily across the local commits and splitting would create avoidable conflicts. > - This pull request replays the local branch onto latest `origin/master`, preserves the uncommitted work as logical commits, and adds a Zod 4 validator compatibility fix found during verification. > - The benefit is that the May 17 local branch can be reviewed and merged as one coherent, conflict-free branch under the 100-file Greptile limit. ## What Changed - Rebased the local May 17 branch work onto current `origin/master` in a dedicated worktree. - Preserved and committed previously dirty changes for recovery retry handling, plugin/sidebar launcher polish, and `.herenow` ignores. - Added recovery-action behavior for returning source issues to `todo` when retrying source-scoped recovery. - Included the existing local recovery/liveness/watchdog fold, Codex cheap-profile, markdown/mention, duplicate-agent, and UI polish commits from the branch. - Normalized shared validator `z.record(...)` schemas to explicit string-key records for Zod 4 compatibility. - Confirmed the PR has no `pnpm-lock.yaml` or `.github/workflows/*` changes and stays below the 100-file Greptile limit. 
## Verification - `pnpm install --frozen-lockfile --ignore-scripts` - `npm run install` in `node_modules/.pnpm/sqlite3@5.1.7/node_modules/sqlite3` to build the local native sqlite3 binding after installing with scripts disabled - `pnpm exec vitest run packages/shared/src/validators/issue.test.ts packages/shared/src/project-mentions.test.ts packages/adapter-utils/src/server-utils.test.ts server/src/__tests__/heartbeat-model-profile.test.ts server/src/__tests__/issue-recovery-actions.test.ts server/src/__tests__/issue-agent-mutation-ownership-routes.test.ts server/src/__tests__/heartbeat-active-run-output-watchdog.test.ts server/src/__tests__/plugin-local-folders.test.ts ui/src/components/IssueRecoveryActionCard.test.tsx ui/src/components/Sidebar.test.tsx ui/src/components/SidebarAccountMenu.test.tsx ui/src/components/IssueProperties.test.tsx ui/src/components/MarkdownEditor.test.tsx ui/src/components/MarkdownBody.test.tsx ui/src/lib/duplicate-agent-payload.test.ts ui/src/pages/Routines.test.tsx` - First pass: 13 files passed with 201 passing tests; 3 server files failed before the sqlite3 native binding was built. - After rebuilding sqlite3: `server/src/__tests__/heartbeat-model-profile.test.ts`, `server/src/__tests__/issue-recovery-actions.test.ts`, and `server/src/__tests__/heartbeat-active-run-output-watchdog.test.ts` passed/loaded; embedded Postgres tests were skipped by the local host guard. - `pnpm --filter @paperclipai/shared typecheck` - `pnpm --filter @paperclipai/adapter-utils typecheck` - `pnpm --filter @paperclipai/server typecheck` - `pnpm --filter @paperclipai/ui typecheck` ## Risks - Medium risk: this is a broad rollup PR across recovery semantics, server tests, shared validators, and UI surfaces. - Some embedded Postgres tests were skipped locally due to the host guard, so CI should provide the stronger database-backed signal. - UI changes were covered by component tests, but no browser screenshot was captured in this PR creation pass. 
- This branch may overlap with existing recovery/liveness PR work; merge this PR independently or restack/close overlapping branches rather than merging duplicate implementations together. > For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`. ## Model Used - OpenAI Codex, GPT-5-based coding agent, tool-enabled local repository and GitHub workflow, medium reasoning effort. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [ ] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --------- Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
parent
705c1b8d81
commit
d734bd43d1
83 changed files with 3675 additions and 180 deletions
|
|
@ -1000,7 +1000,7 @@ function redactInlineBase64ImageData(chunk: string) {
|
|||
}
|
||||
|
||||
export function compactRunLogChunk(chunk: string, maxChars = MAX_PERSISTED_LOG_CHUNK_CHARS) {
|
||||
const normalized = redactInlineBase64ImageData(chunk);
|
||||
const normalized = redactSensitiveText(redactInlineBase64ImageData(chunk));
|
||||
if (normalized.length <= maxChars) return normalized;
|
||||
|
||||
const headChars = Math.max(0, Math.floor(maxChars * 0.6));
|
||||
|
|
|
|||
|
|
@ -73,7 +73,10 @@ import {
|
|||
issueTreeControlService,
|
||||
type ActiveIssueTreePauseHoldGate,
|
||||
} from "./issue-tree-control.js";
|
||||
import { parseIssueGraphLivenessIncidentKey } from "./recovery/origins.js";
|
||||
import {
|
||||
parseIssueGraphLivenessIncidentKey,
|
||||
RECOVERY_ORIGIN_KINDS,
|
||||
} from "./recovery/origins.js";
|
||||
import { classifyIssueGraphLiveness, type IssueLivenessFinding } from "./recovery/issue-graph-liveness.js";
|
||||
|
||||
const ALL_ISSUE_STATUSES = ["backlog", "todo", "in_progress", "in_review", "blocked", "done", "cancelled"];
|
||||
|
|
@ -4515,6 +4518,25 @@ export function issueService(db: Db) {
|
|||
}
|
||||
}
|
||||
const [enriched] = await withIssueLabels(tx, [updated]);
|
||||
if (
|
||||
(issueData.status === "done" || issueData.status === "cancelled") &&
|
||||
existing.status !== issueData.status &&
|
||||
existing.originKind === RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation
|
||||
) {
|
||||
const parsedIncident = parseIssueGraphLivenessIncidentKey(existing.originId);
|
||||
if (parsedIncident?.issueId && parsedIncident.companyId === existing.companyId) {
|
||||
await tx
|
||||
.delete(issueRelations)
|
||||
.where(
|
||||
and(
|
||||
eq(issueRelations.companyId, existing.companyId),
|
||||
eq(issueRelations.issueId, existing.id),
|
||||
eq(issueRelations.relatedIssueId, parsedIncident.issueId),
|
||||
eq(issueRelations.type, "blocks"),
|
||||
),
|
||||
);
|
||||
}
|
||||
}
|
||||
return enriched;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -486,8 +486,12 @@ export async function writePluginLocalFolderTextAtomic(
|
|||
contents: string,
|
||||
) {
|
||||
const rootRealPath = await fs.realpath(rootPath);
|
||||
const resolved = await resolvePluginLocalFolderPath(rootPath, relativePath);
|
||||
await fs.mkdir(path.dirname(resolved.absolutePath), { recursive: true });
|
||||
const normalized = normalizeRelativePath(relativePath);
|
||||
const parentRelativePath = path.dirname(normalized);
|
||||
if (parentRelativePath !== ".") {
|
||||
await ensureDirectoryInsideRoot(rootRealPath, parentRelativePath);
|
||||
}
|
||||
const resolved = await resolvePluginLocalFolderPath(rootRealPath, normalized);
|
||||
await assertPathInsideRoot(rootRealPath, path.dirname(resolved.absolutePath));
|
||||
const tempPath = path.join(
|
||||
path.dirname(resolved.absolutePath),
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { and, asc, desc, eq, gt, inArray, isNull, notInArray, sql } from "drizzle-orm";
|
||||
import { and, asc, desc, eq, gt, gte, inArray, isNull, notInArray, sql } from "drizzle-orm";
|
||||
import type { Db } from "@paperclipai/db";
|
||||
import {
|
||||
DEFAULT_ISSUE_GRAPH_LIVENESS_AUTO_RECOVERY_LOOKBACK_HOURS,
|
||||
|
|
@ -11,11 +11,12 @@ import {
|
|||
agents,
|
||||
agentWakeupRequests,
|
||||
approvals,
|
||||
activityLog,
|
||||
companies,
|
||||
issueComments,
|
||||
heartbeatRunEvents,
|
||||
heartbeatRunWatchdogDecisions,
|
||||
heartbeatRuns,
|
||||
issueComments,
|
||||
issueApprovals,
|
||||
issueRecoveryActions,
|
||||
issueRelations,
|
||||
|
|
@ -26,6 +27,7 @@ import { parseObject, asBoolean, asNumber } from "../../adapters/utils.js";
|
|||
import { runningProcesses } from "../../adapters/index.js";
|
||||
import { forbidden, notFound } from "../../errors.js";
|
||||
import { logger } from "../../middleware/logger.js";
|
||||
import { isPidAlive, isProcessGroupAlive, terminateLocalService } from "../local-service-supervisor.js";
|
||||
import { redactCurrentUserText } from "../../log-redaction.js";
|
||||
import { redactSensitiveText } from "../../redaction.js";
|
||||
import { logActivity } from "../activity-log.js";
|
||||
|
|
@ -68,6 +70,15 @@ const ACTIVE_RUN_OUTPUT_EVIDENCE_TAIL_BYTES = 8 * 1024;
|
|||
const STRANDED_ISSUE_RECOVERY_ORIGIN_KIND = RECOVERY_ORIGIN_KINDS.strandedIssueRecovery;
|
||||
const STALE_ACTIVE_RUN_EVALUATION_ORIGIN_KIND = RECOVERY_ORIGIN_KINDS.staleActiveRunEvaluation;
|
||||
const DEFERRED_WAKE_CONTEXT_KEY = "_paperclipWakeContext";
|
||||
/**
 * Adapter type identifiers checked by `cleanupSourceResolvedRunProcess`, which
 * skips process cleanup for any adapter type that is not listed here.
 */
const SESSIONED_LOCAL_ADAPTERS = new Set<string>([
  "claude_local", "codex_local", "cursor", "gemini_local",
  "hermes_local", "opencode_local", "pi_local",
]);
|
||||
|
||||
type RecoveryWakeupOptions = {
|
||||
source?: "timer" | "assignment" | "on_demand" | "automation";
|
||||
|
|
@ -673,6 +684,16 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
return `stale_active_run:${companyId}:${runId}`;
|
||||
}
|
||||
|
||||
/**
 * Reports whether an issue status is one of the two closed states,
 * `"done"` or `"cancelled"`.
 *
 * @param status Issue status string; `null` and `undefined` count as not terminal.
 * @returns `true` only for `"done"` and `"cancelled"`.
 */
function isTerminalIssueStatus(status: string | null | undefined) {
  switch (status) {
    case "done":
    case "cancelled":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Reports whether the issue's `originKind` equals one of the values of
 * `RECOVERY_ORIGIN_KINDS`.
 *
 * @param issue Issue row to inspect.
 * @returns `true` when `issue.originKind` matches a recovery origin kind.
 */
function isRecoveryOriginIssue(issue: typeof issues.$inferSelect) {
  const originKind = issue.originKind;
  return Object.values(RECOVERY_ORIGIN_KINDS).some((recoveryKind) => recoveryKind === originKind);
}
|
||||
|
||||
/**
 * Picks the timestamp from which a run's output silence is measured.
 *
 * Preference order: `lastOutputAt`, then `processStartedAt`, then `startedAt`,
 * then `createdAt`.
 *
 * @param run Run row, or any object carrying the four timestamp fields.
 * @returns The first of those fields that is neither `null` nor `undefined`,
 *          or `null` when all are absent.
 */
function silenceStartedAtForRun(run: Pick<typeof heartbeatRuns.$inferSelect, "lastOutputAt" | "processStartedAt" | "startedAt" | "createdAt">) {
  const candidates = [run.lastOutputAt, run.processStartedAt, run.startedAt, run.createdAt];
  for (const candidate of candidates) {
    if (candidate != null) return candidate;
  }
  return null;
}
|
||||
|
|
@ -798,6 +819,309 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
return issue ?? null;
|
||||
}
|
||||
|
||||
/**
 * Looks up the most recent `issue.updated` activity-log row, written under the
 * given run, whose details carry the source issue's current terminal status.
 *
 * @param input.run Run whose id and company scope the activity lookup.
 * @param input.sourceIssue Issue whose terminal status must appear in the row.
 * @param input.evidenceAfter Lower bound (inclusive) on the row's `createdAt`;
 *        when `null`, the run's `startedAt` and then `createdAt` are used, and
 *        if neither is set no time bound is applied.
 * @returns `{ kind: "activity", id, createdAt, action }` for the newest matching
 *          row, or `null` when the source issue is not terminal or nothing matches.
 */
async function latestSameRunSourceTerminalEvidence(input: {
  run: typeof heartbeatRuns.$inferSelect;
  sourceIssue: typeof issues.$inferSelect;
  evidenceAfter: Date | null;
}) {
  const { run, sourceIssue, evidenceAfter } = input;
  if (!isTerminalIssueStatus(sourceIssue.status)) return null;

  const lowerBound = evidenceAfter ?? run.startedAt ?? run.createdAt ?? null;
  const filters = [
    eq(activityLog.companyId, run.companyId),
    eq(activityLog.runId, run.id),
    eq(activityLog.action, "issue.updated"),
    eq(activityLog.entityType, "issue"),
    eq(activityLog.entityId, sourceIssue.id),
    sql`${activityLog.details} ->> 'status' = ${sourceIssue.status}`,
    // The time bound is only added when some lower bound is known.
    ...(lowerBound ? [gte(activityLog.createdAt, lowerBound)] : []),
  ];

  const [latest] = await db
    .select({
      id: activityLog.id,
      createdAt: activityLog.createdAt,
      action: activityLog.action,
    })
    .from(activityLog)
    .where(and(...filters))
    .orderBy(desc(activityLog.createdAt))
    .limit(1);

  if (!latest) return null;
  return {
    kind: "activity" as const,
    id: latest.id,
    createdAt: latest.createdAt,
    action: latest.action,
  };
}
|
||||
|
||||
/**
 * Computes the sequence number for the next event of a run: one more than the
 * current maximum `seq` stored for that run, or `1` when the run has no events.
 *
 * NOTE(review): the maximum is read in its own statement; whether concurrent
 * writers for the same run can obtain the same value depends on constraints
 * and callers not visible here — confirm.
 *
 * @param runId Run whose events are inspected.
 * @returns The next sequence number.
 */
async function nextRunEventSeq(runId: string) {
  const rows = await db
    .select({ maxSeq: sql<number | null>`max(${heartbeatRunEvents.seq})` })
    .from(heartbeatRunEvents)
    .where(eq(heartbeatRunEvents.runId, runId));
  const currentMax = rows[0]?.maxSeq ?? 0;
  return Number(currentMax) + 1;
}
|
||||
|
||||
/**
 * Appends a `lifecycle` event on the `system` stream to a run's event log,
 * using the next sequence number for that run.
 *
 * @param run Run the event belongs to; supplies company, run and agent ids.
 * @param event Level, message and optional payload; a missing payload is
 *        stored as `null`.
 */
async function appendRecoveryRunEvent(
  run: typeof heartbeatRuns.$inferSelect,
  event: {
    level: "info" | "warn" | "error";
    message: string;
    payload?: Record<string, unknown>;
  },
) {
  const { level, message, payload } = event;
  const seq = await nextRunEventSeq(run.id);
  await db.insert(heartbeatRunEvents).values({
    companyId: run.companyId,
    runId: run.id,
    agentId: run.agentId,
    seq,
    eventType: "lifecycle",
    stream: "system",
    level,
    message,
    payload: payload ?? null,
  });
}
|
||||
|
||||
/**
 * Attempts to terminate the local process behind a run and reports what
 * happened as a plain result object.
 *
 * Outcomes:
 * - `skipped_non_local_adapter`: the agent's adapter type is not in
 *   `SESSIONED_LOCAL_ADAPTERS`.
 * - `no_process_metadata`: neither a pid nor a process group id is known.
 * - `not_running`: neither liveness probe succeeded; the in-memory
 *   `runningProcesses` entry is dropped.
 * - `terminated` / `termination_sent_still_running`: termination was requested
 *   and the liveness probes were re-run afterwards.
 * - `failed`: something in the termination step threw; the message is
 *   returned in `error`.
 *
 * @param input.run Run whose process should be cleaned up.
 * @param input.runningAgent Agent that owns the run; supplies the adapter type.
 * @returns Result object with `attempted`, `outcome`, `adapterType` and, once
 *          process metadata was resolved, `pid` and `processGroupId`.
 */
async function cleanupSourceResolvedRunProcess(input: {
  run: typeof heartbeatRuns.$inferSelect;
  runningAgent: typeof agents.$inferSelect;
}) {
  const { run, runningAgent } = input;
  const adapterType = runningAgent.adapterType;

  if (!SESSIONED_LOCAL_ADAPTERS.has(adapterType)) {
    return {
      attempted: false,
      outcome: "skipped_non_local_adapter",
      adapterType,
    };
  }

  // Prefer the in-memory process handle; fall back to what the run row recorded.
  const tracked = runningProcesses.get(run.id);
  const pid = tracked?.child.pid ?? run.processPid ?? null;
  const processGroupId = tracked?.processGroupId ?? run.processGroupId ?? null;
  if (typeof pid !== "number" && typeof processGroupId !== "number") {
    return {
      attempted: false,
      outcome: "no_process_metadata",
      adapterType,
    };
  }

  const isAnyTargetAlive = () =>
    (typeof pid === "number" && isPidAlive(pid)) ||
    (typeof processGroupId === "number" && isProcessGroupAlive(processGroupId));
  const isPositiveInteger = (value: unknown): value is number =>
    typeof value === "number" && Number.isInteger(value) && value > 0;

  if (!isAnyTargetAlive()) {
    runningProcesses.delete(run.id);
    return {
      attempted: false,
      outcome: "not_running",
      adapterType,
      pid,
      processGroupId,
    };
  }

  try {
    // NOTE(review): when pid is not a positive integer the target pid falls
    // back to the process group id, and to 0 when that is null as well —
    // confirm terminateLocalService treats those values safely.
    const target = {
      pid: isPositiveInteger(pid) ? pid : (processGroupId ?? 0),
      processGroupId: isPositiveInteger(processGroupId) ? processGroupId : null,
    };
    const terminateOptions = tracked
      ? { forceAfterMs: Math.max(1, tracked.graceSec) * 1000 }
      : undefined;
    await terminateLocalService(target, terminateOptions);
    runningProcesses.delete(run.id);

    const survived = isAnyTargetAlive();
    return {
      attempted: true,
      outcome: survived ? "termination_sent_still_running" : "terminated",
      adapterType,
      pid,
      processGroupId,
    };
  } catch (error) {
    const message = error instanceof Error ? error.message : String(error);
    return {
      attempted: true,
      outcome: "failed",
      adapterType,
      pid,
      processGroupId,
      error: message,
    };
  }
}
|
||||
|
||||
/**
 * Updates the owning agent's status after a run has been finalized.
 *
 * The agent becomes `"running"` if it still has any run in status `"running"`,
 * otherwise `"idle"`. Agents currently `"paused"` or `"terminated"` are left
 * untouched by the update's filter.
 *
 * @param run Finalized run; supplies the agent id.
 * @param status Final status that was given to the run.
 */
async function finalizeAgentAfterSourceResolvedRun(run: typeof heartbeatRuns.$inferSelect, status: "succeeded" | "cancelled") {
  const stillRunningForAgent = and(
    eq(heartbeatRuns.agentId, run.agentId),
    eq(heartbeatRuns.status, "running"),
  );
  const countRows = await db
    .select({ count: sql<number>`count(*)::int` })
    .from(heartbeatRuns)
    .where(stillRunningForAgent);
  const runningCount = Number(countRows[0]?.count ?? 0);

  let nextStatus: "running" | "idle" | "error";
  if (runningCount > 0) {
    nextStatus = "running";
  } else if (status === "succeeded" || status === "cancelled") {
    nextStatus = "idle";
  } else {
    // Not reachable under the declared parameter type; kept as a fallback.
    nextStatus = "error";
  }

  const agentIsMutable = and(
    eq(agents.id, run.agentId),
    notInArray(agents.status, ["paused", "terminated"]),
  );
  await db
    .update(agents)
    .set({
      status: nextStatus,
      lastHeartbeatAt: new Date(),
      updatedAt: new Date(),
    })
    .where(agentIsMutable);
}
|
||||
|
||||
/**
 * Folds a stale-looking active run whose source issue already reached a
 * terminal status through activity recorded under that same run.
 *
 * Steps, in order:
 * 1. Attempt process cleanup via `cleanupSourceResolvedRunProcess`.
 * 2. In one transaction: finalize the run (only if it is still `"running"`),
 *    close its wakeup request if it has one, and clear the source issue's
 *    execution lock if it points at this run.
 * 3. Mark an open evaluation issue `done` and comment on it.
 * 4. Resolve an active `active_run_watchdog` recovery action as a false positive.
 * 5. Record a `dismissed_false_positive` watchdog decision, a run event and an
 *    activity-log entry, then refresh the agent's status.
 *
 * Steps 3-5 run outside the transaction from step 2.
 *
 * @param input.run Run being folded.
 * @param input.runningAgent Agent that owns the run.
 * @param input.sourceIssue Issue the run was working on.
 * @param input.evidence Same-run terminal evidence; when `null` nothing is done.
 * @param input.existingEvaluation Open stale-run evaluation issue, if any.
 * @param input.silenceStartedAt Start of the output silence, recorded in the result.
 * @param input.silenceAgeMs Age of the silence in milliseconds, recorded in the result.
 * @param input.now Timestamp used for every row written in the transaction.
 * @returns `{ kind: "skipped" }` when there is no evidence or the run was no
 *          longer `"running"`; otherwise `{ kind: "folded", evaluationIssueId }`.
 */
async function foldSourceResolvedStaleRun(input: {
  run: typeof heartbeatRuns.$inferSelect;
  runningAgent: typeof agents.$inferSelect;
  sourceIssue: typeof issues.$inferSelect;
  evidence: Awaited<ReturnType<typeof latestSameRunSourceTerminalEvidence>>;
  existingEvaluation: Awaited<ReturnType<typeof findOpenStaleRunEvaluation>>;
  silenceStartedAt: Date | null;
  silenceAgeMs: number | null;
  now: Date;
}) {
  const { run, runningAgent, sourceIssue, evidence, existingEvaluation, now } = input;
  if (!evidence) return { kind: "skipped" as const };

  const cleanup = await cleanupSourceResolvedRunProcess({ run, runningAgent });

  // A cancelled source issue cancels the run; any other terminal status counts as success.
  const finalRunStatus = sourceIssue.status === "cancelled" ? "cancelled" : "succeeded";
  const evaluationIssueId = existingEvaluation?.id ?? null;
  const evidenceAt = evidence.createdAt.toISOString();

  const foldSummary = {
    sourceIssueId: sourceIssue.id,
    sourceIssueIdentifier: sourceIssue.identifier,
    sourceIssueStatus: sourceIssue.status,
    sameRunEvidenceKind: evidence.kind,
    sameRunEvidenceId: evidence.id,
    sameRunEvidenceAt: evidenceAt,
    silenceStartedAt: input.silenceStartedAt?.toISOString() ?? null,
    silenceAgeMs: input.silenceAgeMs,
    evaluationIssueId,
    evaluationIssueIdentifier: existingEvaluation?.identifier ?? null,
    cleanup,
  };
  const resultJson = {
    ...parseObject(run.resultJson),
    sourceResolvedWatchdogFold: foldSummary,
  };

  const finalizedRun = await db.transaction(async (tx) => {
    // The status filter makes this a no-op when the run is no longer "running".
    const runStillRunning = and(
      eq(heartbeatRuns.id, run.id),
      eq(heartbeatRuns.companyId, run.companyId),
      eq(heartbeatRuns.status, "running"),
    );
    const [updatedRun] = await tx
      .update(heartbeatRuns)
      .set({
        status: finalRunStatus,
        finishedAt: now,
        error: null,
        errorCode: null,
        resultJson,
        updatedAt: now,
      })
      .where(runStillRunning)
      .returning();
    if (!updatedRun) return null;

    if (run.wakeupRequestId) {
      const wakeupStatus = finalRunStatus === "succeeded" ? "completed" : "cancelled";
      await tx
        .update(agentWakeupRequests)
        .set({
          status: wakeupStatus,
          finishedAt: now,
          error: null,
          updatedAt: now,
        })
        .where(
          and(
            eq(agentWakeupRequests.id, run.wakeupRequestId),
            eq(agentWakeupRequests.companyId, run.companyId),
          ),
        );
    }

    // Release the execution lock only if it is still held by this run.
    await tx
      .update(issues)
      .set({
        executionRunId: null,
        executionAgentNameKey: null,
        executionLockedAt: null,
        updatedAt: now,
      })
      .where(
        and(
          eq(issues.id, sourceIssue.id),
          eq(issues.companyId, run.companyId),
          eq(issues.executionRunId, run.id),
        ),
      );

    return updatedRun;
  });
  if (!finalizedRun) return { kind: "skipped" as const };

  if (existingEvaluation && !isTerminalIssueStatus(existingEvaluation.status)) {
    await issuesSvc.update(existingEvaluation.id, { status: "done" });
    const commentLines = [
      "Source-resolved watchdog fold.",
      "",
      `- Source issue: ${sourceIssue.identifier ?? sourceIssue.id}`,
      `- Run: \`${run.id}\``,
      `- Same-run evidence: \`${evidence.kind}:${evidence.id}\` at ${evidenceAt}`,
      "- Outcome: false positive; the source issue already reached a terminal disposition from this run.",
    ];
    await issuesSvc.addComment(existingEvaluation.id, commentLines.join("\n"), { runId: run.id });
  }

  const activeRecoveryAction = await recoveryActionsSvc.getActiveForIssue(run.companyId, sourceIssue.id);
  if (activeRecoveryAction?.kind === "active_run_watchdog") {
    await recoveryActionsSvc.resolveActiveForIssue({
      companyId: run.companyId,
      sourceIssueId: sourceIssue.id,
      actionId: activeRecoveryAction.id,
      status: "resolved",
      outcome: "false_positive",
      resolutionNote: "Source issue reached a terminal disposition through durable same-run activity; watchdog folded as source-resolved.",
    });
  }

  const [decision] = await db
    .insert(heartbeatRunWatchdogDecisions)
    .values({
      companyId: run.companyId,
      runId: run.id,
      evaluationIssueId,
      decision: "dismissed_false_positive",
      reason: "Source issue already reached a terminal disposition through durable same-run activity.",
      createdByRunId: run.id,
    })
    .returning();

  await appendRecoveryRunEvent(finalizedRun, {
    level: cleanup.outcome === "failed" ? "warn" : "info",
    message: "Source-resolved watchdog fold finalized stale active run",
    payload: foldSummary,
  });
  await logActivity(db, {
    companyId: run.companyId,
    actorType: "system",
    actorId: "system",
    agentId: run.agentId,
    runId: run.id,
    action: "heartbeat.output_stale_source_resolved",
    entityType: "heartbeat_run",
    entityId: run.id,
    details: {
      source: "recovery.scan_silent_active_runs",
      sourceIssueId: sourceIssue.id,
      sourceIssueIdentifier: sourceIssue.identifier,
      sourceIssueStatus: sourceIssue.status,
      evaluationIssueId,
      watchdogDecisionId: decision.id,
      sameRunEvidenceKind: evidence.kind,
      sameRunEvidenceId: evidence.id,
      sameRunEvidenceAt: evidenceAt,
      cleanup,
    },
  });
  await finalizeAgentAfterSourceResolvedRun(finalizedRun, finalRunStatus);

  return { kind: "folded" as const, evaluationIssueId };
}
|
||||
|
||||
async function resolveStaleRunOwnerAgentId(input: {
|
||||
run: typeof heartbeatRuns.$inferSelect;
|
||||
runningAgent: typeof agents.$inferSelect;
|
||||
|
|
@ -1030,6 +1354,47 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
const runningAgent = await getAgent(input.run.agentId);
|
||||
if (!runningAgent || runningAgent.companyId !== input.run.companyId) return { kind: "skipped" as const };
|
||||
const sourceIssue = await resolveStaleRunSourceIssue(input.run);
|
||||
const existing = await findOpenStaleRunEvaluation(input.run.companyId, input.run.id);
|
||||
if (sourceIssue && isRecoveryOriginIssue(sourceIssue)) {
|
||||
await logActivity(db, {
|
||||
companyId: input.run.companyId,
|
||||
actorType: "system",
|
||||
actorId: "system",
|
||||
agentId: input.run.agentId,
|
||||
runId: input.run.id,
|
||||
action: "heartbeat.output_stale_recovery_recursion_refused",
|
||||
entityType: "heartbeat_run",
|
||||
entityId: input.run.id,
|
||||
details: {
|
||||
source: "recovery.scan_silent_active_runs",
|
||||
sourceIssueId: sourceIssue.id,
|
||||
sourceIssueIdentifier: sourceIssue.identifier,
|
||||
sourceIssueOriginKind: sourceIssue.originKind,
|
||||
existingEvaluationIssueId: existing?.id ?? null,
|
||||
},
|
||||
});
|
||||
return { kind: "skipped" as const };
|
||||
}
|
||||
const silenceStartedAt = silenceStartedAtForRun(input.run);
|
||||
if (sourceIssue && isTerminalIssueStatus(sourceIssue.status)) {
|
||||
const terminalEvidence = await latestSameRunSourceTerminalEvidence({
|
||||
run: input.run,
|
||||
sourceIssue,
|
||||
evidenceAfter: silenceStartedAt,
|
||||
});
|
||||
if (terminalEvidence) {
|
||||
return foldSourceResolvedStaleRun({
|
||||
run: input.run,
|
||||
runningAgent,
|
||||
sourceIssue,
|
||||
evidence: terminalEvidence,
|
||||
existingEvaluation: existing,
|
||||
silenceStartedAt,
|
||||
silenceAgeMs: silenceAgeMsForRun(input.run, input.now),
|
||||
now: input.now,
|
||||
});
|
||||
}
|
||||
}
|
||||
const prefix = await getCompanyIssuePrefix(input.run.companyId);
|
||||
const evidence = await collectStaleRunEvidence({
|
||||
run: input.run,
|
||||
|
|
@ -1039,7 +1404,6 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
now: input.now,
|
||||
});
|
||||
const level = (evidence.silenceAgeMs ?? 0) >= ACTIVE_RUN_OUTPUT_CRITICAL_THRESHOLD_MS ? "critical" : "suspicious";
|
||||
const existing = await findOpenStaleRunEvaluation(input.run.companyId, input.run.id);
|
||||
if (existing) {
|
||||
if (level === "critical" && existing.priority !== "high") {
|
||||
await issuesSvc.update(existing.id, {
|
||||
|
|
@ -1174,6 +1538,7 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
created: 0,
|
||||
existing: 0,
|
||||
escalated: 0,
|
||||
folded: 0,
|
||||
snoozed: 0,
|
||||
skipped: 0,
|
||||
evaluationIssueIds: [] as string[],
|
||||
|
|
@ -1188,6 +1553,7 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
if (outcome.kind === "created") result.created += 1;
|
||||
else if (outcome.kind === "existing") result.existing += 1;
|
||||
else if (outcome.kind === "escalated") result.escalated += 1;
|
||||
else if (outcome.kind === "folded") result.folded += 1;
|
||||
else result.skipped += 1;
|
||||
if ("evaluationIssueId" in outcome && outcome.evaluationIssueId) {
|
||||
result.evaluationIssueIds.push(outcome.evaluationIssueId);
|
||||
|
|
@ -2382,7 +2748,6 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
if (row.originKind === RECOVERY_ORIGIN_KINDS.issueGraphLivenessEscalation) {
|
||||
const parsed = parseIssueGraphLivenessIncidentKey(row.originId);
|
||||
if (!parsed || parsed.companyId !== row.companyId) return [];
|
||||
if (parsed.state !== "blocked_by_assigned_backlog_issue") return [];
|
||||
return [
|
||||
{
|
||||
companyId: row.companyId,
|
||||
|
|
@ -2575,6 +2940,21 @@ export function recoveryService(db: Db, deps: { enqueueWakeup: RecoveryWakeup })
|
|||
) {
|
||||
continue;
|
||||
}
|
||||
const sourceIssue = await db
|
||||
.select({
|
||||
id: issues.id,
|
||||
status: issues.status,
|
||||
})
|
||||
.from(issues)
|
||||
.where(and(eq(issues.companyId, parsed.companyId), eq(issues.id, parsed.issueId)))
|
||||
.then((rows) => rows[0] ?? null);
|
||||
if (sourceIssue && !["done", "cancelled"].includes(sourceIssue.status)) {
|
||||
const blockerIds = await existingBlockerIssueIds(parsed.companyId, sourceIssue.id);
|
||||
if (blockerIds.includes(recovery.id)) {
|
||||
result.activeSkipped += 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
if (await removeRecoveryBlockerFromSource(recovery)) {
|
||||
result.blockerRelationsRemoved += 1;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue