[codex] Improve agent runtime recovery and governance (#4086)

## Thinking Path > - Paperclip orchestrates AI agents for zero-human companies. > - The heartbeat runtime, agent import path, and agent configuration defaults determine whether work is dispatched safely and predictably. > - Several accumulated fixes all touched agent execution recovery, wake routing, import behavior, and runtime concurrency defaults. > - Those changes need to land together so the heartbeat service and agent creation defaults stay internally consistent. > - This pull request groups the runtime/governance changes from the split branch into one standalone branch. > - The benefit is safer recovery for stranded runs, bounded high-volume reads, imported-agent approval correctness, skill-template support, and a clearer default concurrency policy. ## What Changed - Fixed stranded continuation recovery so successful automatic retries are requeued instead of incorrectly blocking the issue. - Bounded high-volume issue/log reads across issue, heartbeat, agent, project, and workspace paths. - Fixed imported-agent approval and instruction-path permission handling. - Quarantined seeded worktree execution state during worktree provisioning. - Queued approval follow-up wakes and hardened SQL_ASCII heartbeat output handling. - Added reusable agent instruction templates for hiring flows. - Set the default max concurrent agent runs to five and updated related UI/tests/docs. ## Verification - `pnpm install --frozen-lockfile` - `pnpm exec vitest run server/src/__tests__/company-portability.test.ts server/src/__tests__/heartbeat-process-recovery.test.ts server/src/__tests__/heartbeat-comment-wake-batching.test.ts server/src/__tests__/heartbeat-list.test.ts server/src/__tests__/issues-service.test.ts server/src/__tests__/agent-permissions-routes.test.ts packages/adapter-utils/src/server-utils.test.ts ui/src/lib/new-agent-runtime-config.test.ts` - Split integration check: merged this branch first, followed by the other [PAP-1614](/PAP/issues/PAP-1614) branches, with no merge conflicts. - Confirmed this branch does not include `pnpm-lock.yaml`. ## Risks - Medium risk: touches heartbeat recovery, queueing, and issue list bounds in central runtime paths. - Imported-agent and concurrency default behavior changes may affect existing automation that assumes one-at-a-time default runs. - No database migrations are included. > For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR. Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`. ## Model Used - OpenAI Codex, GPT-5.4 tool-enabled coding model, agentic code-editing/runtime with local shell and GitHub CLI access; exact context window and reasoning mode are not exposed by the Paperclip harness. ## Checklist - [x] I have included a thinking path that traces from project context to this change - [x] I have specified the model used (with version and capability details) - [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work - [x] I have run tests locally and they pass - [x] I have added or updated tests where applicable - [x] If this change affects the UI, I have included before/after screenshots - [x] I have updated relevant documentation to reflect my changes - [x] I have considered and documented any risks above - [x] I will address all Greptile and reviewer comments before requesting merge --------- Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-06-16 10:50:38 +09:00 · 2026-04-20 06:19:48 -05:00 · 2026-04-20 06:19:48 -05:00 · 16b2b84d84
commit 16b2b84d84
parent 057fee4836
38 changed files with 1569 additions and 240 deletions
--- a/server/src/services/heartbeat.ts
+++ b/server/src/services/heartbeat.ts
@ -4,8 +4,14 @@ import { execFile as execFileCallback } from "node:child_process";
 import { promisify } from "node:util";
 import { and, asc, desc, eq, getTableColumns, gt, inArray, isNull, or, sql } from "drizzle-orm";
 import type { Db } from "@paperclipai/db";
-import { ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY } from "@paperclipai/shared";
-import type { BillingType, ExecutionWorkspace, ExecutionWorkspaceConfig, RunLivenessState } from "@paperclipai/shared";
+import {
+  AGENT_DEFAULT_MAX_CONCURRENT_RUNS,
+  ISSUE_CONTINUATION_SUMMARY_DOCUMENT_KEY,
+  type BillingType,
+  type ExecutionWorkspace,
+  type ExecutionWorkspaceConfig,
+  type RunLivenessState,
+} from "@paperclipai/shared";
 import {
  agents,
  agentRuntimeState,
@ -31,7 +37,7 @@ import { getRunLogStore, type RunLogHandle } from "./run-log-store.js";
 import { getServerAdapter, runningProcesses } from "../adapters/index.js";
 import type { AdapterExecutionResult, AdapterInvocationMeta, AdapterSessionCodec, UsageSummary } from "../adapters/index.js";
 import { createLocalAgentJwt } from "../agent-auth-jwt.js";
-import { parseObject, asBoolean, asNumber, appendWithCap, MAX_EXCERPT_BYTES } from "../adapters/utils.js";
+import { parseObject, asBoolean, asNumber, appendWithByteCap, MAX_EXCERPT_BYTES } from "../adapters/utils.js";
 import { costService } from "./costs.js";
 import { trackAgentFirstHeartbeat } from "@paperclipai/shared/telemetry";
 import { getTelemetryClient } from "../telemetry.js";
@ -104,7 +110,11 @@ import { extractSkillMentionIds } from "@paperclipai/shared";

 const MAX_LIVE_LOG_CHUNK_BYTES = 8 * 1024;
 const MAX_PERSISTED_LOG_CHUNK_CHARS = 64 * 1024;
-const HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT = 1;
+const MAX_RUN_EVENT_PAYLOAD_STRING_CHARS = 16 * 1024;
+const MAX_RUN_EVENT_PAYLOAD_ARRAY_ITEMS = 50;
+const MAX_RUN_EVENT_PAYLOAD_OBJECT_KEYS = 100;
+const MAX_RUN_EVENT_PAYLOAD_DEPTH = 6;
+const HEARTBEAT_MAX_CONCURRENT_RUNS_DEFAULT = AGENT_DEFAULT_MAX_CONCURRENT_RUNS;
 const HEARTBEAT_MAX_CONCURRENT_RUNS_MAX = 10;
 const DEFERRED_WAKE_CONTEXT_KEY = "_paperclipWakeContext";
 const WAKE_COMMENT_IDS_KEY = "wakeCommentIds";
@ -119,6 +129,8 @@ const MAX_INLINE_WAKE_COMMENT_BODY_CHARS = 4_000;
 const MAX_INLINE_WAKE_COMMENT_BODY_TOTAL_CHARS = 12_000;
 const execFile = promisify(execFileCallback);
 const ACTIVE_HEARTBEAT_RUN_STATUSES = ["queued", "running"] as const;
+const UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES = ["failed", "cancelled", "timed_out"] as const;
+const RUNNING_ISSUE_WAKE_REASONS_REQUIRING_FOLLOWUP = new Set(["approval_approved"]);
 const SESSIONED_LOCAL_ADAPTERS = new Set([
  "claude_local",
  "codex_local",
@ -504,6 +516,15 @@ const heartbeatRunSafeColumns = {
  resultJson: heartbeatRunSafeResultJsonColumn,
 } as const;

+const heartbeatRunSqlAsciiSafeColumns = {
+  ...getTableColumns(heartbeatRuns),
+  processGroupId: heartbeatRunProcessGroupIdColumn,
+  error: sql<string | null>`NULL`.as("error"),
+  resultJson: sql<Record<string, unknown> | null>`NULL`.as("resultJson"),
+  stdoutExcerpt: sql<string | null>`NULL`.as("stdoutExcerpt"),
+  stderrExcerpt: sql<string | null>`NULL`.as("stderrExcerpt"),
+} as const;
+
 const heartbeatRunLogAccessColumns = {
  id: heartbeatRuns.id,
  companyId: heartbeatRuns.companyId,
@ -529,7 +550,81 @@ const heartbeatRunIssueSummaryColumns = {
 } as const;

 function appendExcerpt(prev: string, chunk: string) {
-  return appendWithCap(prev, chunk, MAX_EXCERPT_BYTES);
+  return appendWithByteCap(prev, chunk, MAX_EXCERPT_BYTES);
+}
+
+function truncateRunEventString(value: string) {
+  if (value.length <= MAX_RUN_EVENT_PAYLOAD_STRING_CHARS) return value;
+  const omittedChars = value.length - MAX_RUN_EVENT_PAYLOAD_STRING_CHARS;
+  return `${value.slice(0, MAX_RUN_EVENT_PAYLOAD_STRING_CHARS)}\n[truncated ${omittedChars} chars]`;
+}
+
+function boundRunEventValue(value: unknown, depth: number, seen: WeakSet<object>): unknown {
+  if (typeof value === "string") {
+    return truncateRunEventString(value);
+  }
+  if (
+    value === null
+    || typeof value === "number"
+    || typeof value === "boolean"
+  ) {
+    return value;
+  }
+  if (value instanceof Date) {
+    return value.toISOString();
+  }
+  if (Array.isArray(value)) {
+    if (depth >= MAX_RUN_EVENT_PAYLOAD_DEPTH) {
+      return {
+        _truncated: true,
+        type: "array",
+        originalLength: value.length,
+      };
+    }
+    const bounded = value
+      .slice(0, MAX_RUN_EVENT_PAYLOAD_ARRAY_ITEMS)
+      .map((entry) => boundRunEventValue(entry, depth + 1, seen));
+    if (value.length > MAX_RUN_EVENT_PAYLOAD_ARRAY_ITEMS) {
+      bounded.push({
+        _truncated: true,
+        omittedItems: value.length - MAX_RUN_EVENT_PAYLOAD_ARRAY_ITEMS,
+      });
+    }
+    return bounded;
+  }
+  if (typeof value !== "object" || value === undefined) {
+    return null;
+  }
+  if (seen.has(value)) {
+    return "[Circular]";
+  }
+  seen.add(value);
+  const entries = Object.entries(value as Record<string, unknown>);
+  if (depth >= MAX_RUN_EVENT_PAYLOAD_DEPTH) {
+    const bounded = {
+      _truncated: true,
+      type: "object",
+      keys: entries.map(([key]) => key).slice(0, 20),
+    };
+    seen.delete(value);
+    return bounded;
+  }
+
+  const out: Record<string, unknown> = {};
+  for (const [key, entryValue] of entries.slice(0, MAX_RUN_EVENT_PAYLOAD_OBJECT_KEYS)) {
+    out[key] = boundRunEventValue(entryValue, depth + 1, seen);
+  }
+  if (entries.length > MAX_RUN_EVENT_PAYLOAD_OBJECT_KEYS) {
+    out._truncated = true;
+    out._omittedKeys = entries.length - MAX_RUN_EVENT_PAYLOAD_OBJECT_KEYS;
+  }
+  seen.delete(value);
+  return out;
+}
+
+export function boundHeartbeatRunEventPayloadForStorage(payload: Record<string, unknown>): Record<string, unknown> {
+  const bounded = boundRunEventValue(payload, 0, new WeakSet());
+  return parseObject(bounded) ?? { _truncated: true };
 }

 function redactInlineBase64ImageData(chunk: string) {
@ -716,6 +811,22 @@ function summarizeRunFailureForIssueComment(
  return null;
 }

+function didAutomaticRecoveryFail(
+  latestRun: Pick<typeof heartbeatRuns.$inferSelect, "status" | "contextSnapshot"> | null,
+  expectedRetryReason: "assignment_recovery" | "issue_continuation_needed",
+) {
+  if (!latestRun) return false;
+
+  const latestContext = parseObject(latestRun.contextSnapshot);
+  const latestRetryReason = readNonEmptyString(latestContext.retryReason);
+  return (
+    latestRetryReason === expectedRetryReason &&
+    UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES.includes(
+      latestRun.status as (typeof UNSUCCESSFUL_HEARTBEAT_RUN_TERMINAL_STATUSES)[number],
+    )
+  );
+}
+
 function normalizeLedgerBillingType(value: unknown): BillingType {
  const raw = readNonEmptyString(value);
  switch (raw) {
@ -1095,6 +1206,15 @@ function shouldAutoCheckoutIssueForWake(input: {
  return true;
 }

+function shouldQueueFollowupForRunningIssueWake(input: {
+  contextSnapshot: Record<string, unknown> | null | undefined;
+  wakeCommentId: string | null;
+}) {
+  if (input.wakeCommentId) return true;
+  const wakeReason = readNonEmptyString(input.contextSnapshot?.wakeReason);
+  return Boolean(wakeReason && RUNNING_ISSUE_WAKE_REASONS_REQUIRING_FOLLOWUP.has(wakeReason));
+}
+
 function isCheckoutConflictError(error: unknown): boolean {
  return error instanceof HttpError && error.status === 409 && error.message === "Issue checkout conflict";
 }
@ -1577,6 +1697,26 @@ export function heartbeatService(db: Db) {
    cancelWorkForScope: cancelBudgetScopeWork,
  };
  const budgets = budgetService(db, budgetHooks);
+  let unsafeTextProjectionPromise: Promise<boolean> | null = null;
+
+  async function hasUnsafeTextProjectionDatabase() {
+    if (!unsafeTextProjectionPromise) {
+      unsafeTextProjectionPromise = db
+        .execute(sql`select current_setting('server_encoding') as server_encoding`)
+        .then((rows) => {
+          const first = Array.isArray(rows) ? rows[0] : null;
+          const serverEncoding = typeof first === "object" && first !== null
+            ? (first as Record<string, unknown>).server_encoding
+            : null;
+          return typeof serverEncoding === "string" && serverEncoding.toUpperCase() === "SQL_ASCII";
+        })
+        .catch((err) => {
+          logger.warn({ err }, "failed to inspect database server encoding; using conservative heartbeat result projection");
+          return true;
+        });
+    }
+    return unsafeTextProjectionPromise;
+  }

  async function getAgent(agentId: string) {
    return db
@ -1587,8 +1727,15 @@ export function heartbeatService(db: Db) {
  }

  async function getRun(runId: string, opts?: { unsafeFullResultJson?: boolean }) {
+    const safeForLegacyEncoding = !opts?.unsafeFullResultJson && await hasUnsafeTextProjectionDatabase();
    return db
-      .select(opts?.unsafeFullResultJson ? getTableColumns(heartbeatRuns) : heartbeatRunSafeColumns)
+      .select(
+        opts?.unsafeFullResultJson
+          ? getTableColumns(heartbeatRuns)
+          : safeForLegacyEncoding
+            ? heartbeatRunSqlAsciiSafeColumns
+            : heartbeatRunSafeColumns,
+      )
      .from(heartbeatRuns)
      .where(eq(heartbeatRuns.id, runId))
      .then((rows) => rows[0] ?? null);
@ -2393,9 +2540,12 @@ export function heartbeatService(db: Db) {
    const sanitizedMessage = event.message
      ? redactCurrentUserText(event.message, currentUserRedactionOptions)
      : event.message;
-    const sanitizedPayload = event.payload
-      ? redactCurrentUserValue(event.payload, currentUserRedactionOptions)
+    const boundedPayload = event.payload
+      ? boundHeartbeatRunEventPayloadForStorage(event.payload)
      : event.payload;
+    const sanitizedPayload = boundedPayload
+      ? redactCurrentUserValue(boundedPayload, currentUserRedactionOptions)
+      : boundedPayload;

    await db.insert(heartbeatRunEvents).values({
      companyId: run.companyId,
@ -3484,16 +3634,13 @@ export function heartbeatService(db: Db) {
      }

      const latestRun = await getLatestIssueRun(issue.companyId, issue.id);
-      const latestContext = parseObject(latestRun?.contextSnapshot);
-      const latestRetryReason = readNonEmptyString(latestContext.retryReason);
-
      if (issue.status === "todo") {
        if (!latestRun || latestRun.status === "succeeded") {
          result.skipped += 1;
          continue;
        }

-        if (latestRetryReason === "assignment_recovery") {
+        if (didAutomaticRecoveryFail(latestRun, "assignment_recovery")) {
          const failureSummary = summarizeRunFailureForIssueComment(latestRun);
          const updated = await escalateStrandedAssignedIssue({
            issue,
@ -3530,7 +3677,12 @@ export function heartbeatService(db: Db) {
        continue;
      }

-      if (latestRetryReason === "issue_continuation_needed") {
+      if (!latestRun && !issue.checkoutRunId && !issue.executionRunId) {
+        result.skipped += 1;
+        continue;
+      }
+
+      if (didAutomaticRecoveryFail(latestRun, "issue_continuation_needed")) {
        const failureSummary = summarizeRunFailureForIssueComment(latestRun);
        const updated = await escalateStrandedAssignedIssue({
          issue,
@ -5137,12 +5289,12 @@ export function heartbeatService(db: Db) {
            normalizeAgentNameKey(executionAgent?.name);
          const isSameExecutionAgent =
            Boolean(executionAgentNameKey) && executionAgentNameKey === agentNameKey;
-          const shouldQueueFollowupForCommentWake =
-            Boolean(wakeCommentId) &&
+          const shouldQueueFollowupForRunningWake =
+            shouldQueueFollowupForRunningIssueWake({ contextSnapshot: enrichedContextSnapshot, wakeCommentId }) &&
            activeExecutionRun.status === "running" &&
            isSameExecutionAgent;

-          if (isSameExecutionAgent && !shouldQueueFollowupForCommentWake) {
+          if (isSameExecutionAgent && !shouldQueueFollowupForRunningWake) {
            const mergedContextSnapshot = mergeCoalescedContextSnapshot(
              activeExecutionRun.contextSnapshot,
              enrichedContextSnapshot,
@ -5319,12 +5471,14 @@ export function heartbeatService(db: Db) {
    const sameScopeRunningRun = activeRuns.find(
      (candidate) => candidate.status === "running" && isSameTaskScope(runTaskKey(candidate), taskKey),
    );
-    const shouldQueueFollowupForCommentWake =
-      Boolean(wakeCommentId) && Boolean(sameScopeRunningRun) && !sameScopeQueuedRun;
+    const shouldQueueFollowupForRunningWake =
+      Boolean(sameScopeRunningRun) &&
+      !sameScopeQueuedRun &&
+      shouldQueueFollowupForRunningIssueWake({ contextSnapshot: enrichedContextSnapshot, wakeCommentId });

    const coalescedTargetRun =
      sameScopeQueuedRun ??
-      (shouldQueueFollowupForCommentWake ? null : sameScopeRunningRun ?? null);
+      (shouldQueueFollowupForRunningWake ? null : sameScopeRunningRun ?? null);

    if (coalescedTargetRun) {
      const mergedContextSnapshot = mergeCoalescedContextSnapshot(
@ -5646,12 +5800,21 @@ export function heartbeatService(db: Db) {

  return {
    list: async (companyId: string, agentId?: string, limit?: number) => {
+      const safeForLegacyEncoding = await hasUnsafeTextProjectionDatabase();
      const query = db
-        .select({
-          ...heartbeatRunListColumns,
-          ...heartbeatRunListContextColumns,
-          ...heartbeatRunListResultColumns,
-        })
+        .select(
+          safeForLegacyEncoding
+            ? {
+                ...heartbeatRunListColumns,
+                error: sql<string | null>`NULL`.as("error"),
+                ...heartbeatRunListContextColumns,
+              }
+            : {
+                ...heartbeatRunListColumns,
+                ...heartbeatRunListContextColumns,
+                ...heartbeatRunListResultColumns,
+              },
+        )
        .from(heartbeatRuns)
        .where(
          agentId
@ -5679,7 +5842,15 @@ export function heartbeatService(db: Db) {
          resultCostUsd,
          resultCostUsdCamel,
          ...rest
-        } = row;
+        } = row as typeof row & {
+          resultSummary?: string | null;
+          resultResult?: string | null;
+          resultMessage?: string | null;
+          resultError?: string | null;
+          resultTotalCostUsd?: string | null;
+          resultCostUsd?: string | null;
+          resultCostUsdCamel?: string | null;
+        };

        return {
          ...rest,
@ -5693,15 +5864,17 @@ export function heartbeatService(db: Db) {
            wakeSource: contextWakeSource,
            wakeTriggerDetail: contextWakeTriggerDetail,
          }),
-          resultJson: summarizeHeartbeatRunListResultJson({
-            summary: resultSummary,
-            result: resultResult,
-            message: resultMessage,
-            error: resultError,
-            totalCostUsd: resultTotalCostUsd,
-            costUsd: resultCostUsd,
-            costUsdCamel: resultCostUsdCamel,
-          }),
+          resultJson: safeForLegacyEncoding
+            ? null
+            : summarizeHeartbeatRunListResultJson({
+                summary: resultSummary,
+                result: resultResult,
+                message: resultMessage,
+                error: resultError,
+                totalCostUsd: resultTotalCostUsd,
+                costUsd: resultCostUsd,
+                costUsdCamel: resultCostUsdCamel,
+              }),
        };
      });
    },
@ -5810,7 +5983,9 @@ export function heartbeatService(db: Db) {
        store: run.logStore,
        logRef: run.logRef,
        ...result,
-        content: redactCurrentUserText(result.content, await getCurrentUserRedactionOptions()),
+        // Run-log chunks are already redacted before they are appended to the store.
+        // Rewriting the full chunk again on every poll creates avoidable string copies.
+        content: result.content,
      };
    },