paperclip/server/src/routes/execution-workspaces.ts
Dotta 16b2b84d84
[codex] Improve agent runtime recovery and governance (#4086)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - The heartbeat runtime, agent import path, and agent configuration
defaults determine whether work is dispatched safely and predictably.
> - Several accumulated fixes all touched agent execution recovery, wake
routing, import behavior, and runtime concurrency defaults.
> - Those changes need to land together so the heartbeat service and
agent creation defaults stay internally consistent.
> - This pull request groups the runtime/governance changes from the
split branch into one standalone branch.
> - The benefit is safer recovery for stranded runs, bounded high-volume
reads, imported-agent approval correctness, skill-template support, and
a clearer default concurrency policy.

## What Changed

- Fixed stranded continuation recovery so successful automatic retries
are requeued instead of incorrectly blocking the issue.
- Bounded high-volume issue/log reads across issue, heartbeat, agent,
project, and workspace paths.
- Fixed imported-agent approval and instruction-path permission
handling.
- Quarantined seeded worktree execution state during worktree
provisioning.
- Queued approval follow-up wakes and hardened SQL_ASCII heartbeat
output handling.
- Added reusable agent instruction templates for hiring flows.
- Set the default max concurrent agent runs to five and updated related
UI/tests/docs.

## Verification

- `pnpm install --frozen-lockfile`
- `pnpm exec vitest run server/src/__tests__/company-portability.test.ts
server/src/__tests__/heartbeat-process-recovery.test.ts
server/src/__tests__/heartbeat-comment-wake-batching.test.ts
server/src/__tests__/heartbeat-list.test.ts
server/src/__tests__/issues-service.test.ts
server/src/__tests__/agent-permissions-routes.test.ts
packages/adapter-utils/src/server-utils.test.ts
ui/src/lib/new-agent-runtime-config.test.ts`
- Split integration check: merged this branch first, followed by the
other [PAP-1614](/PAP/issues/PAP-1614) branches, with no merge
conflicts.
- Confirmed this branch does not include `pnpm-lock.yaml`.

## Risks

- Medium risk: touches heartbeat recovery, queueing, and issue list
bounds in central runtime paths.
- Imported-agent and concurrency default behavior changes may affect
existing automation that assumes one-at-a-time default runs.
- No database migrations are included.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, GPT-5.4 tool-enabled coding model, agentic
code-editing/runtime with local shell and GitHub CLI access; exact
context window and reasoning mode are not exposed by the Paperclip
harness.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-04-20 06:19:48 -05:00

616 lines
24 KiB
TypeScript

import { and, eq } from "drizzle-orm";
import { Router, type Request, type Response } from "express";
import type { Db } from "@paperclipai/db";
import { issues, projects, projectWorkspaces } from "@paperclipai/db";
import {
findWorkspaceCommandDefinition,
matchWorkspaceRuntimeServiceToCommand,
updateExecutionWorkspaceSchema,
workspaceRuntimeControlTargetSchema,
} from "@paperclipai/shared";
import { validate } from "../middleware/validate.js";
import { executionWorkspaceService, logActivity, workspaceOperationService } from "../services/index.js";
import { mergeExecutionWorkspaceConfig, readExecutionWorkspaceConfig } from "../services/execution-workspaces.js";
import { parseProjectExecutionWorkspacePolicy } from "../services/execution-workspace-policy.js";
import { readProjectWorkspaceRuntimeConfig } from "../services/project-workspace-runtime-config.js";
import {
buildWorkspaceRuntimeDesiredStatePatch,
cleanupExecutionWorkspaceArtifacts,
ensurePersistedExecutionWorkspaceAvailable,
listConfiguredRuntimeServiceEntries,
runWorkspaceJobForControl,
startRuntimeServicesForWorkspaceControl,
stopRuntimeServicesForExecutionWorkspace,
} from "../services/workspace-runtime.js";
import { assertCompanyAccess, getActorInfo } from "./authz.js";
import {
assertNoAgentHostWorkspaceCommandMutation,
collectExecutionWorkspaceCommandPaths,
} from "./workspace-command-authz.js";
import { assertCanManageExecutionWorkspaceRuntimeServices } from "./workspace-runtime-service-authz.js";
import { appendWithCap } from "../adapters/utils.js";
const WORKSPACE_CONTROL_OUTPUT_MAX_CHARS = 256 * 1024;
export function executionWorkspaceRoutes(db: Db) {
const router = Router();
const svc = executionWorkspaceService(db);
const workspaceOperationsSvc = workspaceOperationService(db);
router.get("/companies/:companyId/execution-workspaces", async (req, res) => {
const companyId = req.params.companyId as string;
assertCompanyAccess(req, companyId);
const filters = {
projectId: req.query.projectId as string | undefined,
projectWorkspaceId: req.query.projectWorkspaceId as string | undefined,
issueId: req.query.issueId as string | undefined,
status: req.query.status as string | undefined,
reuseEligible: req.query.reuseEligible === "true",
};
const workspaces = req.query.summary === "true"
? await svc.listSummaries(companyId, filters)
: await svc.list(companyId, filters);
res.json(workspaces);
});
router.get("/execution-workspaces/:id", async (req, res) => {
const id = req.params.id as string;
const workspace = await svc.getById(id);
if (!workspace) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
assertCompanyAccess(req, workspace.companyId);
res.json(workspace);
});
router.get("/execution-workspaces/:id/close-readiness", async (req, res) => {
const id = req.params.id as string;
const workspace = await svc.getById(id);
if (!workspace) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
assertCompanyAccess(req, workspace.companyId);
const readiness = await svc.getCloseReadiness(id);
if (!readiness) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
res.json(readiness);
});
router.get("/execution-workspaces/:id/workspace-operations", async (req, res) => {
const id = req.params.id as string;
const workspace = await svc.getById(id);
if (!workspace) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
assertCompanyAccess(req, workspace.companyId);
const operations = await workspaceOperationsSvc.listForExecutionWorkspace(id);
res.json(operations);
});
async function handleExecutionWorkspaceRuntimeCommand(req: Request, res: Response) {
const id = req.params.id as string;
const action = String(req.params.action ?? "").trim().toLowerCase();
if (action !== "start" && action !== "stop" && action !== "restart" && action !== "run") {
res.status(404).json({ error: "Workspace command action not found" });
return;
}
const existing = await svc.getById(id);
if (!existing) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
assertCompanyAccess(req, existing.companyId);
await assertCanManageExecutionWorkspaceRuntimeServices(db, req, {
companyId: existing.companyId,
executionWorkspaceId: existing.id,
sourceIssueId: existing.sourceIssueId,
});
const workspaceCwd = existing.cwd;
if (!workspaceCwd) {
res.status(422).json({ error: "Execution workspace needs a local path before Paperclip can run workspace commands" });
return;
}
const projectWorkspace = existing.projectWorkspaceId
? await db
.select({
id: projectWorkspaces.id,
cwd: projectWorkspaces.cwd,
repoUrl: projectWorkspaces.repoUrl,
repoRef: projectWorkspaces.repoRef,
defaultRef: projectWorkspaces.defaultRef,
metadata: projectWorkspaces.metadata,
})
.from(projectWorkspaces)
.where(
and(
eq(projectWorkspaces.id, existing.projectWorkspaceId),
eq(projectWorkspaces.companyId, existing.companyId),
),
)
.then((rows) => rows[0] ?? null)
: null;
const projectWorkspaceRuntime = readProjectWorkspaceRuntimeConfig(
(projectWorkspace?.metadata as Record<string, unknown> | null) ?? null,
)?.workspaceRuntime ?? null;
const projectPolicy = existing.projectId
? await db
.select({
executionWorkspacePolicy: projects.executionWorkspacePolicy,
})
.from(projects)
.where(
and(
eq(projects.id, existing.projectId),
eq(projects.companyId, existing.companyId),
),
)
.then((rows) => parseProjectExecutionWorkspacePolicy(rows[0]?.executionWorkspacePolicy))
: null;
const effectiveRuntimeConfig = existing.config?.workspaceRuntime ?? projectWorkspaceRuntime ?? null;
const target = req.body as { workspaceCommandId?: string | null; runtimeServiceId?: string | null; serviceIndex?: number | null };
const configuredServices = effectiveRuntimeConfig
? listConfiguredRuntimeServiceEntries({ workspaceRuntime: effectiveRuntimeConfig })
: [];
const workspaceCommand = effectiveRuntimeConfig
? findWorkspaceCommandDefinition(effectiveRuntimeConfig, target.workspaceCommandId ?? null)
: null;
if (target.workspaceCommandId && !workspaceCommand) {
res.status(404).json({ error: "Workspace command not found for this execution workspace" });
return;
}
if (target.runtimeServiceId && !(existing.runtimeServices ?? []).some((service) => service.id === target.runtimeServiceId)) {
res.status(404).json({ error: "Runtime service not found for this execution workspace" });
return;
}
const matchedRuntimeService =
workspaceCommand?.kind === "service" && !target.runtimeServiceId
? matchWorkspaceRuntimeServiceToCommand(workspaceCommand, existing.runtimeServices ?? [])
: null;
const selectedRuntimeServiceId = target.runtimeServiceId ?? matchedRuntimeService?.id ?? null;
const selectedServiceIndex =
workspaceCommand?.kind === "service"
? workspaceCommand.serviceIndex
: target.serviceIndex ?? null;
if (
selectedServiceIndex !== undefined
&& selectedServiceIndex !== null
&& (selectedServiceIndex < 0 || selectedServiceIndex >= configuredServices.length)
) {
res.status(422).json({ error: "Selected runtime service is not defined in this execution workspace runtime config" });
return;
}
if (workspaceCommand?.kind === "job" && action !== "run") {
res.status(422).json({ error: `Workspace job "${workspaceCommand.name}" can only be run` });
return;
}
if (workspaceCommand?.kind === "service" && action === "run") {
res.status(422).json({ error: `Workspace service "${workspaceCommand.name}" should be started or restarted, not run` });
return;
}
if (action === "run" && !workspaceCommand) {
res.status(422).json({ error: "Select a workspace job to run" });
return;
}
if ((action === "start" || action === "restart") && !effectiveRuntimeConfig) {
res.status(422).json({ error: "Execution workspace has no workspace command configuration or inherited project workspace default" });
return;
}
const actor = getActorInfo(req);
const recorder = workspaceOperationsSvc.createRecorder({
companyId: existing.companyId,
executionWorkspaceId: existing.id,
});
let runtimeServiceCount = existing.runtimeServices?.length ?? 0;
let stdout = "";
let stderr = "";
const operation = await recorder.recordOperation({
phase: action === "stop" ? "workspace_teardown" : "workspace_provision",
command: workspaceCommand?.command ?? `workspace command ${action}`,
cwd: existing.cwd,
metadata: {
action,
executionWorkspaceId: existing.id,
workspaceCommandId: workspaceCommand?.id ?? target.workspaceCommandId ?? null,
workspaceCommandKind: workspaceCommand?.kind ?? null,
workspaceCommandName: workspaceCommand?.name ?? null,
runtimeServiceId: selectedRuntimeServiceId,
serviceIndex: selectedServiceIndex,
},
run: async () => {
const ensureWorkspaceAvailable = async () =>
await ensurePersistedExecutionWorkspaceAvailable({
base: {
baseCwd: projectWorkspace?.cwd ?? workspaceCwd,
source: existing.mode === "shared_workspace" ? "project_primary" : "task_session",
projectId: existing.projectId,
workspaceId: existing.projectWorkspaceId,
repoUrl: existing.repoUrl,
repoRef: existing.baseRef,
},
workspace: {
mode: existing.mode,
strategyType: existing.strategyType,
cwd: existing.cwd,
providerRef: existing.providerRef,
projectId: existing.projectId,
projectWorkspaceId: existing.projectWorkspaceId,
repoUrl: existing.repoUrl,
baseRef: existing.baseRef,
branchName: existing.branchName,
config: {
...existing.config,
provisionCommand:
existing.config?.provisionCommand
?? projectPolicy?.workspaceStrategy?.provisionCommand
?? null,
},
},
issue: existing.sourceIssueId
? {
id: existing.sourceIssueId,
identifier: null,
title: existing.name,
}
: null,
agent: {
id: actor.agentId ?? null,
name: actor.actorType === "user" ? "Board" : "Agent",
companyId: existing.companyId,
},
recorder,
});
if (action === "run") {
if (!workspaceCommand || workspaceCommand.kind !== "job") {
throw new Error("Workspace job selection is required");
}
const availableWorkspace = await ensureWorkspaceAvailable();
if (!availableWorkspace) {
throw new Error("Execution workspace needs a local path before Paperclip can run workspace commands");
}
return await runWorkspaceJobForControl({
actor: {
id: actor.agentId ?? null,
name: actor.actorType === "user" ? "Board" : "Agent",
companyId: existing.companyId,
},
issue: existing.sourceIssueId
? {
id: existing.sourceIssueId,
identifier: null,
title: existing.name,
}
: null,
workspace: availableWorkspace,
command: workspaceCommand.rawConfig,
adapterEnv: {},
recorder,
metadata: {
action,
executionWorkspaceId: existing.id,
workspaceCommandId: workspaceCommand.id,
},
}).then((nestedOperation) => ({
status: "succeeded" as const,
exitCode: 0,
metadata: {
nestedOperationId: nestedOperation?.id ?? null,
runtimeServiceCount,
},
}));
}
const onLog = async (stream: "stdout" | "stderr", chunk: string) => {
if (stream === "stdout") stdout = appendWithCap(stdout, chunk, WORKSPACE_CONTROL_OUTPUT_MAX_CHARS);
else stderr = appendWithCap(stderr, chunk, WORKSPACE_CONTROL_OUTPUT_MAX_CHARS);
};
if (action === "stop" || action === "restart") {
await stopRuntimeServicesForExecutionWorkspace({
db,
executionWorkspaceId: existing.id,
workspaceCwd,
runtimeServiceId: selectedRuntimeServiceId,
});
}
if (action === "start" || action === "restart") {
const availableWorkspace = await ensureWorkspaceAvailable();
if (!availableWorkspace) {
throw new Error("Execution workspace needs a local path before Paperclip can manage local runtime services");
}
const startedServices = await startRuntimeServicesForWorkspaceControl({
db,
actor: {
id: actor.agentId ?? null,
name: actor.actorType === "user" ? "Board" : "Agent",
companyId: existing.companyId,
},
issue: existing.sourceIssueId
? {
id: existing.sourceIssueId,
identifier: null,
title: existing.name,
}
: null,
workspace: availableWorkspace,
executionWorkspaceId: existing.id,
config: { workspaceRuntime: effectiveRuntimeConfig },
adapterEnv: {},
onLog,
serviceIndex: selectedServiceIndex,
});
runtimeServiceCount = startedServices.length;
} else {
runtimeServiceCount = selectedRuntimeServiceId ? Math.max(0, (existing.runtimeServices?.length ?? 1) - 1) : 0;
}
const currentDesiredState: "running" | "stopped" =
existing.config?.desiredState
?? ((existing.runtimeServices ?? []).some((service) => service.status === "starting" || service.status === "running")
? "running"
: "stopped");
const nextRuntimeState: {
desiredState: "running" | "stopped";
serviceStates: Record<string, "running" | "stopped"> | null | undefined;
} = selectedRuntimeServiceId && (selectedServiceIndex === undefined || selectedServiceIndex === null)
? {
desiredState: currentDesiredState,
serviceStates: existing.config?.serviceStates ?? null,
}
: buildWorkspaceRuntimeDesiredStatePatch({
config: { workspaceRuntime: effectiveRuntimeConfig },
currentDesiredState,
currentServiceStates: existing.config?.serviceStates ?? null,
action,
serviceIndex: selectedServiceIndex,
});
const metadata = mergeExecutionWorkspaceConfig(existing.metadata as Record<string, unknown> | null, {
desiredState: nextRuntimeState.desiredState,
serviceStates: nextRuntimeState.serviceStates,
});
await svc.update(existing.id, { metadata });
return {
status: "succeeded",
stdout,
stderr,
system:
action === "stop"
? "Stopped execution workspace runtime services.\n"
: action === "restart"
? "Restarted execution workspace runtime services.\n"
: "Started execution workspace runtime services.\n",
metadata: {
runtimeServiceCount,
workspaceCommandId: workspaceCommand?.id ?? target.workspaceCommandId ?? null,
runtimeServiceId: selectedRuntimeServiceId,
serviceIndex: selectedServiceIndex,
},
};
},
});
const workspace = await svc.getById(id);
if (!workspace) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
await logActivity(db, {
companyId: existing.companyId,
actorType: actor.actorType,
actorId: actor.actorId,
agentId: actor.agentId,
runId: actor.runId,
action: `execution_workspace.runtime_${action}`,
entityType: "execution_workspace",
entityId: existing.id,
details: {
runtimeServiceCount,
workspaceCommandId: workspaceCommand?.id ?? target.workspaceCommandId ?? null,
workspaceCommandKind: workspaceCommand?.kind ?? null,
workspaceCommandName: workspaceCommand?.name ?? null,
runtimeServiceId: selectedRuntimeServiceId,
serviceIndex: selectedServiceIndex,
},
});
res.json({
workspace,
operation,
});
}
router.post("/execution-workspaces/:id/runtime-services/:action", validate(workspaceRuntimeControlTargetSchema), handleExecutionWorkspaceRuntimeCommand);
router.post("/execution-workspaces/:id/runtime-commands/:action", validate(workspaceRuntimeControlTargetSchema), handleExecutionWorkspaceRuntimeCommand);
router.patch("/execution-workspaces/:id", validate(updateExecutionWorkspaceSchema), async (req, res) => {
const id = req.params.id as string;
const existing = await svc.getById(id);
if (!existing) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
assertCompanyAccess(req, existing.companyId);
assertNoAgentHostWorkspaceCommandMutation(
req,
collectExecutionWorkspaceCommandPaths({
config: req.body.config,
metadata: req.body.metadata,
}),
);
const patch: Record<string, unknown> = {
...(req.body.name === undefined ? {} : { name: req.body.name }),
...(req.body.cwd === undefined ? {} : { cwd: req.body.cwd }),
...(req.body.repoUrl === undefined ? {} : { repoUrl: req.body.repoUrl }),
...(req.body.baseRef === undefined ? {} : { baseRef: req.body.baseRef }),
...(req.body.branchName === undefined ? {} : { branchName: req.body.branchName }),
...(req.body.providerRef === undefined ? {} : { providerRef: req.body.providerRef }),
...(req.body.status === undefined ? {} : { status: req.body.status }),
...(req.body.cleanupReason === undefined ? {} : { cleanupReason: req.body.cleanupReason }),
...(req.body.cleanupEligibleAt !== undefined
? { cleanupEligibleAt: req.body.cleanupEligibleAt ? new Date(req.body.cleanupEligibleAt) : null }
: {}),
};
if (req.body.metadata !== undefined || req.body.config !== undefined) {
const requestedMetadata = req.body.metadata === undefined
? (existing.metadata as Record<string, unknown> | null)
: (req.body.metadata as Record<string, unknown> | null);
patch.metadata = req.body.config === undefined
? requestedMetadata
: mergeExecutionWorkspaceConfig(requestedMetadata, req.body.config ?? null);
}
let workspace = existing;
let cleanupWarnings: string[] = [];
const configForCleanup = readExecutionWorkspaceConfig(
((patch.metadata as Record<string, unknown> | null | undefined) ?? (existing.metadata as Record<string, unknown> | null)) ?? null,
);
if (req.body.status === "archived" && existing.status !== "archived") {
const readiness = await svc.getCloseReadiness(existing.id);
if (!readiness) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
if (readiness.state === "blocked") {
res.status(409).json({
error: readiness.blockingReasons[0] ?? "Execution workspace cannot be closed right now",
closeReadiness: readiness,
});
return;
}
const closedAt = new Date();
const archivedWorkspace = await svc.update(id, {
...patch,
status: "archived",
closedAt,
cleanupReason: null,
});
if (!archivedWorkspace) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
workspace = archivedWorkspace;
if (existing.mode === "shared_workspace") {
await db
.update(issues)
.set({
executionWorkspaceId: null,
updatedAt: new Date(),
})
.where(
and(
eq(issues.companyId, existing.companyId),
eq(issues.executionWorkspaceId, existing.id),
),
);
}
try {
await stopRuntimeServicesForExecutionWorkspace({
db,
executionWorkspaceId: existing.id,
workspaceCwd: existing.cwd,
});
const projectWorkspace = existing.projectWorkspaceId
? await db
.select({
cwd: projectWorkspaces.cwd,
cleanupCommand: projectWorkspaces.cleanupCommand,
})
.from(projectWorkspaces)
.where(
and(
eq(projectWorkspaces.id, existing.projectWorkspaceId),
eq(projectWorkspaces.companyId, existing.companyId),
),
)
.then((rows) => rows[0] ?? null)
: null;
const projectPolicy = existing.projectId
? await db
.select({
executionWorkspacePolicy: projects.executionWorkspacePolicy,
})
.from(projects)
.where(and(eq(projects.id, existing.projectId), eq(projects.companyId, existing.companyId)))
.then((rows) => parseProjectExecutionWorkspacePolicy(rows[0]?.executionWorkspacePolicy))
: null;
const cleanupResult = await cleanupExecutionWorkspaceArtifacts({
workspace: existing,
projectWorkspace,
teardownCommand: configForCleanup?.teardownCommand ?? projectPolicy?.workspaceStrategy?.teardownCommand ?? null,
cleanupCommand: configForCleanup?.cleanupCommand ?? null,
recorder: workspaceOperationsSvc.createRecorder({
companyId: existing.companyId,
executionWorkspaceId: existing.id,
}),
});
cleanupWarnings = cleanupResult.warnings;
const cleanupPatch: Record<string, unknown> = {
closedAt,
cleanupReason: cleanupWarnings.length > 0 ? cleanupWarnings.join(" | ") : null,
};
if (!cleanupResult.cleaned) {
cleanupPatch.status = "cleanup_failed";
}
if (cleanupResult.warnings.length > 0 || !cleanupResult.cleaned) {
workspace = (await svc.update(id, cleanupPatch)) ?? workspace;
}
} catch (error) {
const failureReason = error instanceof Error ? error.message : String(error);
workspace =
(await svc.update(id, {
status: "cleanup_failed",
closedAt,
cleanupReason: failureReason,
})) ?? workspace;
res.status(500).json({
error: `Failed to archive execution workspace: ${failureReason}`,
});
return;
}
} else {
const updatedWorkspace = await svc.update(id, patch);
if (!updatedWorkspace) {
res.status(404).json({ error: "Execution workspace not found" });
return;
}
workspace = updatedWorkspace;
}
const actor = getActorInfo(req);
await logActivity(db, {
companyId: existing.companyId,
actorType: actor.actorType,
actorId: actor.actorId,
agentId: actor.agentId,
runId: actor.runId,
action: "execution_workspace.updated",
entityType: "execution_workspace",
entityId: workspace.id,
details: {
changedKeys: Object.keys(req.body).sort(),
...(cleanupWarnings.length > 0 ? { cleanupWarnings } : {}),
},
});
res.json(workspace);
});
return router;
}