mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-15 18:30:39 +09:00
Harden remote workspace sync and restore flows (#5444)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - When an agent runs against a remote target, Paperclip syncs the
workspace out to the remote at run start and restores changes back to
the local workspace at run end
> - The previous restore flow naïvely overwrote local files with
whatever the remote returned, so files that the remote run never touched
but had timestamp/mode drift could be needlessly rewritten — and a
single static `refs/paperclip/ssh-sync/imported` ref made concurrent SSH
workspace exports race on the same git ref
> - This pull request adds a `workspace-restore-merge` module that diffs
a pre-run snapshot against the post-run remote state and only writes
back files the remote actually changed; SSH workspace exports now use a
per-import unique ref so concurrent runs can't trample each other
> - Every adapter's execute path threads the snapshot through
`prepareAdapterExecutionTargetRuntime` so the merge has the baseline it
needs
> - The benefit is workspace restores no longer churn untouched files,
and concurrent SSH runs no longer collide on the import ref
## What Changed
- `packages/adapter-utils/src/workspace-restore-merge.{ts,test.ts}`: new
module — directory snapshot (kind/mode/sha256/symlink target) plus
snapshot-aware merge that writes only the files the remote changed
- `packages/adapter-utils/src/ssh.ts`: SSH workspace export uses a
per-import unique ref (`refs/paperclip/ssh-sync/imported/<uuid>`);
restore goes through the new merge helper; `ssh-fixture.test.ts` covers
the unique-ref + merge paths
- `packages/adapter-utils/src/sandbox-managed-runtime.ts` +
`remote-managed-runtime.ts`: thread the snapshot/merge through the
sandbox and SSH paths
- `packages/adapter-utils/src/server-utils.{ts,test.ts}` +
`execution-target.ts`: helpers for capturing the pre-run snapshot;
`prepareAdapterExecutionTargetRuntime` gains required `runId` and
optional `workspaceRemoteDir`, and returns the realized
`workspaceRemoteDir`
- Each adapter's `execute.ts` (acpx, claude, codex, cursor, gemini,
opencode, pi) takes the snapshot at run start and passes it through to
the runtime restore
- Remote execute test mocks updated to match the new
`prepareWorkspaceForSshExecution` return shape and the per-run
`${managedRemoteWorkspace}` cwd subdirectory
## Verification
- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils
--project @paperclipai/adapter-acpx-local --project
@paperclipai/adapter-claude-local --project
@paperclipai/adapter-codex-local --project
@paperclipai/adapter-cursor-local --project
@paperclipai/adapter-gemini-local --project
@paperclipai/adapter-opencode-local --project
@paperclipai/adapter-pi-local` — 196/196 passing
- `pnpm typecheck` clean across the workspace
## Risks
Medium. The restore path now writes a strict subset of what it
previously did — files the remote did not touch are no longer rewritten.
If any flow was relying on a touch-without-content-change being copied
back (timestamp or permission propagation only), that behavior is now
skipped. Snapshot capture adds an O(N-files-in-workspace) hash pass at
run start; the cost is bounded by the existing exclude list. The `runId`
parameter on `prepareAdapterExecutionTargetRuntime` is now required —
every in-tree caller is updated; out-of-tree adapter authors need to
pass it.
## Model Used
Claude Opus 4.7 (1M context)
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — new module +
every adapter execute path covered
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
This commit is contained in:
parent
824298f414
commit
12cb7b40fd
23 changed files with 1234 additions and 183 deletions
|
|
@ -1,3 +1,4 @@
|
|||
import { randomUUID } from "node:crypto";
|
||||
import { execFile, spawn } from "node:child_process";
|
||||
import { constants as fsConstants, createReadStream, createWriteStream, promises as fs } from "node:fs";
|
||||
import net from "node:net";
|
||||
|
|
@ -5,6 +6,8 @@ import os from "node:os";
|
|||
import path from "node:path";
|
||||
import type { CommandManagedRuntimeRunner } from "./command-managed-runtime.js";
|
||||
import type { RunProcessResult } from "./server-utils.js";
|
||||
import type { DirectorySnapshot } from "./workspace-restore-merge.js";
|
||||
import { mergeDirectoryWithBaseline } from "./workspace-restore-merge.js";
|
||||
|
||||
export interface SshConnectionConfig {
|
||||
host: string;
|
||||
|
|
@ -596,7 +599,9 @@ async function importGitWorkspaceToSsh(input: {
|
|||
}): Promise<void> {
|
||||
const bundleDir = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-ssh-bundle-"));
|
||||
const bundlePath = path.join(bundleDir, "workspace.bundle");
|
||||
const tempRef = "refs/paperclip/ssh-sync/import";
|
||||
// Per-import unique ref so concurrent imports against the same local repo
|
||||
// can't race on `update-ref` between this run's update and bundle create.
|
||||
const tempRef = `refs/paperclip/ssh-sync/import/${randomUUID()}`;
|
||||
|
||||
try {
|
||||
await runLocalGit(input.localDir, ["update-ref", tempRef, input.snapshot.headCommit], {
|
||||
|
|
@ -621,6 +626,8 @@ async function importGitWorkspaceToSsh(input: {
|
|||
: `git -C ${shellQuote(input.remoteDir)} -c advice.detachedHead=false checkout --force --detach ${shellQuote(input.snapshot.headCommit)} >/dev/null`,
|
||||
`git -C ${shellQuote(input.remoteDir)} reset --hard ${shellQuote(input.snapshot.headCommit)} >/dev/null`,
|
||||
`git -C ${shellQuote(input.remoteDir)} clean -fdx -e .paperclip-runtime >/dev/null`,
|
||||
// Drop the per-import ref on the remote side too so it can't accumulate.
|
||||
`git -C ${shellQuote(input.remoteDir)} update-ref -d ${shellQuote(tempRef)} >/dev/null 2>&1 || true`,
|
||||
].join("\n");
|
||||
|
||||
await streamLocalFileToSsh({
|
||||
|
|
@ -641,10 +648,12 @@ async function exportGitWorkspaceFromSsh(input: {
|
|||
spec: SshRemoteExecutionSpec;
|
||||
remoteDir: string;
|
||||
localDir: string;
|
||||
}): Promise<void> {
|
||||
importedRef?: string;
|
||||
resetLocalWorkspace?: boolean;
|
||||
}): Promise<string> {
|
||||
const bundleDir = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-ssh-bundle-"));
|
||||
const bundlePath = path.join(bundleDir, "workspace.bundle");
|
||||
const importedRef = "refs/paperclip/ssh-sync/imported";
|
||||
const importedRef = input.importedRef ?? `refs/paperclip/ssh-sync/imported/${randomUUID()}`;
|
||||
|
||||
try {
|
||||
const exportScript = [
|
||||
|
|
@ -668,19 +677,97 @@ async function exportGitWorkspaceFromSsh(input: {
|
|||
timeout: 60_000,
|
||||
maxBuffer: 1024 * 1024,
|
||||
});
|
||||
await runLocalGit(input.localDir, ["reset", "--hard", importedRef], {
|
||||
timeout: 60_000,
|
||||
maxBuffer: 1024 * 1024,
|
||||
});
|
||||
} finally {
|
||||
await runLocalGit(input.localDir, ["update-ref", "-d", importedRef], {
|
||||
if (input.resetLocalWorkspace !== false) {
|
||||
await runLocalGit(input.localDir, ["reset", "--hard", importedRef], {
|
||||
timeout: 60_000,
|
||||
maxBuffer: 1024 * 1024,
|
||||
});
|
||||
}
|
||||
const importedHead = await runLocalGit(input.localDir, ["rev-parse", importedRef], {
|
||||
timeout: 10_000,
|
||||
maxBuffer: 16 * 1024,
|
||||
}).catch(() => undefined);
|
||||
});
|
||||
return importedHead.stdout.trim();
|
||||
} finally {
|
||||
if (input.resetLocalWorkspace !== false) {
|
||||
await runLocalGit(input.localDir, ["update-ref", "-d", importedRef], {
|
||||
timeout: 10_000,
|
||||
maxBuffer: 16 * 1024,
|
||||
}).catch(() => undefined);
|
||||
}
|
||||
await fs.rm(bundleDir, { recursive: true, force: true }).catch(() => undefined);
|
||||
}
|
||||
}
|
||||
|
||||
async function integrateImportedGitHead(input: {
|
||||
localDir: string;
|
||||
importedHead: string;
|
||||
}): Promise<void> {
|
||||
const snapshot = await readLocalGitWorkspaceSnapshot(input.localDir);
|
||||
if (!snapshot) return;
|
||||
|
||||
const currentHead = snapshot.headCommit;
|
||||
if (!currentHead || currentHead === input.importedHead) return;
|
||||
|
||||
const headRef = snapshot.branchName ? `refs/heads/${snapshot.branchName}` : "HEAD";
|
||||
const mergeBase = await runLocalGit(input.localDir, ["merge-base", currentHead, input.importedHead], {
|
||||
timeout: 10_000,
|
||||
maxBuffer: 16 * 1024,
|
||||
}).catch(() => null);
|
||||
const mergeBaseHead = mergeBase?.stdout.trim() ?? "";
|
||||
|
||||
if (mergeBaseHead === input.importedHead) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (mergeBaseHead === currentHead) {
|
||||
await runLocalGit(input.localDir, ["update-ref", headRef, input.importedHead, currentHead], {
|
||||
timeout: 10_000,
|
||||
maxBuffer: 16 * 1024,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
let mergedTree;
|
||||
try {
|
||||
mergedTree = await runLocalGit(input.localDir, ["merge-tree", "--write-tree", currentHead, input.importedHead], {
|
||||
timeout: 60_000,
|
||||
maxBuffer: 256 * 1024,
|
||||
});
|
||||
} catch (error) {
|
||||
const reason = error instanceof Error ? error.message : String(error);
|
||||
throw new Error(
|
||||
`Failed to merge concurrent SSH git histories for ${currentHead.slice(0, 12)} and ${input.importedHead.slice(0, 12)}: ${reason}`,
|
||||
);
|
||||
}
|
||||
const mergedTreeId = mergedTree.stdout.trim().split("\n")[0]?.trim() ?? "";
|
||||
if (!mergedTreeId) {
|
||||
throw new Error("Failed to compute a merged git tree for SSH workspace restore.");
|
||||
}
|
||||
|
||||
const mergeCommit = await runLocalGit(
|
||||
input.localDir,
|
||||
[
|
||||
"commit-tree",
|
||||
mergedTreeId,
|
||||
"-p",
|
||||
currentHead,
|
||||
"-p",
|
||||
input.importedHead,
|
||||
"-m",
|
||||
`Paperclip SSH sync merge ${input.importedHead.slice(0, 12)}`,
|
||||
],
|
||||
{
|
||||
timeout: 60_000,
|
||||
maxBuffer: 64 * 1024,
|
||||
},
|
||||
);
|
||||
await runLocalGit(input.localDir, ["update-ref", headRef, mergeCommit.stdout.trim(), currentHead], {
|
||||
timeout: 10_000,
|
||||
maxBuffer: 16 * 1024,
|
||||
});
|
||||
}
|
||||
|
||||
async function clearRemoteDirectory(input: {
|
||||
spec: SshConnectionConfig;
|
||||
remoteDir: string;
|
||||
|
|
@ -1117,7 +1204,7 @@ export async function prepareWorkspaceForSshExecution(input: {
|
|||
spec: SshRemoteExecutionSpec;
|
||||
localDir: string;
|
||||
remoteDir?: string;
|
||||
}): Promise<void> {
|
||||
}): Promise<{ gitBacked: boolean }> {
|
||||
const remoteDir = input.remoteDir ?? input.spec.remoteCwd;
|
||||
const gitSnapshot = await readLocalGitWorkspaceSnapshot(input.localDir);
|
||||
|
||||
|
|
@ -1139,7 +1226,7 @@ export async function prepareWorkspaceForSshExecution(input: {
|
|||
remoteDir,
|
||||
deletedPaths: gitSnapshot.deletedPaths,
|
||||
});
|
||||
return;
|
||||
return { gitBacked: true };
|
||||
}
|
||||
|
||||
await clearRemoteDirectory({
|
||||
|
|
@ -1153,14 +1240,64 @@ export async function prepareWorkspaceForSshExecution(input: {
|
|||
remoteDir,
|
||||
exclude: [".paperclip-runtime"],
|
||||
});
|
||||
return { gitBacked: false };
|
||||
}
|
||||
|
||||
export async function restoreWorkspaceFromSshExecution(input: {
|
||||
spec: SshRemoteExecutionSpec;
|
||||
localDir: string;
|
||||
remoteDir?: string;
|
||||
baselineSnapshot?: DirectorySnapshot;
|
||||
restoreGitHistory?: boolean;
|
||||
}): Promise<void> {
|
||||
const remoteDir = input.remoteDir ?? input.spec.remoteCwd;
|
||||
if (input.baselineSnapshot) {
|
||||
const stagingDir = await fs.mkdtemp(path.join(os.tmpdir(), "paperclip-ssh-sync-back-"));
|
||||
const importedRef = input.restoreGitHistory
|
||||
? `refs/paperclip/ssh-sync/imported/${randomUUID()}`
|
||||
: null;
|
||||
try {
|
||||
const importedHead = input.restoreGitHistory
|
||||
? await exportGitWorkspaceFromSsh({
|
||||
spec: input.spec,
|
||||
remoteDir,
|
||||
localDir: input.localDir,
|
||||
importedRef: importedRef ?? undefined,
|
||||
resetLocalWorkspace: false,
|
||||
})
|
||||
: null;
|
||||
await syncDirectoryFromSsh({
|
||||
spec: input.spec,
|
||||
remoteDir,
|
||||
localDir: stagingDir,
|
||||
exclude: input.baselineSnapshot.exclude,
|
||||
});
|
||||
await mergeDirectoryWithBaseline({
|
||||
baseline: input.baselineSnapshot,
|
||||
sourceDir: stagingDir,
|
||||
targetDir: input.localDir,
|
||||
// Git history advances via integrateImportedGitHead; the working tree
|
||||
// still comes from the remote file snapshot so dirty remote edits win.
|
||||
beforeApply: importedHead
|
||||
? async () => {
|
||||
await integrateImportedGitHead({
|
||||
localDir: input.localDir,
|
||||
importedHead,
|
||||
});
|
||||
}
|
||||
: undefined,
|
||||
});
|
||||
} finally {
|
||||
if (importedRef) {
|
||||
await runLocalGit(input.localDir, ["update-ref", "-d", importedRef], {
|
||||
timeout: 10_000,
|
||||
maxBuffer: 16 * 1024,
|
||||
}).catch(() => undefined);
|
||||
}
|
||||
await fs.rm(stagingDir, { recursive: true, force: true }).catch(() => undefined);
|
||||
}
|
||||
return;
|
||||
}
|
||||
const gitSnapshot = await readLocalGitWorkspaceSnapshot(input.localDir);
|
||||
|
||||
if (gitSnapshot) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue