mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-18 03:30:39 +09:00
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - When an agent runs against a remote target, Paperclip syncs the
workspace out to the remote at run start and restores changes back to
the local workspace at run end
> - The previous restore flow naïvely overwrote local files with
whatever the remote returned, so files that the remote run never touched
but had timestamp/mode drift could be needlessly rewritten — and a
single static `refs/paperclip/ssh-sync/imported` ref made concurrent SSH
workspace exports race on the same git ref
> - This pull request adds a `workspace-restore-merge` module that diffs
a pre-run snapshot against the post-run remote state and only writes
back files the remote actually changed; SSH workspace exports now use a
per-import unique ref so concurrent runs can't trample each other
> - Every adapter's execute path threads the snapshot through
`prepareAdapterExecutionTargetRuntime` so the merge has the baseline it
needs
> - The benefit is workspace restores no longer churn untouched files,
and concurrent SSH runs no longer collide on the import ref
## What Changed
- `packages/adapter-utils/src/workspace-restore-merge.{ts,test.ts}`: new
module — directory snapshot (kind/mode/sha256/symlink target) plus
snapshot-aware merge that writes only the files the remote changed
- `packages/adapter-utils/src/ssh.ts`: SSH workspace export uses a
per-import unique ref (`refs/paperclip/ssh-sync/imported/<uuid>`);
restore goes through the new merge helper; `ssh-fixture.test.ts` covers
the unique-ref + merge paths
- `packages/adapter-utils/src/sandbox-managed-runtime.ts` +
`remote-managed-runtime.ts`: thread the snapshot/merge through the
sandbox and SSH paths
- `packages/adapter-utils/src/server-utils.{ts,test.ts}` +
`execution-target.ts`: helpers for capturing the pre-run snapshot;
`prepareAdapterExecutionTargetRuntime` gains required `runId` and
optional `workspaceRemoteDir`, and returns the realized
`workspaceRemoteDir`
- Each adapter's `execute.ts` (acpx, claude, codex, cursor, gemini,
opencode, pi) takes the snapshot at run start and passes it through to
the runtime restore
- Remote execute test mocks updated to match the new
`prepareWorkspaceForSshExecution` return shape and the per-run
`${managedRemoteWorkspace}` cwd subdirectory
## Verification
- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils
--project @paperclipai/adapter-acpx-local --project
@paperclipai/adapter-claude-local --project
@paperclipai/adapter-codex-local --project
@paperclipai/adapter-cursor-local --project
@paperclipai/adapter-gemini-local --project
@paperclipai/adapter-opencode-local --project
@paperclipai/adapter-pi-local` — 196/196 passing
- `pnpm typecheck` clean across the workspace
## Risks
Medium. The restore path now writes a strict subset of what it
previously did — files the remote did not touch are no longer rewritten.
If any flow was relying on a touch-without-content-change being copied
back (timestamp or permission propagation only), that behavior is now
skipped. Snapshot capture adds an O(N-files-in-workspace) hash pass at
run start; the cost is bounded by the existing exclude list. The `runId`
parameter on `prepareAdapterExecutionTargetRuntime` is now required —
every in-tree caller is updated; out-of-tree adapter authors need to
pass it.
## Model Used
Claude Opus 4.7 (1M context)
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — new module +
every adapter execute path covered
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
257 lines
8.4 KiB
TypeScript
import { createHash } from "node:crypto";
import { createReadStream } from "node:fs";
import { constants as fsConstants, promises as fs } from "node:fs";
import path from "node:path";

/**
 * One recorded filesystem entry in a directory snapshot, discriminated by `kind`.
 *
 * - `dir`: a directory; nothing else is recorded for it.
 * - `file`: any entry that is neither a directory nor a symlink, with its
 *   `lstat` mode and the hex SHA-256 digest of its contents.
 * - `symlink`: a symbolic link, with the target string returned by `readlink`.
 */
type SnapshotEntry =
  | { kind: "dir" }
  | { kind: "file"; mode: number; hash: string }
  | { kind: "symlink"; target: string };

/** The recorded state of a directory tree, as produced by `captureDirectorySnapshot`. */
export interface DirectorySnapshot {
  /** Relative paths (and everything beneath them) that were skipped while walking. */
  exclude: string[];
  /** Recorded entries keyed by their `/`-separated path relative to the snapshot root. */
  entries: Map<string, SnapshotEntry>;
}

/**
 * Reports whether `relative` is `candidate` itself or a path nested beneath it.
 *
 * The comparison is purely textual on `/`-separated paths: a sibling that merely
 * shares a prefix (`node_modules_backup` vs `node_modules`) does not match.
 */
function isRelativePathOrDescendant(relative: string, candidate: string): boolean {
  if (relative === candidate) return true;
  return relative.startsWith(`${candidate}/`);
}

/**
 * Reports whether `relative` is covered by any path in the `exclude` list,
 * either by being that path or by living underneath it.
 */
function shouldExclude(relative: string, exclude: readonly string[]): boolean {
  for (const candidate of exclude) {
    if (isRelativePathOrDescendant(relative, candidate)) return true;
  }
  return false;
}

/**
 * Streams the file at `filePath` through SHA-256.
 *
 * @returns The digest as a hex string.
 * @throws Rejects with the stream's error when the file cannot be read.
 */
async function hashFile(filePath: string): Promise<string> {
  const hash = createHash("sha256");
  // Async iteration surfaces stream errors as a rejection, so no manual
  // "data" / "error" / "end" wiring is needed.
  for await (const chunk of createReadStream(filePath)) {
    hash.update(chunk);
  }
  return hash.digest("hex");
}

/**
 * Recursively records every non-excluded entry beneath `root` into `out`.
 *
 * @param root Directory the walk is anchored at.
 * @param exclude Relative paths to skip, together with everything beneath them.
 * @param relative Sub-path of `root` currently being listed; empty for `root` itself.
 * @param out Accumulator shared across the recursion, keyed by `/`-joined relative path.
 * @returns The same `out` map, for convenience.
 * @throws Rejects when `lstat`, `readlink` or hashing fails for a listed entry.
 */
async function walkDirectory(
  root: string,
  exclude: readonly string[],
  relative = "",
  out: Map<string, SnapshotEntry> = new Map(),
): Promise<Map<string, SnapshotEntry>> {
  const current = relative ? path.join(root, relative) : root;
  // Best effort: a directory that cannot be listed simply contributes no entries.
  const children = await fs.readdir(current, { withFileTypes: true }).catch(() => []);
  children.sort((left, right) => left.name.localeCompare(right.name));

  for (const child of children) {
    // Keys always use "/" so snapshots compare equal regardless of the platform separator.
    const childRelative = relative ? path.posix.join(relative, child.name) : child.name;
    if (shouldExclude(childRelative, exclude)) continue;

    const childPath = path.join(root, childRelative);
    // lstat, not stat: symlinks are recorded as links and never followed.
    const stats = await fs.lstat(childPath);

    if (stats.isDirectory()) {
      out.set(childRelative, { kind: "dir" });
      await walkDirectory(root, exclude, childRelative, out);
    } else if (stats.isSymbolicLink()) {
      out.set(childRelative, { kind: "symlink", target: await fs.readlink(childPath) });
    } else {
      out.set(childRelative, { kind: "file", mode: stats.mode, hash: await hashFile(childPath) });
    }
  }

  return out;
}

/**
 * Builds the snapshot entry for a single path under `root`.
 *
 * @returns `null` when the path cannot be `lstat`-ed (for example because it
 *   does not exist); otherwise a `dir`, `symlink` or `file` entry.
 * @throws Rejects when `readlink` or hashing fails after a successful `lstat`.
 */
async function readSnapshotEntry(root: string, relative: string): Promise<SnapshotEntry | null> {
  const fullPath = path.join(root, relative);
  const stats = await fs.lstat(fullPath).catch(() => null);
  if (stats === null) return null;

  if (stats.isDirectory()) return { kind: "dir" };
  if (stats.isSymbolicLink()) {
    return { kind: "symlink", target: await fs.readlink(fullPath) };
  }
  return { kind: "file", mode: stats.mode, hash: await hashFile(fullPath) };
}

/**
 * Compares two snapshot entries for equality.
 *
 * A missing entry never matches anything, including another missing entry.
 * Directories match on kind alone, symlinks on their target, and files on
 * both mode and content hash.
 */
function entriesMatch(left: SnapshotEntry | null | undefined, right: SnapshotEntry | null | undefined): boolean {
  if (!left || !right) return false;

  switch (left.kind) {
    case "dir":
      return right.kind === "dir";
    case "symlink":
      return right.kind === "symlink" && left.target === right.target;
    case "file":
      return right.kind === "file" && left.mode === right.mode && left.hash === right.hash;
    default:
      return false;
  }
}

/**
 * Decides whether the process recorded as the owner of `lockDir` still exists.
 *
 * Reads `<lockDir>/owner.json`, takes its `pid`, and probes that process with
 * signal 0 (an existence check that delivers no signal).
 *
 * @returns `false` when the owner record is missing, unreadable, not valid
 *   JSON, or carries no positive integer pid — such locks are treated as
 *   stale — and when the probe reports that no such process exists.
 *   `true` when the process exists, including when it exists but belongs to
 *   a user this process may not signal.
 */
async function isHolderAlive(lockDir: string): Promise<boolean> {
  let pid: number;
  try {
    const raw = await fs.readFile(path.join(lockDir, "owner.json"), "utf8");
    const owner: unknown = JSON.parse(raw);
    const recorded =
      owner !== null && typeof owner === "object" ? (owner as { pid?: unknown }).pid : undefined;
    if (typeof recorded !== "number" || !Number.isSafeInteger(recorded) || recorded <= 0) {
      // Owner record is unparseable / missing pid — treat as stale.
      return false;
    }
    pid = recorded;
  } catch {
    // owner.json missing or unreadable — treat as stale.
    return false;
  }

  try {
    process.kill(pid, 0);
    return true;
  } catch (error) {
    // EPERM means the process exists but we are not allowed to signal it, so
    // the holder is alive. Anything else (notably ESRCH) means it is gone.
    const code = error !== null && typeof error === "object" ? (error as { code?: unknown }).code : undefined;
    return code === "EPERM";
  }
}

/**
 * Acquires an exclusive lock represented by the directory `lockDir`.
 *
 * Creating the directory is the atomic "take" step; an `owner.json` file with
 * this process's pid and a timestamp is then written inside it so that other
 * contenders can tell a live holder from a stale one.
 *
 * @param lockDir Path of the lock directory. Its parent must already exist.
 * @param options.timeoutMs How long to keep waiting for a held lock (default 30 000 ms).
 * @param options.pollIntervalMs Delay between attempts while waiting (default 50 ms).
 * @returns A release function that removes the lock directory (best effort).
 * @throws Rejects with a timeout error when the lock stays held past the
 *   deadline, or with the underlying filesystem error for any failure other
 *   than the lock already existing.
 */
async function acquireDirectoryMergeLock(
  lockDir: string,
  options: { timeoutMs?: number; pollIntervalMs?: number } = {},
): Promise<() => Promise<void>> {
  const timeoutMs = options.timeoutMs ?? 30_000;
  const pollIntervalMs = options.pollIntervalMs ?? 50;
  const deadline = Date.now() + timeoutMs;

  const errorCode = (error: unknown): unknown =>
    error !== null && typeof error === "object" ? (error as { code?: unknown }).code : null;
  // Resolves to whether the removal went through; never rejects.
  const removeLockDir = (): Promise<boolean> =>
    fs.rm(lockDir, { recursive: true, force: true }).then(
      () => true,
      () => false,
    );

  while (true) {
    let created = false;
    try {
      await fs.mkdir(lockDir);
      created = true;
      await fs.writeFile(
        path.join(lockDir, "owner.json"),
        `${JSON.stringify({ pid: process.pid, createdAt: new Date().toISOString() })}\n`,
        "utf8",
      );
      return async () => {
        await removeLockDir();
      };
    } catch (error) {
      const code = errorCode(error);

      if (created) {
        // We made the directory but could not record ownership.
        if (code !== "ENOENT") {
          // Do not leave an ownerless lock directory behind.
          await removeLockDir();
          throw error;
        }
        // ENOENT: the directory vanished underneath us (another contender
        // judged it stale before owner.json landed). Nothing of ours is left
        // to clean up; fall through and try again.
      } else {
        if (code !== "EEXIST") throw error;
        // Stale-lock detection: if the owner PID is dead (SIGKILL / OOM / crash),
        // the lockDir would otherwise persist forever and stall restores. Mirror
        // the materializePaperclipSkillCopy lock pattern — remove and retry.
        if (!(await isHolderAlive(lockDir)) && (await removeLockDir())) {
          continue;
        }
        // Either the holder is alive or the stale directory could not be
        // removed; both cases wait below so the loop stays bounded by the deadline.
      }

      if (Date.now() >= deadline) {
        throw new Error(`Timed out waiting for workspace restore lock at ${lockDir}`);
      }
      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
    }
  }
}

/**
 * Runs `fn` while holding the restore lock that guards `targetDir`.
 *
 * The lock lives next to the target as `<targetDir>.paperclip-restore.lock`.
 * `targetDir` is resolved first so that equivalent spellings of the same
 * directory (trailing separator, relative vs absolute) contend for the same
 * lock, and so that a trailing separator cannot place the lock inside the target.
 *
 * @returns Whatever `fn` resolves to.
 * @throws Rejects with `fn`'s error, or with the lock acquisition error. The
 *   lock is released either way once it has been acquired.
 */
export async function withDirectoryMergeLock<T>(
  targetDir: string,
  fn: () => Promise<T>,
): Promise<T> {
  const lockDir = `${path.resolve(targetDir)}.paperclip-restore.lock`;
  const releaseLock = await acquireDirectoryMergeLock(lockDir);
  try {
    return await fn();
  } finally {
    await releaseLock();
  }
}

/**
 * Materializes one snapshot entry from `sourceDir` at the same relative path
 * under `targetDir`, replacing whatever is there.
 *
 * - `dir`: an existing directory is left untouched; anything else at that path
 *   is removed and a directory is created.
 * - `symlink`: the path is cleared and a link to `entry.target` is created.
 * - `file`: the path is cleared, the file is copied (requesting a
 *   copy-on-write clone first, then retrying as a plain copy if that attempt
 *   fails) and `entry.mode` is applied.
 *
 * @throws Rejects when creating the directory, link or copy fails. Failures
 *   while clearing the existing target are ignored.
 */
async function copySnapshotEntry(sourceDir: string, targetDir: string, relative: string, entry: SnapshotEntry): Promise<void> {
  const sourcePath = path.join(sourceDir, relative);
  const targetPath = path.join(targetDir, relative);
  const clearTarget = (): Promise<void> =>
    fs.rm(targetPath, { recursive: true, force: true }).catch(() => undefined);

  if (entry.kind === "dir") {
    const existing = await fs.lstat(targetPath).catch(() => null);
    if (existing?.isDirectory()) return;
    if (existing) await clearTarget();
    await fs.mkdir(targetPath, { recursive: true });
    return;
  }

  await fs.mkdir(path.dirname(targetPath), { recursive: true });
  await clearTarget();

  if (entry.kind === "symlink") {
    await fs.symlink(entry.target, targetPath);
    return;
  }

  try {
    await fs.copyFile(sourcePath, targetPath, fsConstants.COPYFILE_FICLONE);
  } catch {
    await fs.copyFile(sourcePath, targetPath);
  }
  await fs.chmod(targetPath, entry.mode);
}

/**
 * Records the current state of the tree beneath `rootDir`.
 *
 * @param rootDir Directory to walk.
 * @param options.exclude Relative paths to skip, together with everything
 *   beneath them. Duplicates are dropped; first-seen order is kept.
 * @returns The de-duplicated exclude list together with the recorded entries.
 */
export async function captureDirectorySnapshot(
  rootDir: string,
  options: { exclude?: string[] } = {},
): Promise<DirectorySnapshot> {
  const exclude = Array.from(new Set(options.exclude ?? []));
  const entries = await walkDirectory(rootDir, exclude);
  return { exclude, entries };
}

/**
 * Applies to `targetDir` only what changed in `sourceDir` relative to `baseline`.
 *
 * The source is snapshotted first (using the baseline's exclude list). Then,
 * while holding the target's merge lock:
 *
 * 1. `beforeApply`, if given, is awaited.
 * 2. Files and symlinks that were in the baseline but are gone from the source
 *    are removed from the target — but only when the target's copy still
 *    matches the baseline.
 * 3. Directories that were in the baseline but are gone from the source are
 *    removed with `rmdir`; a failure (such as the directory not being empty) is ignored.
 * 4. Every source entry that differs from its baseline counterpart, or has
 *    none, is copied onto the target.
 *
 * @param input.baseline Snapshot taken before the source diverged.
 * @param input.sourceDir Directory holding the post-change state.
 * @param input.targetDir Directory to bring up to date.
 * @param input.beforeApply Optional hook run under the lock before the target is inspected.
 */
export async function mergeDirectoryWithBaseline(input: {
  baseline: DirectorySnapshot;
  sourceDir: string;
  targetDir: string;
  beforeApply?: () => Promise<void>;
}): Promise<void> {
  const { baseline, sourceDir, targetDir } = input;
  const snapshotOptions = { exclude: baseline.exclude };
  const source = await captureDirectorySnapshot(sourceDir, snapshotOptions);

  // Baseline entries of the requested flavour that no longer exist in the
  // source, longest path first so children are handled before their parents.
  const removedFromSource = (wantDirs: boolean): Array<[string, SnapshotEntry]> =>
    [...baseline.entries]
      .filter(([relative, entry]) => (entry.kind === "dir") === wantDirs && !source.entries.has(relative))
      .sort(([left], [right]) => right.length - left.length);

  await withDirectoryMergeLock(targetDir, async () => {
    await input.beforeApply?.();
    const current = await captureDirectorySnapshot(targetDir, snapshotOptions);

    for (const [relative, baselineEntry] of removedFromSource(false)) {
      // Keep anything the target changed on its own since the baseline.
      if (!entriesMatch(current.entries.get(relative), baselineEntry)) continue;
      await fs.rm(path.join(targetDir, relative), { recursive: true, force: true }).catch(() => undefined);
    }

    for (const [relative] of removedFromSource(true)) {
      // rmdir refuses non-empty directories, which preserves surviving content.
      await fs.rmdir(path.join(targetDir, relative)).catch(() => undefined);
    }

    const changedInSource = [...source.entries]
      .filter(([relative, entry]) => !entriesMatch(baseline.entries.get(relative), entry))
      .sort(([left], [right]) => left.localeCompare(right));

    for (const [relative, entry] of changedInSource) {
      await copySnapshotEntry(sourceDir, targetDir, relative, entry);
    }
  });
}

/**
 * Checks whether the entry currently at `relative` under `rootDir` is
 * identical to `baselineEntry`.
 *
 * @returns `false` when the path cannot be read or differs in kind, symlink
 *   target, file mode or content hash; `true` otherwise.
 */
export async function directoryEntryMatchesBaseline(
  rootDir: string,
  relative: string,
  baselineEntry: SnapshotEntry,
): Promise<boolean> {
  const onDisk = await readSnapshotEntry(rootDir, relative);
  return entriesMatch(onDisk, baselineEntry);
}
