mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-19 04:00:38 +09:00
Limit isolated workspace memory spikes
Co-Authored-By: Paperclip <noreply@paperclip.ing>
This commit is contained in:
parent
37d2d5ef02
commit
0a9a8b5a44
4 changed files with 238 additions and 16 deletions
|
|
@ -176,4 +176,49 @@ describeEmbeddedPostgres("runDatabaseBackup", () => {
|
||||||
},
|
},
|
||||||
60_000,
|
60_000,
|
||||||
);
|
);
|
||||||
|
|
||||||
|
it(
  "restores statements incrementally when backup comments precede the first breakpoint",
  async () => {
    // Hand-written backup: header comments come before the first breakpoint, one
    // statement spans two lines, and the file ends with a trailing breakpoint.
    const breakpointLine = "-- paperclip statement breakpoint 69f6f3f1-42fd-46a6-bf17-d1d85f8f3900";
    const manualBackup = [
      "-- Paperclip database backup",
      "-- Created: 2026-04-06T00:00:00.000Z",
      "",
      "BEGIN;",
      breakpointLine,
      "CREATE TABLE public.restore_stream_test (id integer primary key, payload text not null);",
      breakpointLine,
      "INSERT INTO public.restore_stream_test (id, payload)",
      "VALUES (1, 'hello');",
      breakpointLine,
      "COMMIT;",
      breakpointLine,
    ].join("\n");

    const restoreConnectionString = await createTempDatabase();
    const restoreSql = postgres(restoreConnectionString, { max: 1, onnotice: () => {} });
    const backupDir = createTempDir("paperclip-db-restore-manual-");
    const backupFile = path.join(backupDir, "manual.sql");

    try {
      await fs.promises.writeFile(backupFile, manualBackup, "utf8");

      await runDatabaseRestore({ connectionString: restoreConnectionString, backupFile });

      // The restored table must contain exactly the single inserted row.
      const restoredRows = await restoreSql.unsafe<{ payload: string }[]>(
        "\nSELECT payload\nFROM public.restore_stream_test\n",
      );
      expect(restoredRows).toEqual([{ payload: "hello" }]);
    } finally {
      // Always release the verification connection, even when an assertion fails.
      await restoreSql.end();
    }
  },
  20_000,
);
|
||||||
});
|
});
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import { createWriteStream, existsSync, mkdirSync, readdirSync, statSync, unlinkSync } from "node:fs";
|
import { createReadStream, createWriteStream, existsSync, mkdirSync, readdirSync, statSync, unlinkSync } from "node:fs";
|
||||||
import { readFile } from "node:fs/promises";
|
|
||||||
import { basename, resolve } from "node:path";
|
import { basename, resolve } from "node:path";
|
||||||
|
import { createInterface } from "node:readline";
|
||||||
import postgres from "postgres";
|
import postgres from "postgres";
|
||||||
|
|
||||||
export type RunDatabaseBackupOptions = {
|
export type RunDatabaseBackupOptions = {
|
||||||
|
|
@ -142,6 +142,42 @@ function tableKey(schemaName: string, tableName: string): string {
|
||||||
return `${schemaName}.${tableName}`;
|
return `${schemaName}.${tableName}`;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Streams a backup file line by line and yields one SQL statement at a time,
 * so the whole backup never has to be held in memory.
 *
 * A statement is every line between two lines that are exactly equal to
 * `STATEMENT_BREAKPOINT`, joined with "\n" and trimmed. Statements that are
 * empty after trimming are skipped. Text after the last breakpoint is yielded
 * as a final statement.
 *
 * @param backupFile Path of the UTF-8 backup file to read.
 * @returns An async generator of non-empty statement strings, in file order.
 */
async function* readRestoreStatements(backupFile: string): AsyncGenerator<string> {
  const fileStream = createReadStream(backupFile, { encoding: "utf8" });
  const lineReader = createInterface({
    input: fileStream,
    // Treat "\r\n" as a single line break regardless of timing between the two bytes.
    crlfDelay: Infinity,
  });
  const pendingLines: string[] = [];

  // Empties the pending-line buffer and returns its content as one trimmed statement.
  const drainPending = (): string => pendingLines.splice(0).join("\n").trim();

  try {
    for await (const line of lineReader) {
      if (line !== STATEMENT_BREAKPOINT) {
        pendingLines.push(line);
        continue;
      }

      const completed = drainPending();
      if (completed !== "") {
        yield completed;
      }
    }

    // Anything after the final breakpoint still counts as a statement.
    const remainder = drainPending();
    if (remainder !== "") {
      yield remainder;
    }
  } finally {
    // Runs on normal completion, on error, and when the consumer stops iterating early.
    lineReader.close();
    fileStream.destroy();
  }
}
|
||||||
|
|
||||||
export function createBufferedTextFileWriter(filePath: string, maxBufferedBytes = DEFAULT_BACKUP_WRITE_BUFFER_BYTES) {
|
export function createBufferedTextFileWriter(filePath: string, maxBufferedBytes = DEFAULT_BACKUP_WRITE_BUFFER_BYTES) {
|
||||||
const stream = createWriteStream(filePath, { encoding: "utf8" });
|
const stream = createWriteStream(filePath, { encoding: "utf8" });
|
||||||
const flushThreshold = Math.max(1, Math.trunc(maxBufferedBytes));
|
const flushThreshold = Math.max(1, Math.trunc(maxBufferedBytes));
|
||||||
|
|
@ -626,13 +662,7 @@ export async function runDatabaseRestore(opts: RunDatabaseRestoreOptions): Promi
|
||||||
|
|
||||||
try {
|
try {
|
||||||
await sql`SELECT 1`;
|
await sql`SELECT 1`;
|
||||||
const contents = await readFile(opts.backupFile, "utf8");
|
for await (const statement of readRestoreStatements(opts.backupFile)) {
|
||||||
const statements = contents
|
|
||||||
.split(STATEMENT_BREAKPOINT)
|
|
||||||
.map((statement) => statement.trim())
|
|
||||||
.filter((statement) => statement.length > 0);
|
|
||||||
|
|
||||||
for (const statement of statements) {
|
|
||||||
await sql.unsafe(statement).execute();
|
await sql.unsafe(statement).execute();
|
||||||
}
|
}
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
|
|
||||||
|
|
@ -957,6 +957,57 @@ describe("realizeExecutionWorkspace", () => {
|
||||||
expect(operations[1]?.command).toBe("bash ./scripts/provision.sh");
|
expect(operations[1]?.command).toBe("bash ./scripts/provision.sh");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
it("truncates oversized provision command output before storing it in memory", async () => {
  const repoRoot = await createTempRepo();
  const { recorder, operations } = createWorkspaceOperationRecorderDouble();

  // Commit a provision script that prints 400000 bytes to stdout.
  const scriptsDir = path.join(repoRoot, "scripts");
  await fs.mkdir(scriptsDir, { recursive: true });
  await fs.writeFile(
    path.join(repoRoot, "scripts", "noisy.js"),
    'process.stdout.write("x".repeat(400000));\n',
    "utf8",
  );
  await runGit(repoRoot, ["add", "scripts/noisy.js"]);
  await runGit(repoRoot, ["commit", "-m", "Add noisy provision script"]);

  const base = {
    baseCwd: repoRoot,
    source: "project_primary",
    projectId: "project-1",
    workspaceId: "workspace-1",
    repoUrl: null,
    repoRef: "HEAD",
  };
  await realizeExecutionWorkspace({
    base,
    config: {
      workspaceStrategy: {
        type: "git_worktree",
        branchTemplate: "{{issue.identifier}}-{{slug}}",
        provisionCommand: "node ./scripts/noisy.js",
      },
    },
    issue: { id: "issue-1", identifier: "PAP-1142", title: "Limit noisy provision output" },
    agent: { id: "agent-1", name: "Codex Coder", companyId: "company-1" },
    recorder,
  });

  // Only stdout was noisy, so only stdout should be flagged as truncated.
  const provisionOperation = operations.find((entry) => entry.phase === "workspace_provision");
  expect(provisionOperation?.result.metadata).toMatchObject({
    stdoutTruncated: true,
    stderrTruncated: false,
  });
  expect(provisionOperation?.result.stdout).toContain("[output truncated to last");
  expect(provisionOperation?.result.stdout?.length ?? 0).toBeLessThan(300000);
});
|
||||||
|
|
||||||
it("reuses an existing branch without resetting it when recreating a missing worktree", async () => {
|
it("reuses an existing branch without resetting it when recreating a missing worktree", async () => {
|
||||||
const repoRoot = await createTempRepo();
|
const repoRoot = await createTempRepo();
|
||||||
const branchName = "PAP-450-recreate-missing-worktree";
|
const branchName = "PAP-450-recreate-missing-worktree";
|
||||||
|
|
|
||||||
|
|
@ -102,6 +102,18 @@ interface RuntimeServiceRecord extends RuntimeServiceRef {
|
||||||
const runtimeServicesById = new Map<string, RuntimeServiceRecord>();
|
const runtimeServicesById = new Map<string, RuntimeServiceRecord>();
|
||||||
const runtimeServicesByReuseKey = new Map<string, string>();
|
const runtimeServicesByReuseKey = new Map<string, string>();
|
||||||
const runtimeServiceLeasesByRun = new Map<string, string[]>();
|
const runtimeServiceLeasesByRun = new Map<string, string[]>();
|
||||||
|
/**
 * Per-stream capture limit (256 KiB) that `executeProcess` applies to stdout
 * and stderr when the caller does not pass `maxStdoutBytes` / `maxStderrBytes`.
 */
const DEFAULT_EXECUTE_PROCESS_OUTPUT_BYTES = 256 * 1024;

/** Final snapshot of one captured output stream. */
interface ProcessOutputCapture {
  /** Retained output; prefixed with a truncation notice when `truncated` is true. */
  text: string;
  /** Whether earlier output was discarded to stay within the byte limit. */
  truncated: boolean;
  /** UTF-8 byte count of everything that was appended, including discarded output. */
  totalBytes: number;
}

/** Incremental sink for the output of one process stream. */
interface ProcessOutputAccumulator {
  /** Adds the next piece of decoded output. */
  append(chunk: string): void;
  /** Produces the final capture once the stream has ended. */
  finish(): ProcessOutputCapture;
}
|
||||||
|
|
||||||
export async function resetRuntimeServicesForTests() {
|
export async function resetRuntimeServicesForTests() {
|
||||||
for (const record of runtimeServicesById.values()) {
|
for (const record of runtimeServicesById.values()) {
|
||||||
|
|
@ -381,30 +393,96 @@ function formatCommandForDisplay(command: string, args: string[]) {
|
||||||
.join(" ");
|
.join(" ");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Creates an accumulator that keeps only the trailing part of a process output
 * stream, so that a very noisy command cannot grow memory without bound.
 *
 * The retained tail is at most `maxBytes` UTF-8 bytes and always starts on a
 * character boundary, so truncation never produces U+FFFD replacement
 * characters. When the cut would fall inside a multi-byte character, that
 * character is dropped entirely and the tail is slightly shorter than the limit.
 *
 * Memory use is bounded by roughly twice the limit plus one chunk: the buffer
 * is compacted lazily instead of on every append, which keeps appends cheap
 * for processes that emit many small chunks.
 *
 * @param maxBytes Maximum number of UTF-8 bytes to retain. Truncated toward
 *   zero and clamped to at least 1.
 * @returns An accumulator whose `finish()` reports the retained text, whether
 *   output was discarded, and the total number of bytes appended. When output
 *   was discarded, the text is prefixed with a one-line truncation notice.
 */
function createProcessOutputCapture(maxBytes: number): ProcessOutputAccumulator {
  const limit = Math.max(1, Math.trunc(maxBytes));
  // Compacting only once the buffer reaches twice the limit makes the cost of
  // re-encoding the tail amortized O(1) per appended byte.
  const compactionThreshold = limit * 2;
  let chunks: string[] = [];
  let bufferedBytes = 0;
  let truncated = false;
  let totalBytes = 0;

  // Collapses the buffered chunks into a single tail of at most `limit` bytes.
  const trimToLimit = (): void => {
    const combined = Buffer.from(chunks.join(""), "utf8");
    let start = Math.max(0, combined.length - limit);
    // Bytes of the form 10xxxxxx are UTF-8 continuation bytes; skip forward so
    // the tail begins with a complete character.
    while (start < combined.length && (combined.readUInt8(start) & 0xc0) === 0x80) {
      start += 1;
    }
    chunks = [combined.subarray(start).toString("utf8")];
    bufferedBytes = combined.length - start;
    truncated = true;
  };

  return {
    append(chunk: string) {
      if (!chunk) return;
      const chunkBytes = Buffer.byteLength(chunk, "utf8");
      chunks.push(chunk);
      totalBytes += chunkBytes;
      bufferedBytes += chunkBytes;
      if (bufferedBytes > compactionThreshold) {
        trimToLimit();
      }
    },
    finish(): ProcessOutputCapture {
      // Lazy compaction may have left up to twice the limit buffered.
      if (bufferedBytes > limit) {
        trimToLimit();
      }
      const text = chunks.join("");
      if (!truncated) {
        return {
          text,
          truncated: false,
          totalBytes,
        };
      }
      return {
        text: `[output truncated to last ${limit} bytes; total ${totalBytes} bytes]\n${text}`,
        truncated: true,
        totalBytes,
      };
    },
  };
}
|
||||||
|
|
||||||
/**
 * Spawns a command and waits for it to close, capturing stdout and stderr with
 * a per-stream byte cap so noisy commands cannot exhaust memory.
 *
 * stdin is ignored; stdout and stderr are piped and decoded as UTF-8.
 *
 * @param input.command Executable to run.
 * @param input.args Arguments passed to the executable.
 * @param input.cwd Working directory for the child process.
 * @param input.env Environment for the child; defaults to `process.env`.
 * @param input.maxStdoutBytes Cap for retained stdout; defaults to
 *   `DEFAULT_EXECUTE_PROCESS_OUTPUT_BYTES`.
 * @param input.maxStderrBytes Cap for retained stderr; same default.
 * @returns The retained output text, the exit code (`null` when the child did
 *   not exit with a code), whether each stream was truncated, and the total
 *   number of bytes each stream produced.
 * @throws Rejects with the spawn error when the child emits `error`.
 */
async function executeProcess(input: {
  command: string;
  args: string[];
  cwd: string;
  env?: NodeJS.ProcessEnv;
  maxStdoutBytes?: number;
  maxStderrBytes?: number;
}): Promise<{
  stdout: string;
  stderr: string;
  code: number | null;
  stdoutTruncated: boolean;
  stderrTruncated: boolean;
  stdoutBytes: number;
  stderrBytes: number;
}> {
  const proc = await new Promise<{
    stdout: ProcessOutputAccumulator;
    stderr: ProcessOutputAccumulator;
    code: number | null;
  }>((resolve, reject) => {
    const child = spawn(input.command, input.args, {
      cwd: input.cwd,
      stdio: ["ignore", "pipe", "pipe"],
      env: input.env ?? process.env,
    });
    const stdout = createProcessOutputCapture(input.maxStdoutBytes ?? DEFAULT_EXECUTE_PROCESS_OUTPUT_BYTES);
    const stderr = createProcessOutputCapture(input.maxStderrBytes ?? DEFAULT_EXECUTE_PROCESS_OUTPUT_BYTES);
    // Decode at the stream level: a multi-byte character that straddles two
    // pipe reads is then reassembled instead of being turned into U+FFFD by
    // converting each Buffer chunk on its own.
    child.stdout?.setEncoding("utf8");
    child.stderr?.setEncoding("utf8");
    child.stdout?.on("data", (chunk) => {
      stdout.append(String(chunk));
    });
    child.stderr?.on("data", (chunk) => {
      stderr.append(String(chunk));
    });
    child.on("error", reject);
    // "close" fires after the stdio streams have ended, so both captures are complete here.
    child.on("close", (code) => resolve({ stdout, stderr, code }));
  });
  const stdout = proc.stdout.finish();
  const stderr = proc.stderr.finish();
  return {
    stdout: stdout.text,
    stderr: stderr.text,
    code: proc.code,
    stdoutTruncated: stdout.truncated,
    stderrTruncated: stderr.truncated,
    stdoutBytes: stdout.totalBytes,
    stderrBytes: stderr.totalBytes,
  };
}
|
||||||
|
|
||||||
async function runGit(args: string[], cwd: string): Promise<string> {
|
async function runGit(args: string[], cwd: string): Promise<string> {
|
||||||
|
|
@ -588,6 +666,15 @@ async function recordGitOperation(
|
||||||
stdout: result.stdout,
|
stdout: result.stdout,
|
||||||
stderr: result.stderr,
|
stderr: result.stderr,
|
||||||
system: result.code === 0 ? input.successMessage ?? null : null,
|
system: result.code === 0 ? input.successMessage ?? null : null,
|
||||||
|
metadata:
|
||||||
|
result.stdoutTruncated || result.stderrTruncated
|
||||||
|
? {
|
||||||
|
stdoutTruncated: result.stdoutTruncated,
|
||||||
|
stderrTruncated: result.stderrTruncated,
|
||||||
|
stdoutBytes: result.stdoutBytes,
|
||||||
|
stderrBytes: result.stderrBytes,
|
||||||
|
}
|
||||||
|
: null,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
@ -646,6 +733,15 @@ async function recordWorkspaceCommandOperation(
|
||||||
stdout: result.stdout,
|
stdout: result.stdout,
|
||||||
stderr: result.stderr,
|
stderr: result.stderr,
|
||||||
system: result.code === 0 ? input.successMessage ?? null : null,
|
system: result.code === 0 ? input.successMessage ?? null : null,
|
||||||
|
metadata:
|
||||||
|
result.stdoutTruncated || result.stderrTruncated
|
||||||
|
? {
|
||||||
|
stdoutTruncated: result.stdoutTruncated,
|
||||||
|
stderrTruncated: result.stderrTruncated,
|
||||||
|
stdoutBytes: result.stdoutBytes,
|
||||||
|
stderrBytes: result.stderrBytes,
|
||||||
|
}
|
||||||
|
: null,
|
||||||
};
|
};
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue