Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
import { beforeEach, describe, expect, it, vi } from "vitest";
|
|
|
|
|
|
|
|
|
|
const mockCreate = vi.hoisted(() => vi.fn());
|
|
|
|
|
const mockConnect = vi.hoisted(() => vi.fn());
|
|
|
|
|
const { MockCommandExitError, MockSandboxNotFoundError, MockTimeoutError } = vi.hoisted(() => {
|
|
|
|
|
class MockCommandExitError extends Error {
|
|
|
|
|
exitCode: number;
|
|
|
|
|
stdout: string;
|
|
|
|
|
stderr: string;
|
|
|
|
|
|
|
|
|
|
constructor(result: { exitCode: number; stdout: string; stderr: string }) {
|
|
|
|
|
super("command failed");
|
|
|
|
|
this.exitCode = result.exitCode;
|
|
|
|
|
this.stdout = result.stdout;
|
|
|
|
|
this.stderr = result.stderr;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
class MockSandboxNotFoundError extends Error {}
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
class MockTimeoutError extends Error {
|
|
|
|
|
stdout: string;
|
|
|
|
|
stderr: string;
|
|
|
|
|
result?: { stdout?: string; stderr?: string };
|
|
|
|
|
|
|
|
|
|
constructor(message: string, streams: { stdout?: string; stderr?: string; nested?: boolean } = {}) {
|
|
|
|
|
super(message);
|
|
|
|
|
this.stdout = streams.nested ? "" : (streams.stdout ?? "");
|
|
|
|
|
this.stderr = streams.nested ? "" : (streams.stderr ?? "");
|
|
|
|
|
this.result = streams.nested ? { stdout: streams.stdout, stderr: streams.stderr } : undefined;
|
|
|
|
|
}
|
|
|
|
|
}
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
return { MockCommandExitError, MockSandboxNotFoundError, MockTimeoutError };
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
vi.mock("e2b", () => ({
|
|
|
|
|
CommandExitError: MockCommandExitError,
|
|
|
|
|
SandboxNotFoundError: MockSandboxNotFoundError,
|
|
|
|
|
TimeoutError: MockTimeoutError,
|
|
|
|
|
Sandbox: {
|
|
|
|
|
create: mockCreate,
|
|
|
|
|
connect: mockConnect,
|
|
|
|
|
},
|
|
|
|
|
}));
|
|
|
|
|
|
|
|
|
|
import plugin from "./plugin.js";
|
|
|
|
|
|
|
|
|
|
function createMockSandbox(overrides: {
|
|
|
|
|
sandboxId?: string;
|
|
|
|
|
sandboxDomain?: string;
|
|
|
|
|
pwd?: string;
|
|
|
|
|
waitResult?: { exitCode: number; stdout: string; stderr: string };
|
|
|
|
|
} = {}) {
|
|
|
|
|
const handle = {
|
|
|
|
|
pid: 42,
|
|
|
|
|
stdout: "",
|
|
|
|
|
stderr: "",
|
|
|
|
|
wait: vi.fn().mockResolvedValue(overrides.waitResult ?? {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: "ok\n",
|
|
|
|
|
stderr: "",
|
|
|
|
|
}),
|
|
|
|
|
};
|
|
|
|
|
return {
|
|
|
|
|
sandboxId: overrides.sandboxId ?? "sandbox-123",
|
|
|
|
|
sandboxDomain: overrides.sandboxDomain ?? "sandbox.example.test",
|
|
|
|
|
setTimeout: vi.fn().mockResolvedValue(undefined),
|
|
|
|
|
kill: vi.fn().mockResolvedValue(undefined),
|
|
|
|
|
pause: vi.fn().mockResolvedValue(undefined),
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
files: {
|
|
|
|
|
write: vi.fn().mockResolvedValue(undefined),
|
|
|
|
|
remove: vi.fn().mockResolvedValue(undefined),
|
|
|
|
|
},
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
commands: {
|
|
|
|
|
run: vi.fn(async (command: string, options?: { background?: boolean }) => {
|
|
|
|
|
if (options?.background) return handle;
|
|
|
|
|
if (command === "pwd") {
|
|
|
|
|
return {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: `${overrides.pwd ?? "/home/user"}\n`,
|
|
|
|
|
stderr: "",
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
return {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: "",
|
|
|
|
|
stderr: "",
|
|
|
|
|
};
|
|
|
|
|
}),
|
|
|
|
|
sendStdin: vi.fn().mockResolvedValue(undefined),
|
|
|
|
|
closeStdin: vi.fn().mockResolvedValue(undefined),
|
|
|
|
|
},
|
|
|
|
|
handle,
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
describe("E2B sandbox provider plugin", () => {
|
|
|
|
|
beforeEach(() => {
|
|
|
|
|
mockCreate.mockReset();
|
|
|
|
|
mockConnect.mockReset();
|
|
|
|
|
vi.restoreAllMocks();
|
|
|
|
|
delete process.env.E2B_API_KEY;
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("declares environment lifecycle handlers", async () => {
|
|
|
|
|
expect(await plugin.definition.onHealth?.()).toEqual({
|
|
|
|
|
status: "ok",
|
|
|
|
|
message: "E2B sandbox provider plugin healthy",
|
|
|
|
|
});
|
|
|
|
|
expect(plugin.definition.onEnvironmentAcquireLease).toBeTypeOf("function");
|
|
|
|
|
expect(plugin.definition.onEnvironmentExecute).toBeTypeOf("function");
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("normalizes E2B config through the generic provider shape", async () => {
|
|
|
|
|
const result = await plugin.definition.onEnvironmentValidateConfig?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
config: {
|
|
|
|
|
template: " base ",
|
|
|
|
|
apiKey: " e2b_test_key ",
|
|
|
|
|
timeoutMs: "450000.9",
|
|
|
|
|
reuseLease: true,
|
|
|
|
|
},
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(result).toEqual({
|
|
|
|
|
ok: true,
|
|
|
|
|
normalizedConfig: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "e2b_test_key",
|
|
|
|
|
timeoutMs: 450000,
|
|
|
|
|
reuseLease: true,
|
|
|
|
|
},
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
2026-04-30 22:43:24 -07:00
|
|
|
it("defaults a missing template to base", async () => {
|
|
|
|
|
const result = await plugin.definition.onEnvironmentValidateConfig?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
config: {
|
|
|
|
|
timeoutMs: "450000.9",
|
|
|
|
|
reuseLease: true,
|
|
|
|
|
},
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(result).toEqual({
|
|
|
|
|
ok: true,
|
|
|
|
|
normalizedConfig: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: null,
|
|
|
|
|
timeoutMs: 450000,
|
|
|
|
|
reuseLease: true,
|
|
|
|
|
},
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
it("rejects empty template strings instead of silently normalizing them", async () => {
|
|
|
|
|
await expect(plugin.definition.onEnvironmentValidateConfig?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
config: {
|
|
|
|
|
template: " ",
|
|
|
|
|
},
|
|
|
|
|
})).resolves.toEqual({
|
|
|
|
|
ok: false,
|
|
|
|
|
errors: ["E2B sandbox environments require a template."],
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("uses resolved config keys before falling back to E2B_API_KEY", async () => {
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
mockCreate.mockResolvedValue(sandbox);
|
|
|
|
|
process.env.E2B_API_KEY = "host-key";
|
|
|
|
|
|
|
|
|
|
const lease = await plugin.definition.onEnvironmentAcquireLease?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
runId: "run-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(mockCreate).toHaveBeenCalledWith("base", expect.objectContaining({
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
}));
|
|
|
|
|
expect(lease).toMatchObject({
|
|
|
|
|
providerLeaseId: "sandbox-123",
|
|
|
|
|
metadata: {
|
|
|
|
|
provider: "e2b",
|
|
|
|
|
remoteCwd: "/home/user/paperclip-workspace",
|
|
|
|
|
},
|
|
|
|
|
});
|
|
|
|
|
expect(sandbox.commands.run).toHaveBeenNthCalledWith(1, "pwd");
|
|
|
|
|
expect(sandbox.commands.run).toHaveBeenNthCalledWith(2, "mkdir -p '/home/user/paperclip-workspace'");
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("kills the sandbox if acquire setup fails after creation", async () => {
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
const failure = new Error("set-timeout failed");
|
|
|
|
|
sandbox.setTimeout.mockRejectedValueOnce(failure);
|
|
|
|
|
mockCreate.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
await expect(plugin.definition.onEnvironmentAcquireLease?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
runId: "run-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
})).rejects.toThrow("set-timeout failed");
|
|
|
|
|
|
|
|
|
|
expect(sandbox.kill).toHaveBeenCalled();
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("falls back to host E2B_API_KEY when config omits the API key", async () => {
|
|
|
|
|
process.env.E2B_API_KEY = "host-key";
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
mockCreate.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
await expect(plugin.definition.onEnvironmentAcquireLease?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
runId: "run-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: null,
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
})).resolves.toMatchObject({
|
|
|
|
|
providerLeaseId: "sandbox-123",
|
|
|
|
|
});
|
|
|
|
|
expect(mockCreate).toHaveBeenCalledWith("base", expect.objectContaining({ apiKey: "host-key" }));
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("kills the sandbox if resume setup fails after reconnect", async () => {
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
const failure = new Error("set-timeout failed");
|
|
|
|
|
sandbox.setTimeout.mockRejectedValueOnce(failure);
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
await expect(plugin.definition.onEnvironmentResumeLease?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
runId: "run-1",
|
|
|
|
|
providerLeaseId: "sandbox-123",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
})).rejects.toThrow("set-timeout failed");
|
|
|
|
|
|
|
|
|
|
expect(sandbox.kill).toHaveBeenCalled();
|
|
|
|
|
});
|
|
|
|
|
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
it("executes commands through a connected sandbox when stdin is provided", async () => {
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
const sandbox = createMockSandbox();
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
sandbox.commands.run.mockImplementation(async (command: string, options?: { background?: boolean }) => {
|
|
|
|
|
if (options?.background) return sandbox.handle;
|
|
|
|
|
if (command === "pwd") {
|
|
|
|
|
return {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: "/home/user\n",
|
|
|
|
|
stderr: "",
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
return {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: "stdin\n",
|
|
|
|
|
stderr: "",
|
|
|
|
|
};
|
|
|
|
|
});
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
const result = await plugin.definition.onEnvironmentExecute?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
lease: { providerLeaseId: "sandbox-123", metadata: {} },
|
|
|
|
|
command: "printf",
|
|
|
|
|
args: ["hello"],
|
|
|
|
|
cwd: "/workspace",
|
|
|
|
|
env: { FOO: "bar" },
|
|
|
|
|
stdin: "input",
|
|
|
|
|
timeoutMs: 1000,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(mockConnect).toHaveBeenCalledWith("sandbox-123", expect.objectContaining({ apiKey: "resolved-key" }));
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
expect(sandbox.files.write).toHaveBeenCalledWith(expect.stringMatching(/^\/tmp\/paperclip-stdin-/), "input");
|
Run a real command-v probe and source login profiles before exec in e2b sandboxes (#5279)
> **Stacked PR.** Sits on top of #5278 (`e2b/stage-stdin-to-temp-file`)
which ships the stdin-staging fix this builds on. The cumulative diff
against `master` includes that PR's content; the files touched by *this*
PR's commit are `packages/adapter-utils/src/execution-target.ts`,
`packages/plugins/sandbox-providers/e2b/src/plugin.ts`, and
`packages/plugins/sandbox-providers/e2b/src/plugin.test.ts`.
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - The adapter Test flow does an "is the command resolvable?" probe
before running the hello probe so the report distinguishes "binary not
installed" from "binary errored"
> - For sandbox targets, that resolvability check was a no-op
early-return — every sandboxed adapter test reported "Command is
executable" regardless of whether the binary existed
> - That made the resolvability check disagree with the hello probe in a
way that looked like a PATH bug, when it was actually a missing CLI
> - Separately, the e2b spawn used `sandbox.commands.run` with a
non-login non-interactive shell whose PATH did not include npm-globals,
nvm shims, or anything else the template installs via
`.profile`/`.bashrc`
> - This pull request makes the resolvability check honest by running a
real `command -v` invocation through the sandbox runner, and aligns the
e2b spawn with SSH by sourcing login profiles before `exec env KEY=val
<cmd>`
> - The benefit is the e2b sandbox spawn agrees with the hello probe and
finds CLIs at template-installed paths
## What Changed
- `packages/adapter-utils/src/execution-target.ts`: add
`ensureSandboxCommandResolvable` that runs `command -v <cli>` through
the sandbox runner; replace the early-return in
`ensureAdapterExecutionTargetCommandResolvable` for sandbox targets
- `packages/plugins/sandbox-providers/e2b/src/plugin.ts`: replace
`buildCommandLine` with `buildLoginShellScript` (sources `/etc/profile`,
`~/.profile`, `~/.bash_profile`, `~/.bashrc`, `~/.zprofile`, and nvm.sh
before `exec env KEY=val <cmd>`); env vars are interpolated inline so
user-configured adapter env always wins over profile-exported values;
drop the now-unused `envs:` SDK option
- `plugin.test.ts` updated for the login-shell wrapping
## Verification
- `pnpm vitest run --no-coverage --project @paperclipai/sandbox-e2b` —
17/17 plugin tests pass
- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils`
clean
- `pnpm typecheck` clean
- Manual: previously every sandboxed adapter said "Command is
executable" then the hello probe failed with "exec: not found". After
this change, missing CLIs surface honestly at the resolvability step.
SSH no-regression: SSH Claude probe still passes.
## Risks
Medium — sandbox adapter Test reports will start failing at the
resolvability step for environments where the CLI was never actually
installed. This was always the real state; the previous "Command is
executable" message was incorrect. Operators should expect
previously-green-but-broken sandbox environments to report accurately.
## Model Used
Claude Opus 4.7 (1M context)
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — `plugin.test.ts`
updated for the login-shell wrapping
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-05-05 08:21:37 -07:00
|
|
|
const stdinCall = sandbox.commands.run.mock.calls.find(([cmd]: [string]) => cmd.includes("'printf'"));
|
|
|
|
|
expect(stdinCall).toBeDefined();
|
|
|
|
|
if (!stdinCall) throw new Error("stdinCall not found");
|
|
|
|
|
expect(stdinCall[0]).toMatch(/\.profile/);
|
|
|
|
|
expect(stdinCall[0]).toMatch(/exec env FOO='bar' 'printf' 'hello' < '\/tmp\/paperclip-stdin-/);
|
|
|
|
|
expect(stdinCall[1]).toEqual(expect.objectContaining({ cwd: "/workspace", timeoutMs: 1000 }));
|
|
|
|
|
expect(stdinCall[1]).not.toHaveProperty("envs");
|
|
|
|
|
expect(stdinCall[1]).not.toHaveProperty("background");
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
expect(sandbox.commands.sendStdin).not.toHaveBeenCalled();
|
|
|
|
|
expect(sandbox.commands.closeStdin).not.toHaveBeenCalled();
|
|
|
|
|
expect(sandbox.handle.wait).not.toHaveBeenCalled();
|
|
|
|
|
expect(sandbox.files.remove).toHaveBeenCalledWith(expect.stringMatching(/^\/tmp\/paperclip-stdin-/));
|
|
|
|
|
expect(result).toEqual({
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
timedOut: false,
|
|
|
|
|
stdout: "stdin\n",
|
|
|
|
|
stderr: "",
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("executes non-stdin commands in foreground mode", async () => {
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
sandbox.commands.run.mockImplementation(async (command: string, options?: { background?: boolean }) => {
|
|
|
|
|
if (options?.background) return sandbox.handle;
|
|
|
|
|
if (command === "pwd") {
|
|
|
|
|
return {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: "/home/user\n",
|
|
|
|
|
stderr: "",
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
return {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: "foreground\n",
|
|
|
|
|
stderr: "",
|
|
|
|
|
};
|
|
|
|
|
});
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
const result = await plugin.definition.onEnvironmentExecute?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
lease: { providerLeaseId: "sandbox-123", metadata: {} },
|
|
|
|
|
command: "printf",
|
|
|
|
|
args: ["hello"],
|
|
|
|
|
cwd: "/workspace",
|
|
|
|
|
env: { FOO: "bar" },
|
|
|
|
|
timeoutMs: 1000,
|
|
|
|
|
});
|
|
|
|
|
|
Run a real command-v probe and source login profiles before exec in e2b sandboxes (#5279)
> **Stacked PR.** Sits on top of #5278 (`e2b/stage-stdin-to-temp-file`)
which ships the stdin-staging fix this builds on. The cumulative diff
against `master` includes that PR's content; the files touched by *this*
PR's commit are `packages/adapter-utils/src/execution-target.ts`,
`packages/plugins/sandbox-providers/e2b/src/plugin.ts`, and
`packages/plugins/sandbox-providers/e2b/src/plugin.test.ts`.
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - The adapter Test flow does an "is the command resolvable?" probe
before running the hello probe so the report distinguishes "binary not
installed" from "binary errored"
> - For sandbox targets, that resolvability check was a no-op
early-return — every sandboxed adapter test reported "Command is
executable" regardless of whether the binary existed
> - That made the resolvability check disagree with the hello probe in a
way that looked like a PATH bug, when it was actually a missing CLI
> - Separately, the e2b spawn used `sandbox.commands.run` with a
non-login non-interactive shell whose PATH did not include npm-globals,
nvm shims, or anything else the template installs via
`.profile`/`.bashrc`
> - This pull request makes the resolvability check honest by running a
real `command -v` invocation through the sandbox runner, and aligns the
e2b spawn with SSH by sourcing login profiles before `exec env KEY=val
<cmd>`
> - The benefit is the e2b sandbox spawn agrees with the hello probe and
finds CLIs at template-installed paths
## What Changed
- `packages/adapter-utils/src/execution-target.ts`: add
`ensureSandboxCommandResolvable` that runs `command -v <cli>` through
the sandbox runner; replace the early-return in
`ensureAdapterExecutionTargetCommandResolvable` for sandbox targets
- `packages/plugins/sandbox-providers/e2b/src/plugin.ts`: replace
`buildCommandLine` with `buildLoginShellScript` (sources `/etc/profile`,
`~/.profile`, `~/.bash_profile`, `~/.bashrc`, `~/.zprofile`, and nvm.sh
before `exec env KEY=val <cmd>`); env vars are interpolated inline so
user-configured adapter env always wins over profile-exported values;
drop the now-unused `envs:` SDK option
- `plugin.test.ts` updated for the login-shell wrapping
## Verification
- `pnpm vitest run --no-coverage --project @paperclipai/sandbox-e2b` —
17/17 plugin tests pass
- `pnpm vitest run --no-coverage --project @paperclipai/adapter-utils`
clean
- `pnpm typecheck` clean
- Manual: previously every sandboxed adapter said "Command is
executable" then the hello probe failed with "exec: not found". After
this change, missing CLIs surface honestly at the resolvability step.
SSH no-regression: SSH Claude probe still passes.
## Risks
Medium — sandbox adapter Test reports will start failing at the
resolvability step for environments where the CLI was never actually
installed. This was always the real state; the previous "Command is
executable" message was incorrect. Operators should expect
previously-green-but-broken sandbox environments to report accurately.
## Model Used
Claude Opus 4.7 (1M context)
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable — `plugin.test.ts`
updated for the login-shell wrapping
- [x] If this change affects the UI, I have included before/after
screenshots — N/A (no UI)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-05-05 08:21:37 -07:00
|
|
|
const fgCall = sandbox.commands.run.mock.calls.find(([cmd]: [string]) => cmd.includes("'printf'"));
|
|
|
|
|
expect(fgCall).toBeDefined();
|
|
|
|
|
if (!fgCall) throw new Error("fgCall not found");
|
|
|
|
|
expect(fgCall[0]).toMatch(/\.profile/);
|
|
|
|
|
expect(fgCall[0]).toMatch(/exec env FOO='bar' 'printf' 'hello'$/);
|
|
|
|
|
expect(fgCall[1]).toEqual(expect.objectContaining({ cwd: "/workspace", timeoutMs: 1000 }));
|
|
|
|
|
expect(fgCall[1]).not.toHaveProperty("envs");
|
|
|
|
|
expect(fgCall[1]).not.toHaveProperty("background");
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
expect(sandbox.commands.sendStdin).not.toHaveBeenCalled();
|
|
|
|
|
expect(sandbox.commands.closeStdin).not.toHaveBeenCalled();
|
|
|
|
|
expect(sandbox.handle.wait).not.toHaveBeenCalled();
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
expect(result).toEqual({
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
timedOut: false,
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
stdout: "foreground\n",
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
stderr: "",
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
fix(sandbox): prevent E2B workspace upload + lease idle failures (PAPA-382) (#6560)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Heartbeats run inside managed sandboxes (E2B, Cloudflare Sandbox),
and each run begins by uploading the agent's workspace as a tar archive
> - PAPA-381's E2B runs were failing at 5 and 11 minutes — two distinct
failure modes were entangled: workspace tar extraction errors on Linux,
and sandbox idle/lease timeouts during normal heartbeat gaps
> - Workspace tar extraction failed because macOS bsdtar embeds
`LIBARCHIVE.xattr.*` PAX headers that GNU tar on Linux rejects with
"This does not look like a tar archive"; the existing
`COPYFILE_DISABLE=1` only suppresses AppleDouble `._*` sidecars, not
inline PAX xattr entries
> - E2B sandboxes also expired between heartbeats because `timeoutMs`
defaulted to a short window and was never refreshed per execute, and
Cloudflare sandboxes idled out because `sleepAfter` defaulted to 10
minutes
> - This pull request adds `--no-xattrs` to the workspace tar
invocation, refreshes the E2B sandbox lifetime on each execute and bumps
the default `timeoutMs` to 1h, and raises the Cloudflare `sleepAfter`
default to 1h
> - The benefit is that long-running heartbeat-driven runs (Claude,
Codex, etc.) survive across both their initial workspace upload and the
natural idle gaps between executes on both E2B and Cloudflare
## What Changed
- `packages/adapter-utils/src/sandbox-managed-runtime.ts`: added
`--no-xattrs` to `createTarballFromDirectory` so macOS bsdtar produces a
clean POSIX tar that GNU tar on Linux can extract, with an inline
comment explaining why `COPYFILE_DISABLE=1` alone is insufficient.
- `packages/plugins/sandbox-providers/e2b/src/plugin.ts`: refresh the
sandbox lifetime on every execute (so long runs don't expire mid-job)
and raised the default `timeoutMs` to 1h.
- `packages/plugins/sandbox-providers/e2b/src/manifest.ts` and
`plugin.test.ts`: updated manifest defaults and added regression
coverage for the new behavior.
- `packages/plugins/sandbox-providers/cloudflare/src/config.ts`,
`manifest.ts`, `plugin.test.ts`: raised default `sleepAfter` from 10m to
1h, mirroring the E2B 1h default, and added a regression test asserting
the acquire-lease request body sends `sleepAfter: "1h"` when not
overridden.
## Verification
- `pnpm --filter @paperclipai/plugin-e2b test`
- `pnpm --filter @paperclipai/plugin-cloudflare-sandbox test`
- Locally cherry-picked the `--no-xattrs` fix onto master and confirmed
end-to-end via a real PAPA-381-style heartbeat-driven E2B run that the
workspace upload now extracts cleanly on Linux. The user (board
operator) tested this on master and reported "Ok, that worked."
- Manual reviewer steps: trigger an E2B heartbeat from a macOS host
(this is where the bsdtar xattr headers come from), confirm the
workspace tar extracts on the Linux sandbox side; run a long (>15 min)
Cloudflare sandbox flow and confirm no lost-lease/idle errors between
executes.
## Risks
- Low risk overall.
- `--no-xattrs` is widely supported by both macOS bsdtar and GNU tar
(Linux). Worst case it silently no-ops on a future host that doesn't
support it; in that case the existing failure mode reappears, it doesn't
introduce a new one.
- Raising default `timeoutMs` (E2B) and `sleepAfter` (Cloudflare) from
short values to 1h means sandboxes stay alive longer between executes by
default. This is the intended behavior — operators that want a tighter
idle window can still override via plugin config.
- E2B per-execute sandbox lifetime refresh adds a small API call per
execute; it is bounded by the same client that already handles execute
traffic, so no new dependencies or retry semantics.
## Model Used
- Claude (Anthropic), `claude-opus-4-7`, extended thinking enabled, tool
use enabled (file/grep/git tools and Paperclip control-plane API). Used
to diagnose the dual failure mode (workspace tar PAX xattr headers +
sandbox lifetime), write the fixes and tests, and drive the verification
loop with the board operator.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots (N/A — no UI changes)
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
---------
Co-authored-by: Paperclip <noreply@paperclip.ing>
2026-05-22 13:34:11 -07:00
|
|
|
it("refreshes the sandbox lifetime on every execute so long runs don't die mid-command", async () => {
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
await plugin.definition.onEnvironmentExecute?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 1_800_000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
lease: { providerLeaseId: "sandbox-123", metadata: {} },
|
|
|
|
|
command: "printf",
|
|
|
|
|
args: ["hello"],
|
|
|
|
|
cwd: "/workspace",
|
|
|
|
|
env: {},
|
|
|
|
|
timeoutMs: 1000,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(sandbox.setTimeout).toHaveBeenCalledWith(1_800_000);
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("still runs the command when the setTimeout refresh fails transiently", async () => {
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
sandbox.setTimeout.mockRejectedValueOnce(new Error("transient e2b api error"));
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
const result = await plugin.definition.onEnvironmentExecute?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 1_800_000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
lease: { providerLeaseId: "sandbox-123", metadata: {} },
|
|
|
|
|
command: "printf",
|
|
|
|
|
args: ["hello"],
|
|
|
|
|
cwd: "/workspace",
|
|
|
|
|
env: {},
|
|
|
|
|
timeoutMs: 1000,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(sandbox.setTimeout).toHaveBeenCalledWith(1_800_000);
|
|
|
|
|
expect(sandbox.commands.run).toHaveBeenCalled();
|
|
|
|
|
expect(result?.exitCode).toBe(0);
|
|
|
|
|
});
|
|
|
|
|
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
it("cleans up staged stdin even when writing it fails", async () => {
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
const sandbox = createMockSandbox();
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
const failure = new Error("write failed");
|
|
|
|
|
sandbox.files.write.mockRejectedValueOnce(failure);
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
await expect(plugin.definition.onEnvironmentExecute?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
lease: { providerLeaseId: "sandbox-123", metadata: {} },
|
|
|
|
|
command: "printf",
|
|
|
|
|
args: ["hello"],
|
|
|
|
|
cwd: "/workspace",
|
|
|
|
|
env: { FOO: "bar" },
|
|
|
|
|
stdin: "input",
|
|
|
|
|
timeoutMs: 1000,
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
})).rejects.toThrow("write failed");
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
expect(sandbox.files.remove).toHaveBeenCalledWith(expect.stringMatching(/^\/tmp\/paperclip-stdin-/));
|
|
|
|
|
expect(sandbox.commands.sendStdin).not.toHaveBeenCalled();
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
expect(sandbox.handle.wait).not.toHaveBeenCalled();
|
|
|
|
|
});
|
|
|
|
|
|
Add sandbox callback bridge for remote environment API access (#4801)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Agents can run inside sandboxed environments like E2B, which are
isolated from the host network
> - Sandboxed agents need to call back to the Paperclip API to report
progress, post comments, and update issue status
> - But sandbox environments cannot reach the Paperclip server directly
because they run in isolated network namespaces
> - This PR adds a callback bridge that proxies API requests from the
sandbox to the Paperclip server, running as a local HTTP server on the
host that forwards authenticated requests
> - The bridge is started automatically when an adapter launches a
sandbox execution, and torn down when the run completes
> - The benefit is sandboxed agents can interact with the Paperclip API
without requiring network-level access to the host, enabling E2B and
similar providers to work end-to-end
## What Changed
- Added `sandbox-callback-bridge.ts` in `packages/adapter-utils/` — a
lightweight HTTP bridge server that accepts requests from sandbox
environments and proxies them to the Paperclip API with authentication
- Added request validation and security policy: the bridge only forwards
requests to the configured API URL, validates content types, enforces
size limits, and rejects non-API paths
- Wired the bridge into all remote adapter execute paths (claude, codex,
cursor, gemini, pi) — the bridge starts before the agent process and the
bridge URL is passed via environment variables
- Updated `environment-execution-target.ts` to prefer the explicit API
URL from environment lease metadata for sandbox callback routing
- Fixed Claude sandbox runtime setup to work with the bridge
configuration
- Added comprehensive test coverage for bridge request handling, policy
enforcement, and sandbox execution integration
- Fixed browser bundling — the bridge module is excluded from the
frontend bundle via the adapter-utils index export
## Verification
- `pnpm test` — all existing and new tests pass, including bridge unit
tests and sandbox execution integration tests
- `pnpm typecheck` — clean
- Manual: configure an E2B environment, run an agent task, verify the
agent can post comments and update issue status through the bridge
## Risks
- Medium. This is a new network-facing component (HTTP server on
localhost). The security policy restricts forwarding to the configured
API URL only and validates all requests, but any proxy introduces attack
surface. The bridge binds to localhost only and is scoped to the
lifetime of a single agent run.
## Model Used
Codex GPT 5.4 high via Paperclip.
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-29 16:37:34 -07:00
|
|
|
it("preserves partial foreground output when a non-stdin command times out", async () => {
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
sandbox.commands.run.mockImplementation(async (command: string, options?: { background?: boolean }) => {
|
|
|
|
|
if (options?.background) return sandbox.handle;
|
|
|
|
|
if (command === "pwd") {
|
|
|
|
|
return {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: "/home/user\n",
|
|
|
|
|
stderr: "",
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
throw new MockTimeoutError("command timed out", {
|
|
|
|
|
stdout: "partial stdout\n",
|
|
|
|
|
stderr: "partial stderr\n",
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
const result = await plugin.definition.onEnvironmentExecute?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
lease: { providerLeaseId: "sandbox-123", metadata: {} },
|
|
|
|
|
command: "printf",
|
|
|
|
|
args: ["hello"],
|
|
|
|
|
cwd: "/workspace",
|
|
|
|
|
env: { FOO: "bar" },
|
|
|
|
|
timeoutMs: 1000,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(result).toEqual({
|
|
|
|
|
exitCode: null,
|
|
|
|
|
timedOut: true,
|
|
|
|
|
stdout: "partial stdout\n",
|
|
|
|
|
stderr: "partial stderr\ncommand timed out\n",
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("preserves partial foreground output when a stdin command times out", async () => {
|
|
|
|
|
const sandbox = createMockSandbox();
|
|
|
|
|
sandbox.commands.run.mockImplementation(async (command: string, options?: { background?: boolean }) => {
|
|
|
|
|
if (options?.background) return sandbox.handle;
|
|
|
|
|
if (command === "pwd") {
|
|
|
|
|
return {
|
|
|
|
|
exitCode: 0,
|
|
|
|
|
stdout: "/home/user\n",
|
|
|
|
|
stderr: "",
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
throw new MockTimeoutError("command timed out", {
|
|
|
|
|
stdout: "stdin stdout\n",
|
|
|
|
|
stderr: "stdin stderr\n",
|
|
|
|
|
nested: true,
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
const result = await plugin.definition.onEnvironmentExecute?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
lease: { providerLeaseId: "sandbox-123", metadata: {} },
|
|
|
|
|
command: "printf",
|
|
|
|
|
args: ["hello"],
|
|
|
|
|
cwd: "/workspace",
|
|
|
|
|
env: { FOO: "bar" },
|
|
|
|
|
stdin: "input",
|
|
|
|
|
timeoutMs: 1000,
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(result).toEqual({
|
|
|
|
|
exitCode: null,
|
|
|
|
|
timedOut: true,
|
|
|
|
|
stdout: "stdin stdout\n",
|
|
|
|
|
stderr: "stdin stderr\ncommand timed out\n",
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
Add E2B sandbox provider plugin (#4452)
## Thinking Path
> - Paperclip orchestrates AI agents for zero-human companies
> - Sandbox environments are part of that execution layer, and the
recent core refactor moved provider-specific behavior to a generic
plugin seam
> - This pull request adds a dedicated `@paperclipai/plugin-e2b` package
so E2B can live entirely outside core host code
> - Because the feature is still unreleased, the plugin should model
third-party packaging directly instead of carrying extra
backward-compatibility complexity in core or the workspace lockfile
> - This branch therefore makes the E2B provider a standalone
publishable package, documents the package-local dev flow, and keeps the
publish manifest/runtime dependency story correct
> - The benefit is that E2B becomes a true plugin reference
implementation that can be installed by package name without reopening
core Paperclip code
## What Changed
- Added `packages/plugins/paperclip-plugin-e2b` as the E2B sandbox
provider plugin package
- Implemented config validation, lease acquire/resume/release/destroy
handlers, workspace realization, and command execution for E2B sandboxes
- Excluded the E2B plugin package from the root workspace so the repo no
longer needs `pnpm-lock.yaml` churn for its third-party dependency graph
- Added package-local development/install support plus a prepack
manifest generator so the published tarball still declares
`@paperclipai/plugin-sdk` and `e2b` runtime dependencies
- Addressed review feedback by fixing sandbox cleanup on acquire
failures, rejecting blank templates, normalizing fractional `timeoutMs`,
and always passing the configured template name to the E2B SDK
- Updated focused Vitest coverage for config normalization, validation,
acquire cleanup, command execution, and lease release behavior
- Updated the Dockerfile deps stage to copy the E2B package manifest so
the policy check stays in sync
## Verification
- `cd packages/plugins/paperclip-plugin-e2b && pnpm install
--ignore-workspace --no-lockfile`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm build`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
test`
- `cd packages/plugins/paperclip-plugin-e2b && pnpm --ignore-workspace
typecheck`
- `cd packages/plugins/paperclip-plugin-e2b && npm pack --dry-run`
## Risks
- The package now relies on a prepack manifest rewrite so the
publish-time dependency list stays correct while the repo-local dev
manifest stays workspace-light
- The current repo snapshot is still unreleased, so the generated
publish manifest points at the repo SDK version until the normal release
flow rewrites versions before publish
- Real-world E2B environments may still expose edge cases around
lifecycle timing or sandbox metadata beyond the mocked unit coverage
> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and
discuss it in `#dev` before opening the PR. Feature PRs that overlap
with planned core work may need to be redirected — check the roadmap
first. See `CONTRIBUTING.md`.
## Model Used
- OpenAI Codex via `codex_local`
- Model ID: `gpt-5.4`
- Reasoning effort: `high`
- Context window observed in runtime session metadata: `258400` tokens
- Capabilities used: terminal tool execution, git, GitHub CLI, and local
build/test inspection
## Checklist
- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
2026-04-25 11:01:11 -07:00
|
|
|
it("pauses reusable leases and kills ephemeral leases on release", async () => {
|
|
|
|
|
const reusable = createMockSandbox({ sandboxId: "sandbox-reusable" });
|
|
|
|
|
const ephemeral = createMockSandbox({ sandboxId: "sandbox-ephemeral" });
|
|
|
|
|
mockConnect.mockResolvedValueOnce(reusable).mockResolvedValueOnce(ephemeral);
|
|
|
|
|
|
|
|
|
|
await plugin.definition.onEnvironmentReleaseLease?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: true,
|
|
|
|
|
},
|
|
|
|
|
providerLeaseId: "sandbox-reusable",
|
|
|
|
|
});
|
|
|
|
|
await plugin.definition.onEnvironmentReleaseLease?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
providerLeaseId: "sandbox-ephemeral",
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(reusable.pause).toHaveBeenCalled();
|
|
|
|
|
expect(reusable.kill).not.toHaveBeenCalled();
|
|
|
|
|
expect(ephemeral.kill).toHaveBeenCalled();
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("falls back to kill when pausing a reusable lease fails", async () => {
|
|
|
|
|
const sandbox = createMockSandbox({ sandboxId: "sandbox-reusable" });
|
|
|
|
|
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => undefined);
|
|
|
|
|
sandbox.pause.mockRejectedValueOnce(new Error("pause failed"));
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
await expect(plugin.definition.onEnvironmentReleaseLease?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: true,
|
|
|
|
|
},
|
|
|
|
|
providerLeaseId: "sandbox-reusable",
|
|
|
|
|
})).resolves.toBeUndefined();
|
|
|
|
|
|
|
|
|
|
expect(sandbox.pause).toHaveBeenCalled();
|
|
|
|
|
expect(sandbox.kill).toHaveBeenCalled();
|
|
|
|
|
expect(warnSpy).toHaveBeenCalled();
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("creates the remote workspace before returning it", async () => {
|
|
|
|
|
const sandbox = createMockSandbox({ sandboxId: "sandbox-realize" });
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
await expect(plugin.definition.onEnvironmentRealizeWorkspace?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
lease: {
|
|
|
|
|
providerLeaseId: "sandbox-realize",
|
|
|
|
|
metadata: { remoteCwd: "/home/user/paperclip-workspace" },
|
|
|
|
|
},
|
|
|
|
|
workspace: {
|
|
|
|
|
localPath: "/tmp/paperclip-workspace",
|
|
|
|
|
},
|
|
|
|
|
})).resolves.toEqual({
|
|
|
|
|
cwd: "/home/user/paperclip-workspace",
|
|
|
|
|
metadata: {
|
|
|
|
|
provider: "e2b",
|
|
|
|
|
remoteCwd: "/home/user/paperclip-workspace",
|
|
|
|
|
},
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
expect(mockConnect).toHaveBeenCalledWith("sandbox-realize", expect.objectContaining({ apiKey: "resolved-key" }));
|
|
|
|
|
expect(sandbox.commands.run).toHaveBeenCalledWith("mkdir -p '/home/user/paperclip-workspace'");
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it("swallows destroy kill errors after logging them", async () => {
|
|
|
|
|
const sandbox = createMockSandbox({ sandboxId: "sandbox-destroy" });
|
|
|
|
|
const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => undefined);
|
|
|
|
|
sandbox.kill.mockRejectedValueOnce(new Error("kill failed"));
|
|
|
|
|
mockConnect.mockResolvedValue(sandbox);
|
|
|
|
|
|
|
|
|
|
await expect(plugin.definition.onEnvironmentDestroyLease?.({
|
|
|
|
|
driverKey: "e2b",
|
|
|
|
|
companyId: "company-1",
|
|
|
|
|
environmentId: "env-1",
|
|
|
|
|
config: {
|
|
|
|
|
template: "base",
|
|
|
|
|
apiKey: "resolved-key",
|
|
|
|
|
timeoutMs: 300000,
|
|
|
|
|
reuseLease: false,
|
|
|
|
|
},
|
|
|
|
|
providerLeaseId: "sandbox-destroy",
|
|
|
|
|
})).resolves.toBeUndefined();
|
|
|
|
|
|
|
|
|
|
expect(sandbox.kill).toHaveBeenCalled();
|
|
|
|
|
expect(warnSpy).toHaveBeenCalled();
|
|
|
|
|
});
|
|
|
|
|
});
|