[codex] Add Claude model refresh (#6953)

## Thinking Path

> - Paperclip orchestrates AI-agent companies through adapter-backed
local and external runtimes.
> - The agent configuration UI lets operators choose adapter models and
refresh model lists when adapters support live discovery.
> - Codex already had a live refresh path, but Claude Local only exposed
static fallback models and the UI hid the refresh action for Claude.
> - A newly available Claude Opus model should not require a code
release every time the model catalog changes.
> - This pull request adds Anthropic model discovery for Claude Local,
keeps the static fallback current with Claude Opus 4.8, and exposes the
existing refresh button in the Claude Local dropdown.
> - The benefit is that operators can refresh Claude models from the
same model selector flow they already use for Codex.

## What Changed

- Added `claude-opus-4-8` to the Claude Local fallback model list.
- Added Claude model discovery through the Anthropic-compatible
`GET /v1/models` endpoint when `ANTHROPIC_API_KEY` is available.
- Added normal cache reuse, forced refresh support, a SHA-256-based
API-key fingerprint for cache keys, and warning logging for discovery
errors before fallback.
- Wired `claude_local.refreshModels` into the server adapter registry.
- Enabled the existing `Refresh models` dropdown action for
`claude_local` in `AgentConfigForm`.
- Added tests for Claude fallback, live discovery, API-failure fallback,
forced refresh, and the UI refresh-button gate.

## Verification

- `pnpm exec vitest run server/src/__tests__/adapter-models.test.ts`
- `pnpm exec vitest run ui/src/components/AgentConfigForm.test.ts`
- `pnpm --filter @paperclipai/adapter-claude-local typecheck`
- `pnpm --filter @paperclipai/server typecheck`
- `pnpm --filter @paperclipai/ui typecheck`
- Greptile review reached a Confidence Score of 5/5 on commit `b796cf4f1`,
with all review threads addressed and resolved.

UI note: the visible change is a conditional action row inside the
existing model dropdown; the regression test verifies that `claude_local`
now receives the refresh action.

## Risks

- Low risk. Without `ANTHROPIC_API_KEY`, Claude Local still uses the
static fallback list.
- If Anthropic model discovery fails or times out, Paperclip falls back
to the existing cached or static list.
- Bedrock environments remain on Bedrock-native model IDs.

## Model Used

OpenAI GPT-5 via Codex local coding agent, with repository file access,
shell command execution, git operations, and targeted test/typecheck
verification. Exact context window is not exposed by the runtime.

## Checklist

- [x] I have included a thinking path that traces from project context
to this change
- [x] I have specified the model used (with version and capability
details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate
planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [x] If this change affects the UI, I have included before/after
screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before
requesting merge
This commit is contained in:
Dotta 2026-05-29 07:03:07 -10:00 committed by GitHub
parent 1f70fd9a22
commit 5153b01ada
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 228 additions and 3 deletions

View file

@ -1,4 +1,6 @@
import { beforeEach, describe, expect, it, vi } from "vitest";
import { models as claudeFallbackModels } from "@paperclipai/adapter-claude-local";
import { resetClaudeModelsCacheForTests } from "@paperclipai/adapter-claude-local/server";
import { models as codexFallbackModels } from "@paperclipai/adapter-codex-local";
import { models as cursorFallbackModels } from "@paperclipai/adapter-cursor-local";
import { models as opencodeFallbackModels } from "@paperclipai/adapter-opencode-local";
@ -17,7 +19,12 @@ vi.mock("acpx/runtime", () => ({
describe("adapter model listing", () => {
beforeEach(() => {
delete process.env.OPENAI_API_KEY;
delete process.env.ANTHROPIC_API_KEY;
delete process.env.ANTHROPIC_BASE_URL;
delete process.env.ANTHROPIC_BEDROCK_BASE_URL;
delete process.env.CLAUDE_CODE_USE_BEDROCK;
delete process.env.PAPERCLIP_OPENCODE_COMMAND;
resetClaudeModelsCacheForTests();
resetCodexModelsCacheForTests();
resetCursorModelsCacheForTests();
setCursorModelsRunnerForTests(null);
@ -45,6 +52,72 @@ describe("adapter model listing", () => {
expect(fetchSpy).not.toHaveBeenCalled();
});
it("returns claude fallback models including the latest Opus alias when no Anthropic key is available", async () => {
  // Plain spy with no mocked response, so any fetch attempt is recorded.
  const networkSpy = vi.spyOn(globalThis, "fetch");

  const listed = await listAdapterModels("claude_local");

  // The listing must equal the bundled fallback list and contain the Opus alias.
  expect(listed).toEqual(claudeFallbackModels);
  const includesOpusAlias = listed.some(({ id }) => id === "claude-opus-4-8");
  expect(includesOpusAlias).toBe(true);

  // No key was set in this test, so no network call is expected.
  expect(networkSpy).not.toHaveBeenCalled();
});
it("loads claude models dynamically and merges fallback options", async () => {
  process.env.ANTHROPIC_API_KEY = "sk-ant-test";

  // Stand-in for a successful model-list response returned by fetch.
  const liveListing = {
    ok: true,
    json: async () => ({
      data: [
        { id: "claude-sonnet-4-20250514", display_name: "Claude Sonnet 4" },
        { id: "claude-opus-4-8-20260529", display_name: "Claude Opus 4.8" },
      ],
    }),
  } as Response;
  const networkSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue(liveListing);

  // Two back-to-back listings: the test expects only one fetch between them.
  const firstListing = await listAdapterModels("claude_local");
  const secondListing = await listAdapterModels("claude_local");
  const firstHas = (wanted: string) => firstListing.some(({ id }) => id === wanted);

  expect(networkSpy).toHaveBeenCalledTimes(1);
  expect(firstListing).toEqual(secondListing);
  // Both the discovered dated ID and the static alias must be present.
  expect(firstHas("claude-opus-4-8-20260529")).toBe(true);
  expect(firstHas("claude-opus-4-8")).toBe(true);
});
it("refreshes cached claude models on demand", async () => {
  process.env.ANTHROPIC_API_KEY = "sk-ant-test";

  // First response: served to the initial listing call.
  const sonnetOnly = {
    ok: true,
    json: async () => ({
      data: [{ id: "claude-sonnet-4-20250514", display_name: "Claude Sonnet 4" }],
    }),
  } as Response;
  // Second response: served to the explicit refresh call.
  const opusOnly = {
    ok: true,
    json: async () => ({
      data: [{ id: "claude-opus-4-8-20260529", display_name: "Claude Opus 4.8" }],
    }),
  } as Response;

  const networkSpy = vi.spyOn(globalThis, "fetch");
  networkSpy.mockResolvedValueOnce(sonnetOnly).mockResolvedValueOnce(opusOnly);

  const beforeRefresh = await listAdapterModels("claude_local");
  const afterRefresh = await refreshAdapterModels("claude_local");

  // The refresh must have triggered a second fetch.
  expect(networkSpy).toHaveBeenCalledTimes(2);
  const sawSonnet = beforeRefresh.some(({ id }) => id === "claude-sonnet-4-20250514");
  expect(sawSonnet).toBe(true);
  const sawOpus = afterRefresh.some(({ id }) => id === "claude-opus-4-8-20260529");
  expect(sawOpus).toBe(true);
});
it("falls back to static claude models when Anthropic model discovery fails", async () => {
  process.env.ANTHROPIC_API_KEY = "sk-ant-test";

  // Simulate a rejected request: non-ok status with an empty JSON body.
  const unauthorized = {
    ok: false,
    status: 401,
    json: async () => ({}),
  } as Response;
  vi.spyOn(globalThis, "fetch").mockResolvedValue(unauthorized);

  const listed = await listAdapterModels("claude_local");

  // With discovery failing, the listing must equal the static fallback list.
  expect(listed).toEqual(claudeFallbackModels);
});
it("loads codex models dynamically and merges fallback options", async () => {
process.env.OPENAI_API_KEY = "sk-test";
const fetchSpy = vi.spyOn(globalThis, "fetch").mockResolvedValue({

View file

@ -25,6 +25,7 @@ import {
listClaudeSkills,
syncClaudeSkills,
listClaudeModels,
refreshClaudeModels,
testEnvironment as claudeTestEnvironment,
sessionCodec as claudeSessionCodec,
getQuotaWindows as claudeGetQuotaWindows,
@ -255,6 +256,7 @@ const claudeLocalAdapter: ServerAdapterModule = {
models: claudeModels,
modelProfiles: claudeModelProfiles,
listModels: listClaudeModels,
refreshModels: refreshClaudeModels,
supportsLocalAgentJwt: true,
supportsInstructionsBundle: true,
instructionsPathKey: "instructionsFilePath",