mirror of
https://github.com/alkimake/paperclip.git
synced 2026-06-14 01:50:39 +09:00
[codex] Add configurable liveness auto-recovery controls (#4587)
## Thinking Path

> - Paperclip orchestrates AI agents for zero-human companies.
> - Heartbeat liveness recovery decides when stalled issue trees need manager-visible follow-up.
> - Automatic recovery issue creation is useful, but operators need instance-level controls for how aggressive it is.
> - Without controls, recovery behavior is harder to tune for local development, production operations, and noisy edge cases.
> - This pull request adds configurable liveness auto-recovery settings across shared contracts, API routes, services, and the instance experimental settings UI.
> - The benefit is that operators can keep liveness findings advisory or enable bounded recovery automation with explicit intervals and lookback windows.

## What Changed

- Added shared types and validators for liveness auto-recovery settings.
- Extended instance settings routes and services to persist and validate the new controls.
- Wired heartbeat/recovery services to honor enablement, minimum interval, and lookback settings.
- Added UI controls for liveness recovery under instance experimental settings.
- Covered the new server behavior with instance settings and liveness escalation tests.

## Verification

- `pnpm exec vitest run --project @paperclipai/server server/src/__tests__/heartbeat-issue-liveness-escalation.test.ts server/src/__tests__/instance-settings-routes.test.ts --pool=forks --poolOptions.forks.isolate=true`
- `pnpm --filter @paperclipai/shared typecheck`
- `pnpm --filter @paperclipai/server typecheck`
- `pnpm --filter @paperclipai/ui typecheck`

## Risks

- Moderate behavioral risk because recovery automation timing changes when enabled; defaults keep existing advisory behavior unless the setting is turned on.
- No database migration in this PR; settings are stored through the existing instance settings path.

> For core feature work, check [`ROADMAP.md`](ROADMAP.md) first and discuss it in `#dev` before opening the PR.
> Feature PRs that overlap with planned core work may need to be redirected — check the roadmap first. See `CONTRIBUTING.md`.

## Model Used

- OpenAI Codex, `gpt-5`, coding model with tool use and local command execution; context window not exposed by the runtime.

## Checklist

- [x] I have included a thinking path that traces from project context to this change
- [x] I have specified the model used (with version and capability details)
- [x] I have checked ROADMAP.md and confirmed this PR does not duplicate planned core work
- [x] I have run tests locally and they pass
- [x] I have added or updated tests where applicable
- [ ] If this change affects the UI, I have included before/after screenshots
- [x] I have updated relevant documentation to reflect my changes
- [x] I have considered and documented any risks above
- [x] I will address all Greptile and reviewer comments before requesting merge

---------

Co-authored-by: Paperclip <noreply@paperclip.ing>
This commit is contained in:
parent
f0f9460d1d
commit
fda296ee4f
14 changed files with 679 additions and 54 deletions
|
|
@ -1,6 +1,7 @@
|
|||
import type {
|
||||
InstanceExperimentalSettings,
|
||||
InstanceGeneralSettings,
|
||||
IssueGraphLivenessAutoRecoveryPreview,
|
||||
PatchInstanceGeneralSettings,
|
||||
PatchInstanceExperimentalSettings,
|
||||
} from "@paperclipai/shared";
|
||||
|
|
@ -15,4 +16,25 @@ export const instanceSettingsApi = {
|
|||
api.get<InstanceExperimentalSettings>("/instance/settings/experimental"),
|
||||
updateExperimental: (patch: PatchInstanceExperimentalSettings) =>
|
||||
api.patch<InstanceExperimentalSettings>("/instance/settings/experimental", patch),
|
||||
previewIssueGraphLivenessAutoRecovery: (input: { lookbackHours?: number }) =>
|
||||
api.post<IssueGraphLivenessAutoRecoveryPreview>(
|
||||
"/instance/settings/experimental/issue-graph-liveness-auto-recovery/preview",
|
||||
input,
|
||||
),
|
||||
runIssueGraphLivenessAutoRecovery: (input: { lookbackHours?: number }) =>
|
||||
api.post<{
|
||||
findings: number;
|
||||
autoRecoveryEnabled: boolean;
|
||||
lookbackHours: number;
|
||||
cutoff: string;
|
||||
escalationsCreated: number;
|
||||
existingEscalations: number;
|
||||
skipped: number;
|
||||
skippedAutoRecoveryDisabled: number;
|
||||
skippedOutsideLookback: number;
|
||||
escalationIssueIds: string[];
|
||||
}>(
|
||||
"/instance/settings/experimental/issue-graph-liveness-auto-recovery/run",
|
||||
input,
|
||||
),
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,16 +1,130 @@
|
|||
import { useEffect, useState } from "react";
|
||||
import { useMutation, useQuery, useQueryClient } from "@tanstack/react-query";
|
||||
import { FlaskConical } from "lucide-react";
|
||||
import type { PatchInstanceExperimentalSettings } from "@paperclipai/shared";
|
||||
import { Clock, FlaskConical, Play, Search } from "lucide-react";
|
||||
import type {
|
||||
IssueGraphLivenessAutoRecoveryPreview,
|
||||
PatchInstanceExperimentalSettings,
|
||||
} from "@paperclipai/shared";
|
||||
import { instanceSettingsApi } from "@/api/instanceSettings";
|
||||
import { useBreadcrumbs } from "../context/BreadcrumbContext";
|
||||
import { queryKeys } from "../lib/queryKeys";
|
||||
import { ToggleSwitch } from "@/components/ui/toggle-switch";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import {
|
||||
Dialog,
|
||||
DialogContent,
|
||||
DialogDescription,
|
||||
DialogFooter,
|
||||
DialogHeader,
|
||||
DialogTitle,
|
||||
} from "@/components/ui/dialog";
|
||||
|
||||
/**
 * Builds the root-relative href used to link to an issue.
 *
 * When an identifier is present, the text before its first "-" becomes the leading path segment
 * (falling back to "PAP" when that text is empty) and the full identifier becomes the final
 * segment. Without an identifier the link falls back to the raw issue id.
 *
 * Every interpolated segment is percent-encoded so characters such as "/", "?", "#" or spaces
 * cannot alter the shape of the resulting URL. Ordinary identifiers like "PAP-123" are unaffected.
 *
 * @param identifier - Issue identifier (for example "PAP-123"), or null when none is available.
 * @param issueId - Issue id used for the fallback link.
 * @returns The href for the issue.
 */
function issueHref(identifier: string | null, issueId: string) {
  // An empty identifier is treated the same as a missing one.
  if (!identifier) return `/issues/${encodeURIComponent(issueId)}`;
  const prefix = identifier.split("-")[0] || "PAP";
  return `/${encodeURIComponent(prefix)}/issues/${encodeURIComponent(identifier)}`;
}
|
||||
|
||||
/**
 * Produces display text for a recovery state by replacing every underscore with a space.
 *
 * @param state - Raw state value, e.g. a snake_case token.
 * @returns The same text with each "_" swapped for " ".
 */
function formatRecoveryState(state: string) {
  const pieces = state.split("_");
  return pieces.join(" ");
}
|
||||
|
||||
/**
 * Confirmation dialog shown before issue graph liveness auto-recovery is enabled.
 *
 * Renders a one-line summary of the preview, one card per preview item (linking to the affected
 * issue and to its recovery target), an optional note about findings skipped for being outside the
 * lookback window, and the Cancel / "Enable only" / "Enable and create" actions.
 *
 * @param preview - Preview result to display, or null while none is available. The two enable
 *   buttons stay disabled while this is null.
 * @param open - Whether the dialog is open.
 * @param onOpenChange - Called with the requested open state; Cancel calls it with `false`.
 * @param onEnableOnly - Called when "Enable only" is clicked.
 * @param onEnableAndRun - Called when the primary enable button is clicked.
 * @param isPending - Disables all footer buttons while true.
 */
function RecoveryPreviewDialog({
  preview,
  open,
  onOpenChange,
  onEnableOnly,
  onEnableAndRun,
  isPending,
}: {
  preview: IssueGraphLivenessAutoRecoveryPreview | null;
  open: boolean;
  onOpenChange: (open: boolean) => void;
  onEnableOnly: () => void;
  onEnableAndRun: () => void;
  isPending: boolean;
}) {
  const count = preview?.recoverableFindings ?? 0;
  // Verb and unit agree with their numbers so singular values read correctly
  // ("1 recovery task matches the last 1 hour.").
  const summary = preview
    ? `${count} recovery ${count === 1 ? "task matches" : "tasks match"} the last ${preview.lookbackHours} ${
        preview.lookbackHours === 1 ? "hour" : "hours"
      }.`
    : "Checking recovery candidates before enabling.";

  return (
    <Dialog open={open} onOpenChange={onOpenChange}>
      <DialogContent className="sm:max-w-3xl">
        <DialogHeader>
          <DialogTitle>Confirm auto-recovery</DialogTitle>
          <DialogDescription>{summary}</DialogDescription>
        </DialogHeader>

        <div className="max-h-[min(28rem,65vh)] space-y-3 overflow-y-auto pr-1">
          {/* Empty state: a preview was returned but it lists no items. */}
          {preview && preview.items.length === 0 ? (
            <div className="rounded-md border border-border bg-muted/30 px-3 py-4 text-sm text-muted-foreground">
              No recovery tasks would be created right now. Auto-recovery can still run for future liveness incidents in
              this window.
            </div>
          ) : null}

          {preview?.items.map((item) => (
            <div key={item.incidentKey} className="rounded-md border border-border bg-card px-3 py-3">
              <div className="flex flex-wrap items-center gap-2">
                <a
                  href={issueHref(item.identifier, item.issueId)}
                  className="text-sm font-medium text-primary underline-offset-2 hover:underline"
                >
                  {item.identifier ?? item.issueId}
                </a>
                <span className="rounded-sm bg-muted px-1.5 py-0.5 text-xs text-muted-foreground">
                  {formatRecoveryState(item.state)}
                </span>
              </div>
              <p className="mt-1 text-sm text-foreground">{item.title}</p>
              <p className="mt-1 text-xs text-muted-foreground">{item.reason}</p>
              <div className="mt-2 text-xs text-muted-foreground">
                Recovery target:{" "}
                <a
                  href={issueHref(item.recoveryIdentifier, item.recoveryIssueId)}
                  className="text-primary underline-offset-2 hover:underline"
                >
                  {item.recoveryIdentifier ?? item.recoveryIssueId}
                </a>
              </div>
            </div>
          ))}
        </div>

        {preview && preview.skippedOutsideLookback > 0 ? (
          <p className="text-xs text-muted-foreground">
            {preview.skippedOutsideLookback} current{" "}
            {preview.skippedOutsideLookback === 1 ? "finding is" : "findings are"} outside the configured lookback and
            will not be touched.
          </p>
        ) : null}

        <DialogFooter>
          <Button variant="outline" onClick={() => onOpenChange(false)} disabled={isPending}>
            Cancel
          </Button>
          <Button variant="outline" onClick={onEnableOnly} disabled={isPending || !preview}>
            Enable only
          </Button>
          <Button onClick={onEnableAndRun} disabled={isPending || !preview}>
            {count > 0 ? `Enable and create ${count}` : "Enable"}
          </Button>
        </DialogFooter>
      </DialogContent>
    </Dialog>
  );
}
|
||||
|
||||
export function InstanceExperimentalSettings() {
|
||||
const { setBreadcrumbs } = useBreadcrumbs();
|
||||
const queryClient = useQueryClient();
|
||||
const [actionError, setActionError] = useState<string | null>(null);
|
||||
const [lookbackHoursDraft, setLookbackHoursDraft] = useState("24");
|
||||
const [previewDialogOpen, setPreviewDialogOpen] = useState(false);
|
||||
const [pendingPreview, setPendingPreview] = useState<IssueGraphLivenessAutoRecoveryPreview | null>(null);
|
||||
|
||||
useEffect(() => {
|
||||
setBreadcrumbs([
|
||||
|
|
@ -39,6 +153,42 @@ export function InstanceExperimentalSettings() {
|
|||
},
|
||||
});
|
||||
|
||||
const previewMutation = useMutation({
|
||||
mutationFn: async (lookbackHours: number) =>
|
||||
instanceSettingsApi.previewIssueGraphLivenessAutoRecovery({ lookbackHours }),
|
||||
onSuccess: (preview) => {
|
||||
setActionError(null);
|
||||
setPendingPreview(preview);
|
||||
setPreviewDialogOpen(true);
|
||||
},
|
||||
onError: (error) => {
|
||||
setActionError(error instanceof Error ? error.message : "Failed to preview recovery tasks.");
|
||||
},
|
||||
});
|
||||
|
||||
const runRecoveryMutation = useMutation({
|
||||
mutationFn: async (lookbackHours: number) =>
|
||||
instanceSettingsApi.runIssueGraphLivenessAutoRecovery({ lookbackHours }),
|
||||
onSuccess: async () => {
|
||||
setActionError(null);
|
||||
setPreviewDialogOpen(false);
|
||||
await Promise.all([
|
||||
queryClient.invalidateQueries({ queryKey: queryKeys.instance.experimentalSettings }),
|
||||
queryClient.invalidateQueries({ queryKey: queryKeys.health }),
|
||||
]);
|
||||
},
|
||||
onError: (error) => {
|
||||
setActionError(error instanceof Error ? error.message : "Failed to create recovery tasks.");
|
||||
},
|
||||
});
|
||||
|
||||
useEffect(() => {
|
||||
const next = experimentalQuery.data?.issueGraphLivenessAutoRecoveryLookbackHours;
|
||||
if (typeof next === "number") {
|
||||
setLookbackHoursDraft(String(next));
|
||||
}
|
||||
}, [experimentalQuery.data?.issueGraphLivenessAutoRecoveryLookbackHours]);
|
||||
|
||||
if (experimentalQuery.isLoading) {
|
||||
return <div className="text-sm text-muted-foreground">Loading experimental settings...</div>;
|
||||
}
|
||||
|
|
@ -58,6 +208,41 @@ export function InstanceExperimentalSettings() {
|
|||
const autoRestartDevServerWhenIdle = experimentalQuery.data?.autoRestartDevServerWhenIdle === true;
|
||||
const enableIssueGraphLivenessAutoRecovery =
|
||||
experimentalQuery.data?.enableIssueGraphLivenessAutoRecovery === true;
|
||||
const lookbackHours =
|
||||
experimentalQuery.data?.issueGraphLivenessAutoRecoveryLookbackHours ?? 24;
|
||||
const parsedLookbackHours = Number.parseInt(lookbackHoursDraft, 10);
|
||||
const lookbackHoursIsValid =
|
||||
Number.isInteger(parsedLookbackHours) && parsedLookbackHours >= 1 && parsedLookbackHours <= 720;
|
||||
const recoveryActionPending =
|
||||
toggleMutation.isPending || previewMutation.isPending || runRecoveryMutation.isPending;
|
||||
|
||||
function previewForEnable() {
|
||||
if (!lookbackHoursIsValid) {
|
||||
setActionError("Lookback hours must be a whole number from 1 to 720.");
|
||||
return;
|
||||
}
|
||||
previewMutation.mutate(parsedLookbackHours);
|
||||
}
|
||||
|
||||
function enableOnly() {
|
||||
if (!lookbackHoursIsValid) return;
|
||||
toggleMutation.mutate({
|
||||
enableIssueGraphLivenessAutoRecovery: true,
|
||||
issueGraphLivenessAutoRecoveryLookbackHours: parsedLookbackHours,
|
||||
}, {
|
||||
onSuccess: () => setPreviewDialogOpen(false),
|
||||
});
|
||||
}
|
||||
|
||||
function enableAndRun() {
|
||||
if (!lookbackHoursIsValid) return;
|
||||
toggleMutation.mutate({
|
||||
enableIssueGraphLivenessAutoRecovery: true,
|
||||
issueGraphLivenessAutoRecoveryLookbackHours: parsedLookbackHours,
|
||||
}, {
|
||||
onSuccess: () => runRecoveryMutation.mutate(parsedLookbackHours),
|
||||
});
|
||||
}
|
||||
|
||||
return (
|
||||
<div className="max-w-4xl space-y-6">
|
||||
|
|
@ -132,26 +317,99 @@ export function InstanceExperimentalSettings() {
|
|||
</section>
|
||||
|
||||
<section className="rounded-xl border border-border bg-card p-5">
|
||||
<div className="flex items-start justify-between gap-4">
|
||||
<div className="space-y-1.5">
|
||||
<h2 className="text-sm font-semibold">Auto-Create Issue Recovery Tasks</h2>
|
||||
<p className="max-w-2xl text-sm text-muted-foreground">
|
||||
Let the heartbeat scheduler create recovery issues for issue dependency chains that have been stalled for
|
||||
at least 24 hours.
|
||||
</p>
|
||||
<div className="flex flex-col gap-5">
|
||||
<div className="flex items-start justify-between gap-4">
|
||||
<div className="space-y-1.5">
|
||||
<h2 className="text-sm font-semibold">Auto-Create Issue Recovery Tasks</h2>
|
||||
<p className="max-w-2xl text-sm text-muted-foreground">
|
||||
Let the heartbeat scheduler create recovery issues for issue dependency chains found inside the
|
||||
configured lookback window.
|
||||
</p>
|
||||
</div>
|
||||
<ToggleSwitch
|
||||
checked={enableIssueGraphLivenessAutoRecovery}
|
||||
onCheckedChange={() => {
|
||||
if (enableIssueGraphLivenessAutoRecovery) {
|
||||
toggleMutation.mutate({ enableIssueGraphLivenessAutoRecovery: false });
|
||||
return;
|
||||
}
|
||||
previewForEnable();
|
||||
}}
|
||||
disabled={recoveryActionPending}
|
||||
aria-label="Toggle issue graph liveness auto-recovery"
|
||||
/>
|
||||
</div>
|
||||
<ToggleSwitch
|
||||
checked={enableIssueGraphLivenessAutoRecovery}
|
||||
onCheckedChange={() =>
|
||||
toggleMutation.mutate({
|
||||
enableIssueGraphLivenessAutoRecovery: !enableIssueGraphLivenessAutoRecovery,
|
||||
})
|
||||
}
|
||||
disabled={toggleMutation.isPending}
|
||||
aria-label="Toggle issue graph liveness auto-recovery"
|
||||
/>
|
||||
|
||||
<div className="grid gap-3 sm:grid-cols-[minmax(10rem,14rem)_1fr] sm:items-end">
|
||||
<label className="space-y-1.5">
|
||||
<span className="flex items-center gap-1.5 text-xs font-medium text-muted-foreground">
|
||||
<Clock className="h-3.5 w-3.5" />
|
||||
Lookback hours
|
||||
</span>
|
||||
<Input
|
||||
type="number"
|
||||
min={1}
|
||||
max={720}
|
||||
step={1}
|
||||
value={lookbackHoursDraft}
|
||||
onChange={(event) => setLookbackHoursDraft(event.target.value)}
|
||||
aria-invalid={!lookbackHoursIsValid}
|
||||
/>
|
||||
</label>
|
||||
<div className="flex flex-wrap gap-2">
|
||||
<Button
|
||||
variant="outline"
|
||||
onClick={() => {
|
||||
if (!lookbackHoursIsValid) {
|
||||
setActionError("Lookback hours must be a whole number from 1 to 720.");
|
||||
return;
|
||||
}
|
||||
toggleMutation.mutate({
|
||||
issueGraphLivenessAutoRecoveryLookbackHours: parsedLookbackHours,
|
||||
});
|
||||
}}
|
||||
disabled={recoveryActionPending || parsedLookbackHours === lookbackHours}
|
||||
>
|
||||
Save hours
|
||||
</Button>
|
||||
<Button
|
||||
variant="outline"
|
||||
onClick={previewForEnable}
|
||||
disabled={recoveryActionPending}
|
||||
>
|
||||
<Search className="h-4 w-4" />
|
||||
Preview
|
||||
</Button>
|
||||
<Button
|
||||
onClick={() => {
|
||||
if (!lookbackHoursIsValid) {
|
||||
setActionError("Lookback hours must be a whole number from 1 to 720.");
|
||||
return;
|
||||
}
|
||||
runRecoveryMutation.mutate(parsedLookbackHours);
|
||||
}}
|
||||
disabled={recoveryActionPending || !enableIssueGraphLivenessAutoRecovery}
|
||||
>
|
||||
<Play className="h-4 w-4" />
|
||||
Run now
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p className="text-xs text-muted-foreground">
|
||||
Current window: last {lookbackHours} {lookbackHours === 1 ? "hour" : "hours"}.
|
||||
</p>
|
||||
</div>
|
||||
</section>
|
||||
|
||||
<RecoveryPreviewDialog
|
||||
open={previewDialogOpen}
|
||||
onOpenChange={setPreviewDialogOpen}
|
||||
preview={pendingPreview}
|
||||
onEnableOnly={enableOnly}
|
||||
onEnableAndRun={enableAndRun}
|
||||
isPending={recoveryActionPending}
|
||||
/>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue