paperclip/server/src/log-redaction.ts

149 lines
4.6 KiB
TypeScript
Raw Normal View History

import os from "node:os";
/**
 * Default mask text (a single asterisk). It is the default `fallback` of
 * `maskUserNameForLogs` and the default `replacement` used when redacting.
 */
export const CURRENT_USER_REDACTION_TOKEN = "*";
/** Options accepted by `redactCurrentUserText` and `redactCurrentUserValue`. */
export interface CurrentUserRedactionOptions {
/** When explicitly `false`, text is returned unchanged; any other value leaves redaction on. */
enabled?: boolean;
/** Fallback mask text; it is trimmed, and a blank value falls back to `CURRENT_USER_REDACTION_TOKEN`. */
replacement?: string;
/** User names to redact; when provided, used instead of the names detected from the environment/OS. */
userNames?: string[];
/** Home directories to redact; when provided, used instead of the detected/derived home directories. */
homeDirs?: string[];
}
/** Resolved set of strings to look for in log text, plus the fallback mask text. */
type CurrentUserCandidates = {
// User names that should be masked.
userNames: string[];
// Home directory paths whose last segment should be masked.
homeDirs: string[];
// Fallback mask text handed to `maskUserNameForLogs`.
replacement: string;
};
/**
 * Type guard for "plain" objects: non-null, non-array objects whose prototype
 * is either `Object.prototype` or `null`. Class instances, dates, maps, etc.
 * are rejected.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null) return false;
  if (typeof value !== "object") return false;
  if (Array.isArray(value)) return false;

  const prototype = Object.getPrototypeOf(value);
  return prototype === null || prototype === Object.prototype;
}
/**
 * Escapes every regular-expression metacharacter in `value` with a backslash
 * so the result can be embedded in a `RegExp` source and match literally.
 */
function escapeRegExp(value: string) {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return value.replace(metaCharacters, (matched) => "\\" + matched);
}
/**
 * Trims each entry, drops null/undefined/blank ones, and returns the remaining
 * strings de-duplicated in first-seen order.
 */
function uniqueNonEmpty(values: Array<string | null | undefined>) {
  const seen = new Set<string>();
  for (const raw of values) {
    const cleaned = raw?.trim() ?? "";
    if (cleaned) seen.add(cleaned);
  }
  return Array.from(seen);
}
/**
 * Splits a path on runs of `/` or `\` and returns the non-empty segments.
 * Trailing separators are ignored.
 */
function splitPathSegments(value: string) {
  const withoutTrailingSeparators = value.replace(/[\\/]+$/, "");
  const pieces = withoutTrailingSeparators.split(/[\\/]+/);
  return pieces.filter((piece) => piece.length > 0);
}
/**
 * Returns `pathValue` with its final segment swapped for `replacement`.
 * Trailing separators are stripped first; everything up to and including the
 * last `/` or `\` is kept. A path with no separator becomes just `replacement`.
 */
function replaceLastPathSegment(pathValue: string, replacement: string) {
  const trimmedPath = pathValue.replace(/[\\/]+$/, "");
  const slashAt = trimmedPath.lastIndexOf("/");
  const backslashAt = trimmedPath.lastIndexOf("\\");
  const cutAt = slashAt > backslashAt ? slashAt : backslashAt;

  if (cutAt === -1) return replacement;

  const parentWithSeparator = trimmedPath.slice(0, cutAt + 1);
  return parentWithSeparator + replacement;
}
/**
 * Masks a user name for logging: keeps the first character and replaces every
 * remaining character with `*` (always at least one `*`).
 *
 * The name is processed by Unicode code point, so a leading character outside
 * the Basic Multilingual Plane is kept whole instead of being cut into a lone
 * surrogate.
 *
 * @param value - User name to mask; surrounding whitespace is ignored.
 * @param fallback - Returned as-is when `value` is empty or whitespace only.
 * @returns The masked name, or `fallback` for blank input.
 */
export function maskUserNameForLogs(value: string, fallback = CURRENT_USER_REDACTION_TOKEN): string {
  const codePoints = Array.from(value.trim());
  const firstCharacter = codePoints[0];
  if (firstCharacter === undefined) return fallback;
  return `${firstCharacter}${"*".repeat(Math.max(1, codePoints.length - 1))}`;
}
/**
 * Collects candidate names for the current user from the `USER`, `LOGNAME`
 * and `USERNAME` environment variables plus `os.userInfo()`, returning the
 * trimmed, non-empty, de-duplicated set.
 */
function defaultUserNames() {
  let osReportedName: string | undefined;
  try {
    osReportedName = os.userInfo().username;
  } catch {
    // userInfo() can throw in some environments; the env vars below suffice.
  }

  const { USER, LOGNAME, USERNAME } = process.env;
  return uniqueNonEmpty([USER, LOGNAME, USERNAME, osReportedName]);
}
/**
 * Builds the list of candidate home directories: `HOME`, `USERPROFILE`,
 * `os.homedir()`, and for every given user name the conventional
 * `/Users/<name>`, `/home/<name>` and `C:\Users\<name>` locations.
 * The result is trimmed, non-empty and de-duplicated.
 */
function defaultHomeDirs(userNames: string[]) {
  let osHomeDir: string | undefined;
  try {
    osHomeDir = os.homedir();
  } catch {
    // Fall back to the env vars and per-user guesses.
  }

  const conventionalDirs = userNames.flatMap((name) => [
    `/Users/${name}`,
    `/home/${name}`,
    `C:\\Users\\${name}`,
  ]);

  return uniqueNonEmpty([
    process.env.HOME,
    process.env.USERPROFILE,
    osHomeDir,
    ...conventionalDirs,
  ]);
}
// Module-level memo so environment/OS lookups happen at most once.
let cachedCurrentUserCandidates: CurrentUserCandidates | null = null;

/**
 * Returns the detected user names / home dirs with the default replacement
 * token, computing them on first use and reusing the cached object afterwards.
 */
function getDefaultCurrentUserCandidates(): CurrentUserCandidates {
  if (cachedCurrentUserCandidates === null) {
    const detectedNames = defaultUserNames();
    cachedCurrentUserCandidates = {
      userNames: detectedNames,
      homeDirs: defaultHomeDirs(detectedNames),
      replacement: CURRENT_USER_REDACTION_TOKEN,
    };
  }
  return cachedCurrentUserCandidates;
}
/**
 * Merges caller options with the detected defaults. Explicit `userNames` /
 * `homeDirs` replace the detected lists (and are cleaned the same way); a
 * blank or missing `replacement` falls back to the default token.
 */
function resolveCurrentUserCandidates(opts?: CurrentUserRedactionOptions) {
  const detected = getDefaultCurrentUserCandidates();
  const trimmedReplacement = opts?.replacement?.trim();

  return {
    userNames: uniqueNonEmpty(opts?.userNames ?? detected.userNames),
    homeDirs: uniqueNonEmpty(opts?.homeDirs ?? detected.homeDirs),
    replacement: trimmedReplacement ? trimmedReplacement : detected.replacement,
  };
}
/**
 * Masks the current user's home directories and user names inside `input`.
 *
 * Home directories are handled first (longest first): each occurrence keeps
 * its parent path and has only the last segment masked via
 * `maskUserNameForLogs`. Stand-alone user names are then masked wherever they
 * are not adjacent to `[A-Za-z0-9._-]` characters.
 *
 * @param input - Text to redact; empty input is returned unchanged.
 * @param opts - Optional overrides; `enabled: false` disables redaction.
 * @returns The redacted text.
 */
export function redactCurrentUserText(input: string, opts?: CurrentUserRedactionOptions): string {
  if (!input) return input;
  if (opts?.enabled === false) return input;
  const { userNames, homeDirs, replacement } = resolveCurrentUserCandidates(opts);
  let result = input;

  // Strip trailing separators before matching: the replacement path never has
  // one, so matching "/home/bob/" verbatim would swallow the separator and
  // glue the next segment onto the masked name.
  const normalizedHomeDirs = Array.from(
    new Set(homeDirs.map((dir) => dir.replace(/[\\/]+$/, ""))),
  ).sort((a, b) => b.length - a.length);

  for (const homeDir of normalizedHomeDirs) {
    const lastSegment = splitPathSegments(homeDir).pop();
    // A root-only home dir ("/" or "\") has nothing to mask; replacing it
    // would rewrite every path separator in the text.
    if (!lastSegment) continue;
    if (!result.includes(homeDir)) continue;
    const replacementDir = replaceLastPathSegment(homeDir, maskUserNameForLogs(lastSegment, replacement));
    // NOTE(review): plain substring match, so a home dir that is a prefix of a
    // longer path segment (e.g. /home/bob inside /home/bobby) is rewritten too.
    result = result.split(homeDir).join(replacementDir);
  }

  for (const userName of [...userNames].sort((a, b) => b.length - a.length)) {
    if (!result.includes(userName)) continue;
    const pattern = new RegExp(`(?<![A-Za-z0-9._-])${escapeRegExp(userName)}(?![A-Za-z0-9._-])`, "g");
    const maskedName = maskUserNameForLogs(userName, replacement);
    // Replacer function: the mask is inserted literally, never parsed for `$` patterns.
    result = result.replace(pattern, () => maskedName);
  }
  return result;
}
/**
 * Recursively applies `redactCurrentUserText` to every string found in
 * `value`. Arrays and plain objects are copied with their entries redacted;
 * any other value (numbers, class instances, dates, ...) is returned as-is.
 * Object keys are not redacted.
 *
 * @param value - Value to redact; it is not mutated.
 * @param opts - Options forwarded to `redactCurrentUserText`.
 * @returns A redacted copy for strings, arrays and plain objects; otherwise `value` itself.
 */
export function redactCurrentUserValue<T>(value: T, opts?: CurrentUserRedactionOptions): T {
  if (typeof value === "string") {
    return redactCurrentUserText(value, opts) as T;
  }
  if (Array.isArray(value)) {
    return value.map((entry) => redactCurrentUserValue(entry, opts)) as T;
  }
  if (!isPlainObject(value)) {
    return value;
  }
  // Build via Object.fromEntries so every key becomes an own data property.
  // Plain `obj[key] = …` assignment would invoke the `__proto__` setter for an
  // own "__proto__" key (e.g. from JSON.parse), dropping that entry and
  // changing the copy's prototype.
  const redacted: Record<string, unknown> = Object.fromEntries(
    Object.entries(value).map(([key, entry]): [string, unknown] => [
      key,
      redactCurrentUserValue(entry, opts),
    ]),
  );
  return redacted as T;
}