Project Files
packages / core / src / safety.ts
import type { RagEvidenceBlock } from "./contracts";
import type {
RagSafetySanitizationOptions,
RagStrictGroundingMode,
} from "./policyContracts";
const INSTRUCTIONAL_PATTERNS = [
/\b(ignore (all|any|previous|prior) instructions?)\b/gi,
/\b(system prompt)\b/gi,
/\bdeveloper message\b/gi,
/\bdo not follow the above\b/gi,
/\byou are chatgpt\b/gi,
/\bact as\b/gi,
/\bfollow these steps\b/gi,
/\brespond with only\b/gi,
];
export function containsCoreInstructionLikeText(value: string) {
return INSTRUCTIONAL_PATTERNS.some((pattern) => {
pattern.lastIndex = 0;
return pattern.test(value);
});
}
function normalizeWhitespace(value: string) {
return value
.replace(/\r\n/g, "\n")
.replace(/[\t ]+/g, " ")
.replace(/\n{3,}/g, "\n\n")
.trim();
}
export function sanitizeCoreRetrievedText(
value: string,
options: RagSafetySanitizationOptions
) {
if (!options.sanitizeRetrievedText) {
return value;
}
let sanitized = normalizeWhitespace(value)
.replace(/<script[\s\S]*?<\/script>/gi, " ")
.replace(/<style[\s\S]*?<\/style>/gi, " ")
.replace(/<[^>]+>/g, " ")
.replace(/```[\s\S]*?```/g, (match) => normalizeWhitespace(match))
.replace(/\[([^\]]+)\]\(([^)]+)\)/g, "$1");
if (options.stripInstructionalSpans) {
for (const pattern of INSTRUCTIONAL_PATTERNS) {
sanitized = sanitized.replace(pattern, "[instruction-like text removed]");
}
}
return normalizeWhitespace(sanitized);
}
export function sanitizeCoreEvidenceBlocks(
blocks: Array<RagEvidenceBlock>,
options: RagSafetySanitizationOptions
): Array<RagEvidenceBlock> {
return blocks.map((block) => ({
...block,
content: sanitizeCoreRetrievedText(block.content, options),
}));
}
export function buildCoreGroundingInstruction(
strictGroundingMode: RagStrictGroundingMode
) {
if (strictGroundingMode === "require-evidence") {
return (
"Use only the evidence above when answering. If the evidence does not support an answer, say that clearly and do not guess."
);
}
if (strictGroundingMode === "warn-on-weak-evidence") {
return (
"Prefer the evidence above when it is relevant. If the evidence is weak or incomplete, say so clearly before giving a cautious answer."
);
}
return (
"Use the evidence above when it is relevant and supported by the cited file content. If the evidence is insufficient, say so clearly instead of guessing."
);
}