// Forked from brdcastro/maestro
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.promptPreprocessor = promptPreprocessor;
const sdk_1 = require("@lmstudio/sdk");
const promises_1 = require("fs/promises");
const path_1 = require("path");
const config_1 = require("./config");
const toolsDocumentation_1 = require("./toolsDocumentation");
const stateManager_1 = require("./stateManager");
const workflowCoordinator_1 = require("./workflowCoordinator");
const loopGuard_1 = require("../loopGuard");
const denialTracker_1 = require("./denialTracker");
const secondaryAgent_1 = require("./secondaryAgent");
/**
 * Preprocesses each user prompt before it reaches the model.
 *
 * Responsibilities, in order:
 *  1. Resiliently extract the prompt text (ChatMessage or plain string).
 *  2. Run RAG / context injection over attached files (full injection or
 *     retrieval, chosen by `chooseContextInjectionStrategy`).
 *  3. Append delegation hints, workflow-phase guidance, and system hints.
 *  4. On the first turn of a conversation, reset persisted state and inject
 *     the tools documentation (plus optional startup.md content).
 *
 * @param {object} ctl - Plugin controller provided by the LM Studio SDK.
 * @param {object|string} userMessage - Incoming ChatMessage, or a raw string.
 * @returns {Promise<string>} The fully augmented prompt content.
 */
async function promptPreprocessor(ctl, userMessage) {
    // Resilient text extraction — handle both ChatMessage and string
    let userPrompt;
    try {
        userPrompt = typeof userMessage === "string" ? userMessage : userMessage.getText();
    }
    catch {
        userPrompt = String(userMessage);
    }
    // 1. RAG / Context Injection Logic — wrapped in try/catch since SDK calls can fail
    let history = null;
    try {
        history = await ctl.pullHistory();
    }
    catch (e) {
        try {
            ctl.debug(`pullHistory failed: ${e instanceof Error ? e.message : String(e)}`);
        }
        catch { /* ignore */ }
    }
    const persistedState = await (0, stateManager_1.getPersistedState)();
    // Determine first turn from CONVERSATION history (not global plugin state).
    //
    // Previously we used `persistedState.messageCount === 0`, but that counter is
    // persisted to disk globally — it only hits 0 on the very first ever use of the
    // plugin after install. Every subsequent conversation had isFirstTurn = false,
    // which meant tool documentation / design mode rules NEVER got injected after
    // the first conversation.
    //
    // Correct signal: if no assistant message has appeared in THIS history yet,
    // we're on the first user→assistant turn of this conversation.
    let isFirstTurn = false;
    try {
        if (history && typeof history.getMessagesArray === "function") {
            const msgs = history.getMessagesArray();
            const priorAssistantCount = msgs.filter((m) => {
                try {
                    return typeof m.getRole === "function" && m.getRole() === "assistant";
                }
                catch {
                    return false;
                }
            }).length;
            isFirstTurn = priorAssistantCount === 0;
        }
        else {
            // Fallback when history API is unavailable.
            isFirstTurn = persistedState.messageCount === 0;
        }
    }
    catch {
        isFirstTurn = persistedState.messageCount === 0;
    }
    try {
        if (history && typeof history.append === "function") {
            history.append(userMessage);
        }
    }
    catch { /* append failed — non-critical */ }
    let processingResult = null;
    try {
        const getFiles = typeof userMessage !== "string" && typeof userMessage.getFiles === "function"
            ? userMessage.getFiles(ctl.client).filter((f) => f.type !== "image")
            : [];
        const allFiles = history && typeof history.getAllFiles === "function"
            ? history.getAllFiles(ctl.client).filter((f) => f.type !== "image")
            : [];
        if (getFiles.length > 0) {
            const strategy = await chooseContextInjectionStrategy(ctl, userPrompt, getFiles);
            if (strategy === "inject-full-content" && typeof userMessage !== "string") {
                processingResult = await prepareDocumentContextInjection(ctl, userMessage);
            }
            else if (strategy === "retrieval") {
                processingResult = await prepareRetrievalResultsContextInjection(ctl, userPrompt, allFiles);
            }
        }
        else if (allFiles.length > 0) {
            // No new files on this message, but earlier messages attached some:
            // run retrieval against the conversation-wide file set.
            processingResult = await prepareRetrievalResultsContextInjection(ctl, userPrompt, allFiles);
        }
    }
    catch (e) {
        try {
            ctl.debug(`RAG processing failed: ${e instanceof Error ? e.message : String(e)}`);
        }
        catch { /* ignore */ }
    }
    // Determine the current content after RAG processing
    let currentContent;
    if (processingResult) {
        if (typeof processingResult === 'string') {
            currentContent = processingResult;
        }
        else {
            try {
                currentContent = processingResult.getText();
            }
            catch {
                currentContent = userPrompt;
            }
        }
    }
    else {
        currentContent = userPrompt;
    }
    // --- Delegation & Safety Instructions ---
    const pluginConfig = ctl.getPluginConfig(config_1.toolsPluginConfig);
    const enableSecondary = pluginConfig.get("enableSecondaryAgent");
    const frequency = pluginConfig.get("subAgentFrequency");
    // Build sub-agent capabilities from merged permission level.
    // NOTE: file-system READ access is always granted regardless of level, so
    // no flag is needed for it (the old `permLevel !== "read_only" ||
    // permLevel === "read_only"` expression was a tautology and was never read).
    const permLevel = pluginConfig.get("subAgentPermissions") || "standard";
    const subAllowWeb = permLevel === "standard" || permLevel === "full";
    const subAllowCode = permLevel === "full";
    const subCaps = ["memory operations", "summarization", "file reading"];
    if (subAllowWeb)
        subCaps.push("web search");
    if (subAllowCode)
        subCaps.push("code execution");
    if (enableSecondary && frequency !== "never") {
        const canDo = subCaps.join(", ");
        const cannotDo = [];
        if (!subAllowCode)
            cannotDo.push("coding", "writing/creating files");
        if (!subAllowWeb)
            cannotDo.push("web search");
        const cannotStr = cannotDo.length > 0
            ? ` Do NOT delegate ${cannotDo.join(" or ")} to it — handle those yourself.`
            : "";
        if (isFirstTurn) {
            currentContent += `\n\n**SYSTEM ADVICE:** A secondary agent (lighter model) is available via \`consult_secondary_agent\`.\nCapabilities: ${canDo}.${cannotStr}\n`;
        }
        else {
            // Pattern-based delegation suggestion on subsequent turns (no match = no hint)
            const delegationTriggers = [
                // Research & analysis
                { pattern: /\b(resum[aoe]|summar|sintetiz)/i, suggestion: "summarize this content" },
                { pattern: /\b(pesquis|research|investig|busca[re]?|procur[aer]|find.*about|look.*up)\b/i, suggestion: "research this topic" },
                { pattern: /\b(revis[aãe]|review|analys[ei]|analis[ae]|audit)\b/i, suggestion: "review/analyze this" },
                { pattern: /\b(compar[aei]|compare|diff|diferenç)/i, suggestion: "compare these items" },
                { pattern: /\b(explic[aã]|explain|what is|o que é|como funciona|how does)\b/i, suggestion: "explain or research background" },
                // Documentation & cataloging
                { pattern: /\b(document|documentaç|gerar doc|write doc)/i, suggestion: "draft documentation" },
                { pattern: /\b(list[ae]r|catalog|levant[ae])/i, suggestion: "catalog or list items" },
                // Creative & visual work
                { pattern: /\b(tratamento visual|visual treatment|lookbook|moodboard|style guide)/i, suggestion: "research visual references or generate supporting text" },
                { pattern: /\b(roteiro|screenplay|script|argumento|sinopse|synopsis)/i, suggestion: "analyze or format the script content" },
                { pattern: /\b(refator[ae]|refactor|reorganiz|reestrutur|restructur)/i, suggestion: "refactor or reorganize this code" },
                { pattern: /\b(test[ae]r|write tests|criar testes|unit test|coverage)\b/i, suggestion: "generate test cases" },
                { pattern: /\b(traduz|translat|localiz)/i, suggestion: "translate or localize this content" },
            ];
            // Hint for duplicate image prevention (only when user explicitly mentions duplicates)
            if (/\b(repet|duplicat|repetid|repeat)/i.test(userPrompt) &&
                /\b(imagens?|images?|galeri|fotos?|photos?)/i.test(userPrompt)) {
                currentContent += `\n\n[System: Use \`plan_image_layout\` to avoid repeating images across sections.]`;
            }
            for (const { pattern, suggestion } of delegationTriggers) {
                if (pattern.test(userPrompt)) {
                    currentContent += `\n\n💡 **Delegation opportunity:** Consider using \`consult_secondary_agent\` to ${suggestion} while you handle the main task.`;
                    break;
                }
            }
        }
    }
    const state = persistedState;
    // Reset state on the first turn of a new conversation
    if (isFirstTurn) {
        state.messageCount = 0;
        state.largeFilesSaved = [];
        state.workflowPhase = "understand";
        state.phaseToolCounts = { reads: 0, writes: 0, executions: 0, tests: 0 };
        (0, loopGuard_1.resetPhaseToolCounts)();
        denialTracker_1.denialTracker.reset();
        (0, secondaryAgent_1.resetAgentDepth)();
        await (0, stateManager_1.savePersistedState)(state);
    }
    // System hints — at most one per turn to avoid noise
    if (!isFirstTurn && state.messageCount > 0) {
        const isCheckpointTurn = state.messageCount % 4 === 0;
        const hasLargeFiles = state.largeFilesSaved && state.largeFilesSaved.length > 0;
        // ── Workflow phase tracking ──
        const prevPhase = state.workflowPhase || "understand";
        const currentPhase = (0, workflowCoordinator_1.inferPhase)(loopGuard_1.phaseToolCounts);
        const phaseGuidance = (0, workflowCoordinator_1.getPhaseGuidance)(prevPhase, currentPhase, loopGuard_1.phaseToolCounts);
        if (phaseGuidance) {
            currentContent += `\n\n${phaseGuidance}`;
        }
        // Compact phase indicator (every turn after first)
        currentContent += `\n\n[${(0, workflowCoordinator_1.getPhaseIndicator)(currentPhase, loopGuard_1.phaseToolCounts)}]`;
        state.workflowPhase = currentPhase;
        state.phaseToolCounts = { ...loopGuard_1.phaseToolCounts };
        // ── Denial summary (if any active restrictions) ──
        const denialSummary = denialTracker_1.denialTracker.getSummary();
        if (denialSummary) {
            currentContent += `\n\n${denialSummary}`;
        }
        if (isCheckpointTurn && state.messageCount >= 8) {
            // Token budget + anti-loop warning on long conversations
            currentContent += '\n\n⚠️ [System: Long conversation. Use `replace_text_in_file` for edits. NEVER repeat identical tool calls — if you already called a tool with the same parameters, use the result you got. If stuck, respond to the user instead of retrying.]';
        }
        else if (hasLargeFiles) {
            // Replace hint when large files exist
            currentContent += '\n\n[System: Use `replace_text_in_file` for edits on large files instead of rewriting with `save_file`.]';
        }
        else if (isCheckpointTurn) {
            // Memory reminder on other checkpoint turns
            currentContent += '\n\n[System: Remember to use `Remember` to store important user preferences or project facts.]';
        }
    }
    // 2. Tools Documentation & Memory Injection (first turn only)
    if (isFirstTurn) {
        const budget = pluginConfig.get("toolOutputBudget") || "generous";
        const allowCode = pluginConfig.get("allowCodeExecution");
        const allowMacOS = pluginConfig.get("allowMacOSIntegration");
        const enableMedia = pluginConfig.get("enableMediaAnalysis") !== false;
        const toolsDoc = (0, toolsDocumentation_1.buildToolsDocumentation)({
            allowGit: true,
            allowDb: allowCode,
            allowNotify: true,
            allowJavascript: allowCode,
            allowPython: allowCode,
            allowTerminal: allowCode,
            allowShell: allowCode,
            allowAppleScript: allowMacOS,
            allowScreenshot: allowMacOS,
            enableWikipedia: true,
            enableImageAnalysis: enableMedia,
            enableVideoAnalysis: enableMedia,
            enableSecondaryAgent: enableSecondary,
            subAgentCapabilities: subCaps.join(", "),
            enableDesignMode: pluginConfig.get("enableDesignMode"),
            enableVideoRendering: pluginConfig.get("enableVideoRendering"),
            verbosity: budget === "compact" ? "compact" : "full",
        });
        let injectionContent = toolsDoc;
        // startup.md (if present in the CWD) lists extra files whose contents
        // are prepended to the injected documentation, latest file first.
        try {
            const startupPath = (0, path_1.join)(state.currentWorkingDirectory, "startup.md");
            const startupContent = await (0, promises_1.readFile)(startupPath, "utf-8");
            const filesToRead = startupContent.split('\n').map(f => f.trim()).filter(f => f);
            for (const file of filesToRead) {
                const filePath = (0, path_1.join)(state.currentWorkingDirectory, file);
                try {
                    const fileContent = await (0, promises_1.readFile)(filePath, "utf-8");
                    if (fileContent.trim().length > 0) {
                        injectionContent = `\n\n---\n\n${fileContent}\n\n---\n\n${injectionContent}`;
                        ctl.debug(`${file} loaded and injected into context.`);
                    }
                }
                catch (e) {
                    ctl.debug(`Failed to load ${file} from startup.md.`);
                }
            }
        }
        catch (e) {
            ctl.debug("No startup.md file found or failed to load.");
        }
        // Only remind about Recall if auto-inject is off (otherwise memories are already loaded by the preprocessor)
        const autoInjectOn = currentContent.includes("[Persistent Memory");
        const recallReminder = autoInjectOn
            ? ""
            : `\n\n⚠️ REMINDER: Your FIRST tool call must be Recall("${userPrompt.substring(0, 80).replace(/"/g, '')}"). Do NOT skip this.`;
        currentContent = `${injectionContent}\n\n---${recallReminder}\n\n${currentContent}`;
    }
    // Update message count
    try {
        state.messageCount++;
        await (0, stateManager_1.savePersistedState)(state);
    }
    catch (e) {
        ctl.debug("Failed to update message count.", e);
    }
    // Always return the content as string — the merged preprocessor handles it
    return currentContent;
}
/**
 * Runs embedding-based retrieval over the provided files and builds a prompt
 * that either quotes the relevant citations above the user query, or — when
 * nothing scores above the affinity threshold — instructs the model to say
 * so and answer unaided.
 *
 * @param {object} ctl - Plugin controller (status UI, client, abort signal).
 * @param {string} originalUserPrompt - The raw user query to retrieve for.
 * @param {Array<object>} files - Non-image file handles to search.
 * @returns {Promise<string>} The processed prompt content.
 */
async function prepareRetrievalResultsContextInjection(ctl, originalUserPrompt, files) {
    // Hardcoded defaults (removed from user-facing config)
    const retrievalLimit = 3;
    const retrievalAffinityThreshold = 0.5;
    // Per-file status rows, keyed by file handle, populated by the callbacks below.
    const statusSteps = new Map();
    const retrievingStatus = ctl.createStatus({
        status: "loading",
        text: `Loading an embedding model for retrieval...`,
    });
    // Using the same model as rag-v1
    const model = await ctl.client.embedding.model("nomic-ai/nomic-embed-text-v1.5-GGUF", {
        signal: ctl.abortSignal,
    });
    retrievingStatus.setState({
        status: "loading",
        text: `Retrieving relevant citations for user query...`,
    });
    const result = await ctl.client.files.retrieve(originalUserPrompt, files, {
        embeddingModel: model,
        // Affinity threshold: 0.6 not implemented in SDK retrieve options directly usually,
        // but we filter below.
        limit: retrievalLimit,
        signal: ctl.abortSignal,
        onFileProcessList(filesToProcess) {
            for (const file of filesToProcess) {
                statusSteps.set(file, retrievingStatus.addSubStatus({
                    status: "waiting",
                    text: `Process ${file.name} for retrieval`,
                }));
            }
        },
        onFileProcessingStart(file) {
            statusSteps
                .get(file)
                .setState({ status: "loading", text: `Processing ${file.name} for retrieval` });
        },
        onFileProcessingEnd(file) {
            statusSteps
                .get(file)
                .setState({ status: "done", text: `Processed ${file.name} for retrieval` });
        },
        onFileProcessingStepProgress(file, step, progressInStep) {
            const verb = step === "loading" ? "Loading" : step === "chunking" ? "Chunking" : "Embedding";
            statusSteps.get(file).setState({
                status: "loading",
                text: `${verb} ${file.name} for retrieval (${(progressInStep * 100).toFixed(1)}%)`,
            });
        },
    });
    // Apply the affinity threshold ourselves (see note above).
    result.entries = result.entries.filter(entry => entry.score > retrievalAffinityThreshold);
    // inject retrieval result into the "processed" content
    let processedContent = "";
    const numRetrievals = result.entries.length;
    if (numRetrievals > 0) {
        // retrieval occurred and got results
        // show status
        retrievingStatus.setState({
            status: "done",
            text: `Retrieved ${numRetrievals} relevant citations for user query`,
        });
        ctl.debug("Retrieval results", result);
        // add results to prompt
        const prefix = "The following citations were found in the files provided by the user:\n\n";
        processedContent += prefix;
        let citationNumber = 1;
        // NOTE: loop variable renamed from `result` — it previously shadowed the
        // outer retrieval result used by ctl.addCitations below.
        result.entries.forEach(entry => {
            const completeText = entry.content;
            processedContent += `Citation ${citationNumber}: "${completeText}"\n\n`;
            citationNumber++;
        });
        await ctl.addCitations(result);
        const suffix = "Use the citations above to respond to the user query, only if they are relevant. " +
            `Otherwise, respond to the best of your ability without them.` +
            `\n\nUser Query:\n\n${originalUserPrompt}`;
        processedContent += suffix;
    }
    else {
        // retrieval occurred but no relevant citations found
        retrievingStatus.setState({
            status: "canceled",
            text: `No relevant citations found for user query`,
        });
        ctl.debug("No relevant citations found for user query");
        const noteAboutNoRetrievalResultsFound = "Important: No citations were found in the user files for the user query. " +
            `In less than one sentence, inform the user of this. ` +
            `Then respond to the query to the best of your ability.`;
        processedContent =
            noteAboutNoRetrievalResultsFound + `\n\nUser Query:\n\n${originalUserPrompt}`;
    }
    ctl.debug("Processed content", processedContent);
    return processedContent;
}
/**
 * Parses every non-image file attached to the message and rewrites the
 * message text to contain the full document contents followed by the
 * original user query ("inject-full-content" strategy).
 *
 * @param {object} ctl - Plugin controller (client, abort signal, debug).
 * @param {object} input - The ChatMessage being preprocessed; mutated in place.
 * @returns {Promise<object>} The same `input` message, possibly rewritten.
 */
async function prepareDocumentContextInjection(ctl, input) {
    const documentInjectionSnippets = new Map();
    const files = input.consumeFiles(ctl.client, file => file.type !== "image");
    for (const file of files) {
        // This should take no time as the result is already in the cache
        const { content } = await ctl.client.files.parseDocument(file, {
            signal: ctl.abortSignal,
        });
        ctl.debug((0, sdk_1.text) `
    Strategy: inject-full-content. Injecting full content of file '${file}' into the
    context. Length: ${content.length}.
  `);
        documentInjectionSnippets.set(file, content);
    }
    // Only rewrite the message when content was actually gathered. Previously
    // an empty file list fell through to input.replaceText(""), which wiped
    // the user's original prompt entirely.
    if (documentInjectionSnippets.size > 0) {
        let formattedFinalUserPrompt = "This is an Enriched Context Generation scenario.\n\nThe following content was found in the files provided by the user.\n";
        for (const [fileHandle, snippet] of documentInjectionSnippets) {
            formattedFinalUserPrompt += `\n\n** ${fileHandle.name} full content **\n\n${snippet}\n\n** end of ${fileHandle.name} **\n\n`;
        }
        formattedFinalUserPrompt += `Based on the content above, please provide a response to the user query.\n\nUser query: ${input.getText()}`;
        input.replaceText(formattedFinalUserPrompt);
    }
    return input;
}
/**
 * Measures how much of the model's context window the current chat occupies.
 *
 * @param {object} ctx - Chat history to render through the prompt template.
 * @param {object} model - LLM handle exposing template/token/context APIs.
 * @returns {Promise<{totalTokensInContext: number, modelContextLength: number,
 *   modelRemainingContextLength: number, contextOccupiedPercent: number}>}
 */
async function measureContextWindow(ctx, model) {
    // Render the history exactly as the model would see it, then count tokens.
    const rendered = await model.applyPromptTemplate(ctx);
    const usedTokens = await model.countTokens(rendered);
    const capacity = await model.getContextLength();
    return {
        totalTokensInContext: usedTokens,
        modelContextLength: capacity,
        modelRemainingContextLength: capacity - usedTokens,
        contextOccupiedPercent: (usedTokens / capacity) * 100,
    };
}
/**
 * Decides how attached documents should enter the context: inject them
 * verbatim ("inject-full-content") when file + prompt tokens fit within a
 * 70%-of-remaining-context budget, otherwise fall back to embedding-based
 * retrieval ("retrieval").
 *
 * @param {object} ctl - Plugin controller (status UI, client, abort signal).
 * @param {string} originalUserPrompt - The raw user query.
 * @param {Array<object>} files - Non-image file handles attached by the user.
 * @returns {Promise<"inject-full-content"|"retrieval">} Chosen strategy.
 */
async function chooseContextInjectionStrategy(ctl, originalUserPrompt, files) {
    const status = ctl.createStatus({
        status: "loading",
        text: `Deciding how to handle the document(s)...`,
    });
    const model = await ctl.client.llm.model();
    const ctx = await ctl.pullHistory();
    // Measure the context window
    const { totalTokensInContext, modelContextLength, modelRemainingContextLength, contextOccupiedPercent, } = await measureContextWindow(ctx, model);
    ctl.debug(`Context measurement result:\n\n` +
        `\tTotal tokens in context: ${totalTokensInContext}\n` +
        `\tModel context length: ${modelContextLength}\n` +
        `\tModel remaining context length: ${modelRemainingContextLength}\n` +
        `\tContext occupied percent: ${contextOccupiedPercent.toFixed(2)}%\n`);
    // Get token count of provided files
    let totalFileTokenCount = 0;
    let totalReadTime = 0;
    let totalTokenizeTime = 0;
    for (const file of files) {
        const startTime = performance.now();
        const loadingStatus = status.addSubStatus({
            status: "loading",
            text: `Loading parser for ${file.name}...`,
        });
        let actionProgressing = "Reading";
        let parserIndicator = "";
        const { content } = await ctl.client.files.parseDocument(file, {
            signal: ctl.abortSignal,
            onParserLoaded: parser => {
                loadingStatus.setState({
                    status: "loading",
                    text: `${parser.library} loaded for ${file.name}...`,
                });
                if (parser.library !== "builtIn") {
                    actionProgressing = "Parsing";
                    parserIndicator = ` with ${parser.library}`;
                }
            },
            onProgress: progress => {
                loadingStatus.setState({
                    status: "loading",
                    text: `${actionProgressing} file ${file.name}${parserIndicator}... (${(progress * 100).toFixed(2)}%)`,
                });
            },
        });
        loadingStatus.remove();
        totalReadTime += performance.now() - startTime;
        // tokenize file content
        const startTokenizeTime = performance.now();
        totalFileTokenCount += await model.countTokens(content);
        totalTokenizeTime += performance.now() - startTokenizeTime;
        // Already past what could ever fit — no point tokenizing more files.
        if (totalFileTokenCount > modelRemainingContextLength) {
            break;
        }
    }
    ctl.debug(`Total file read time: ${totalReadTime.toFixed(2)} ms`);
    ctl.debug(`Total tokenize time: ${totalTokenizeTime.toFixed(2)} ms`);
    // Calculate total token count of files + user prompt
    ctl.debug(`Original User Prompt: ${originalUserPrompt}`);
    const userPromptTokenCount = (await model.tokenize(originalUserPrompt)).length;
    const totalFilePlusPromptTokenCount = totalFileTokenCount + userPromptTokenCount;
    // Calculate the available context tokens: target at most 70% of what
    // remains, scaled down further by how full the context already is.
    const contextOccupiedFraction = contextOccupiedPercent / 100;
    const targetContextUseFraction = 0.7;
    const targetContextUsage = targetContextUseFraction * (1 - contextOccupiedFraction);
    const availableContextTokens = Math.floor(modelRemainingContextLength * targetContextUsage);
    // Debug log
    ctl.debug("Strategy Calculation:");
    ctl.debug(`\tTotal Tokens in All Files: ${totalFileTokenCount}`);
    ctl.debug(`\tTotal Tokens in User Prompt: ${userPromptTokenCount}`);
    ctl.debug(`\tModel Context Remaining: ${modelRemainingContextLength} tokens`);
    ctl.debug(`\tContext Occupied: ${contextOccupiedPercent.toFixed(2)}%`);
    ctl.debug(`\tAvailable Tokens: ${availableContextTokens}\n`);
    if (totalFilePlusPromptTokenCount > availableContextTokens) {
        const chosenStrategy = "retrieval";
        // FIX: the old message multiplied targetContextUsage into the text even
        // though availableContextTokens already includes that factor, so the
        // logged inequality was wrong. Report the actual comparison.
        ctl.debug(`Chosen context injection strategy: '${chosenStrategy}'. Total file + prompt token count ` +
            `${totalFilePlusPromptTokenCount} > available context tokens: ${availableContextTokens}`);
        status.setState({
            status: "done",
            text: `Chosen context injection strategy: '${chosenStrategy}'. Retrieval is optimal for the size of content provided`,
        });
        return chosenStrategy;
    }
    const chosenStrategy = "inject-full-content";
    status.setState({
        status: "done",
        text: `Chosen context injection strategy: '${chosenStrategy}'. All content can fit into the context`,
    });
    return chosenStrategy;
}
//# sourceMappingURL=promptPreprocessor.js.map