/**
 * @module Summarizer
 * Utility functions for performing text summarization and content extraction.
 */

/**
 * Splits whitespace-normalized text into sentences.
 *
 * A sentence ends at a run of `.`, `!` or `?` (optionally followed by closing
 * quotes or brackets) that is itself followed by whitespace or the end of the
 * text. Requiring that boundary keeps decimals such as "3.14" and ellipses
 * such as "Wait..." inside a single sentence. Trailing text without a
 * terminator is returned as a final sentence. Fragments made up solely of
 * terminators and whitespace are dropped. Abbreviations ("Dr. Smith") are not
 * recognized and will still be split.
 */
function splitIntoSentences(text: string): string[] {
    const sentencePattern = /\S[\s\S]*?(?:[.!?]+["'\u201D\u2019)\]]*(?=\s|$)|$)/g;
    const candidates: string[] = text.match(sentencePattern) || [];
    return candidates.filter((candidate) => /[^\s.!?]/.test(candidate));
}

/** Counts whitespace-separated tokens in a sentence. */
function countWords(sentence: string): number {
    return sentence.trim().split(/\s+/).length;
}

/**
 * Performs basic extractive summarization on a block of raw text.
 *
 * Each sentence is scored by its length (5 + 0.2 per word, capped at 10) plus
 * a bonus of 4 when it is one of the first three or last two sentences. The
 * highest-scoring sentences are selected until the word budget, or an estimated
 * sentence budget of one sentence per 30 words, is exhausted. The selected
 * sentences are then emitted in their original document order so the summary
 * reads coherently.
 *
 * @param rawText The long string of content to summarize.
 * @param maxLengthWords The target maximum number of words for the summary. Defaults to 300 words.
 * @returns The summary text. Returns a fixed message when `rawText` is empty or
 * whitespace-only. When no single sentence fits the budget, the top-scoring
 * sentence is truncated to `maxLengthWords` words instead of returning nothing.
 */
export function summarize(rawText: string, maxLengthWords: number = 300): string {
    if (!rawText || rawText.trim().length === 0) {
        return "No content provided to summarize.";
    }

    // Collapse line breaks so sentence detection works on a single line of text.
    const cleanedText = rawText.replace(/[\r\n]+/g, ' ').trim();
    const sentences = splitIntoSentences(cleanedText);

    if (sentences.length === 0) {
        // The text contains nothing but punctuation; fall back to simple truncation.
        return cleanedText.substring(0, Math.min(350, cleanedText.length));
    }

    // --- Simple scoring logic for the extractive summary ---
    const scored = sentences.map((text, index) => {
        const wordCount = countWords(text);
        // Introductory and concluding sentences tend to carry the main points.
        const isEdgeSentence = index < 3 || index >= Math.max(1, sentences.length - 2);
        const score = Math.min(10, 5 + wordCount * 0.2) + (isEdgeSentence ? 4 : 0);
        return { index, text, wordCount, score };
    });

    // Sort a copy so `scored` keeps document order; ties keep document order too
    // because Array.prototype.sort is stable.
    const ranked = [...scored].sort((a, b) => b.score - a.score);

    const maxSentencesToTarget = Math.ceil(maxLengthWords / 30); // Estimated sentence budget
    const selected: typeof scored = [];
    let currentWordCount = 0;

    // Take top-ranked sentences until one no longer fits. Over-long sentences are
    // skipped only while nothing has been selected yet.
    for (const candidate of ranked) {
        const fitsBudget =
            currentWordCount + candidate.wordCount <= maxLengthWords &&
            selected.length < maxSentencesToTarget;

        if (fitsBudget) {
            selected.push(candidate);
            currentWordCount += candidate.wordCount;
        } else if (selected.length > 0) {
            break;
        }
    }

    if (selected.length === 0) {
        // No sentence fits the budget: truncate the best one rather than
        // returning an empty summary for non-empty input.
        const [best] = ranked;
        if (best === undefined) {
            return "";
        }
        const wordLimit = Math.max(0, Math.floor(maxLengthWords));
        return best.text.split(/\s+/).slice(0, wordLimit).join(" ");
    }

    // Restore document order before joining.
    return selected
        .sort((a, b) => a.index - b.index)
        .map((entry) => entry.text)
        .join(" ");
}


/**
 * Returns the given text without summarizing or filtering it, for callers that
 * need the maximum amount of context preserved.
 *
 * The only transformation applied is that every run of carriage returns and/or
 * line feeds is replaced by a single space, and the result is trimmed. Other
 * whitespace is left as-is.
 * @param rawText The raw, extracted text block.
 * @returns The flattened text, or an empty string when the input is empty or whitespace-only.
 */
export function extractFullText(rawText: string): string {
    if (rawText && rawText.trim().length > 0) {
        // Flatten line breaks so formatting does not leave large gaps in the output.
        const singleLine = rawText.replace(/[\r\n]+/g, ' ');
        return singleLine.trim();
    }
    return "";
}
crawler