Forked from mindstudio/big-rag
Project Files
src / utils / supportedExtensions.ts
// Extension groups, one per document family. Every entry is lower-case and
// carries its leading dot.
const HTML_EXTENSIONS = [".htm", ".html", ".xhtml"];
const XML_EXTENSIONS = [".xml"];
const FB2_EXTENSIONS = [".fb2"];
const BSL_EXTENSIONS = [".bsl"];
const MARKDOWN_EXTENSIONS = [
  ".md",
  ".markdown",
  ".mdown",
  ".mdx",
  ".mkd",
  ".mkdn",
];
const TEXT_EXTENSIONS = [".txt", ".text"];
const PDF_EXTENSIONS = [".pdf"];
const EPUB_EXTENSIONS = [".epub"];
const IMAGE_EXTENSIONS = [".bmp", ".jpg", ".jpeg", ".png"];
const ARCHIVE_EXTENSIONS = [".rar"];

// All groups, in the order their members are inserted into SUPPORTED_EXTENSIONS.
const ALL_EXTENSION_GROUPS = [
  HTML_EXTENSIONS,
  XML_EXTENSIONS,
  FB2_EXTENSIONS,
  BSL_EXTENSIONS,
  MARKDOWN_EXTENSIONS,
  TEXT_EXTENSIONS,
  PDF_EXTENSIONS,
  EPUB_EXTENSIONS,
  IMAGE_EXTENSIONS,
  ARCHIVE_EXTENSIONS,
];

/** Lower-cased union of every extension from every group above. */
export const SUPPORTED_EXTENSIONS = new Set<string>(
  ALL_EXTENSION_GROUPS.flat().map((extension) => extension.toLowerCase()),
);

// Per-family lookup sets, built directly from the corresponding group.
export const HTML_EXTENSION_SET = new Set<string>(HTML_EXTENSIONS);
export const XML_EXTENSION_SET = new Set<string>(XML_EXTENSIONS);
export const FB2_EXTENSION_SET = new Set<string>(FB2_EXTENSIONS);
export const BSL_EXTENSION_SET = new Set<string>(BSL_EXTENSIONS);
export const MARKDOWN_EXTENSION_SET = new Set<string>(MARKDOWN_EXTENSIONS);
export const TEXT_EXTENSION_SET = new Set<string>(TEXT_EXTENSIONS);
export const IMAGE_EXTENSION_SET = new Set<string>(IMAGE_EXTENSIONS);
/**
 * Case-insensitive membership test against HTML_EXTENSION_SET.
 *
 * @param ext Extension to check; it is lower-cased before the lookup.
 * @returns true when the lower-cased value is present in the set.
 */
export function isHtmlExtension(ext: string): boolean {
  const normalized = ext.toLowerCase();
  return HTML_EXTENSION_SET.has(normalized);
}
/**
 * Case-insensitive membership test against XML_EXTENSION_SET.
 *
 * @param ext Extension to check; it is lower-cased before the lookup.
 * @returns true when the lower-cased value is present in the set.
 */
export function isXmlExtension(ext: string): boolean {
  const normalized = ext.toLowerCase();
  return XML_EXTENSION_SET.has(normalized);
}
/**
 * Case-insensitive membership test against FB2_EXTENSION_SET.
 *
 * @param ext Extension to check; it is lower-cased before the lookup.
 * @returns true when the lower-cased value is present in the set.
 */
export function isFb2Extension(ext: string): boolean {
  const normalized = ext.toLowerCase();
  return FB2_EXTENSION_SET.has(normalized);
}
/**
 * Case-insensitive membership test against BSL_EXTENSION_SET.
 *
 * @param ext Extension to check; it is lower-cased before the lookup.
 * @returns true when the lower-cased value is present in the set.
 */
export function isBslExtension(ext: string): boolean {
  const normalized = ext.toLowerCase();
  return BSL_EXTENSION_SET.has(normalized);
}
/**
 * Case-insensitive membership test against MARKDOWN_EXTENSION_SET.
 *
 * @param ext Extension to check; it is lower-cased before the lookup.
 * @returns true when the lower-cased value is present in the set.
 */
export function isMarkdownExtension(ext: string): boolean {
  const normalized = ext.toLowerCase();
  return MARKDOWN_EXTENSION_SET.has(normalized);
}
/**
 * Case-insensitive membership test against TEXT_EXTENSION_SET.
 *
 * @param ext Extension to check; it is lower-cased before the lookup.
 * @returns true when the lower-cased value is present in the set.
 */
export function isPlainTextExtension(ext: string): boolean {
  const normalized = ext.toLowerCase();
  return TEXT_EXTENSION_SET.has(normalized);
}
/**
 * Reports whether an extension is either a Markdown or a plain-text one.
 *
 * @param ext Extension to check; passed unchanged to both predicates.
 * @returns true when isMarkdownExtension or isPlainTextExtension accepts it.
 */
export function isTextualExtension(ext: string): boolean {
  // Markdown is checked first; plain text is only consulted when that fails.
  if (isMarkdownExtension(ext)) {
    return true;
  }
  return isPlainTextExtension(ext);
}
/**
 * Lists every entry of SUPPORTED_EXTENSIONS in default (code-unit) sort order.
 *
 * @returns A freshly allocated, sorted array; the underlying set is untouched.
 */
export function listSupportedExtensions(): string[] {
  const extensions = Array.from(SUPPORTED_EXTENSIONS);
  extensions.sort();
  return extensions;
}
/**
 * Turns a user-supplied list of file extensions into a set of dotted,
 * lower-case extensions.
 *
 * Tokens may be separated by commas, semicolons or any whitespace (including
 * newlines). A token without a leading dot gets one added. Tokens that are not
 * in SUPPORTED_EXTENSIONS are skipped with a console warning.
 *
 * Example: "pdf,epub,txt" -> Set { ".pdf", ".epub", ".txt" }
 *
 * @param raw Raw configuration value; may be null, undefined or blank.
 * @returns The recognised extensions, or null when the input is empty/blank or
 *   none of its tokens is a supported extension. null means "all supported
 *   types".
 */
export function parseEnabledExtensions(raw: string | null | undefined): Set<string> | null {
  // Missing or blank input: no restriction requested.
  if (raw == null || raw.trim() === "") {
    return null;
  }

  const enabled = new Set<string>();

  raw
    .split(/[,;\s\n]+/)
    .map((piece) => piece.trim().toLowerCase())
    // Leading/trailing separators produce empty pieces; drop them.
    .filter((piece) => piece.length > 0)
    .forEach((piece) => {
      // Normalise "pdf" and ".pdf" to the same dotted form.
      const candidate = piece.startsWith(".") ? piece : `.${piece}`;
      if (!SUPPORTED_EXTENSIONS.has(candidate)) {
        console.warn(`[Config] Unknown extension "${candidate}" in enabledFileTypes, ignoring.`);
        return;
      }
      enabled.add(candidate);
    });

  // Nothing recognised: fall back to "all supported types".
  return enabled.size === 0 ? null : enabled;
}