Project files
scripts/aidoc_toolchain.py
"""Runtime toolchain discovery, consent-gated installer, safe downloads, and path guards."""
from __future__ import annotations
import json
import os
import platform
import re
import shutil
import subprocess
import tarfile
import tempfile
import urllib.error
import urllib.request
import uuid
import zipfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
from aidoc_render_builtin import font_dirs, find_font_file
# ---------------------------------------------------------------------------
# Scientific toolchain helpers: Pandoc + TeX engine discovery, safe downloads,
# and safe archive extraction.
# ---------------------------------------------------------------------------
PLUGIN_ID = "ai-to-pdf-docx-odt-epub"
PLUGIN_VERSION = "1.0.0"
TOOLCHAIN_CACHE_ENV = "LMSTUDIO_AI_TO_DOCUMENT_TOOLCHAIN_DIR"
AUTO_INSTALL_DISABLE_ENV = "AIDOC_DISABLE_AUTO_INSTALL_ENGINES"
MAX_DOWNLOAD_BYTES = 512 * 1024 * 1024
DOWNLOAD_TIMEOUT_SECONDS = 120
ALLOWED_DOWNLOAD_HOSTS = {
"github.com",
"raw.githubusercontent.com",
"release-assets.githubusercontent.com",
"objects.githubusercontent.com",
"mirrors.ctan.org",
"ctan.math.illinois.edu",
}
UNSAFE_TEX_COMMAND_RE = re.compile(
r"\\(write18|input|include|openin|openout|read|write|directlua|luaexec|catcode|everyjob|special|includegraphics|usepackage|RequirePackage)\b(?:\s*\{[^{}]*\})?",
re.I,
)
def _parse_path_roots_from_env(name: str) -> List[Path]:
roots: List[Path] = []
raw = os.environ.get(name, "")
for item in raw.split(os.pathsep):
item = item.strip()
if not item:
continue
try:
roots.append(Path(item).expanduser().resolve())
except Exception:
continue
return roots
def _is_within(path: Path, root: Path) -> bool:
try:
path = path.resolve()
root = root.resolve()
return path == root or root in path.parents
except Exception:
return False
def _ensure_within_any(path: Path, roots: List[Path], purpose: str) -> Path:
    """Resolve *path* and fail loudly unless it sits under one of *roots*."""
    resolved = path.expanduser().resolve()
    normalized_roots = [root.expanduser().resolve() for root in roots]
    for root in normalized_roots:
        if _is_within(resolved, root):
            return resolved
    allowed_text = "; ".join(str(root) for root in normalized_roots) or "(none)"
    raise RuntimeError(f"Refusing {purpose} outside allowed directories: {resolved}. Allowed roots: {allowed_text}")
def resolve_safe_output_dir(value: object, request_path: str = "") -> Path:
    """Resolve an output directory restricted to the current working directory
    or roots explicitly allowlisted via the environment.

    An empty/missing *value* falls back to the working directory. The
    TypeScript side applies the same rule before creating directories; this
    guard exists so the outer layer is never the only line of defense.
    """
    working_dir = Path(os.getcwd()).resolve()
    if value is None or not str(value).strip():
        return working_dir
    allowed = [working_dir]
    allowed += _parse_path_roots_from_env("LMSTUDIO_AI_TO_DOCUMENT_ALLOWED_OUTPUT_ROOTS")
    return _ensure_within_any(Path(str(value)), allowed, "output_dir")
def resolve_safe_archives_dir(value: object) -> Optional[Path]:
    """Resolve a local installer-archive directory, restricted to known
    cache/vendor roots or an explicit environment allowlist. Returns None
    when no directory was supplied."""
    if value is None or not str(value).strip():
        return None
    allowed = [
        (_user_toolchain_base() / "_archives").resolve(),
        (plugin_root() / "vendor" / "toolchain").resolve(),
    ]
    allowed += _parse_path_roots_from_env("LMSTUDIO_AI_TO_DOCUMENT_ALLOWED_ARCHIVE_ROOTS")
    return _ensure_within_any(Path(str(value)), allowed, "archives_dir")
def plugin_root() -> Path:
    """Return the plugin root directory (two levels above this module)."""
    here = Path(__file__).resolve()
    return here.parents[1]
def platform_key() -> str:
    """Return an "<os>-<arch>" key such as "linux-x64" or "macos-arm64"."""
    os_name = platform.system().lower()
    arch = platform.machine().lower()
    arm = arch in {"arm64", "aarch64"} or "arm" in arch
    suffix = "arm64" if arm else "x64"
    if os_name.startswith("win"):
        return f"windows-{suffix}"
    if os_name == "darwin":
        return f"macos-{suffix}"
    if os_name == "linux":
        return f"linux-{suffix}"
    # Unknown OS: fall back to raw system/machine names.
    return f"{os_name}-{arch}"
def executable_name(base: str) -> str:
    """Append ".exe" on Windows; return *base* unchanged elsewhere."""
    if platform.system().lower().startswith("win"):
        return base + ".exe"
    return base
def user_agent() -> str:
    """User-Agent header value identifying this plugin to download hosts."""
    return "/".join([PLUGIN_ID, PLUGIN_VERSION])
def _validate_download_url(url: str) -> None:
    """Reject download URLs that are not HTTPS or not on the host allowlist."""
    from urllib.parse import urlparse
    parts = urlparse(url)
    if parts.scheme != "https":
        raise RuntimeError(f"Refusing non-HTTPS download URL: {url}")
    hostname = (parts.hostname or "").lower()
    if hostname in ALLOWED_DOWNLOAD_HOSTS:
        return
    raise RuntimeError(f"Refusing download from unapproved host: {hostname or url}")
def download_file(url: str, path: Path, max_bytes: int = MAX_DOWNLOAD_BYTES) -> None:
    """Download *url* to *path* with host allowlisting and a hard size cap.

    The payload is streamed into a ".partial" sibling and renamed into place
    only on success, so a failed download never leaves a truncated file at
    *path*.

    Raises RuntimeError when the URL (or the post-redirect URL) is not
    allowed, or when the payload exceeds *max_bytes*.
    """
    _validate_download_url(url)
    req = urllib.request.Request(url, headers={"User-Agent": user_agent()})
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp = path.with_name(path.name + ".partial")
    try:
        with urllib.request.urlopen(req, timeout=DOWNLOAD_TIMEOUT_SECONDS) as resp, tmp.open("wb") as fh:
            # urllib follows redirects transparently; re-check the final URL
            # so a redirect cannot escape the host allowlist.
            final_url = getattr(resp, "geturl", lambda: url)()
            if final_url and final_url != url:
                _validate_download_url(final_url)
            # Content-Length is server-supplied and may be absent or malformed.
            # Fix: treat a non-numeric value as "unknown" instead of raising an
            # unhandled ValueError; the streaming cap below still applies.
            declared = resp.headers.get("Content-Length")
            if declared and declared.strip().isdigit() and int(declared) > max_bytes:
                raise RuntimeError(f"Download is too large: {declared} bytes from {url}")
            total = 0
            while True:
                chunk = resp.read(1024 * 1024)
                if not chunk:
                    break
                total += len(chunk)
                if total > max_bytes:
                    raise RuntimeError(f"Download exceeded {max_bytes} bytes from {url}")
                fh.write(chunk)
        # Rename only after the write handle is closed (required on Windows).
        tmp.replace(path)
    finally:
        # On success the rename removed tmp; on failure drop the partial file.
        if tmp.exists():
            try:
                tmp.unlink()
            except Exception:
                pass
def _safe_archive_target(dest: Path, member_name: str) -> Path:
raw = member_name.replace("\\", "/")
if raw.startswith("/") or re.match(r"^[A-Za-z]:/", raw):
raise RuntimeError(f"Unsafe absolute archive path: {member_name}")
parts = Path(raw).parts
if any(part in {"..", ""} for part in parts):
raise RuntimeError(f"Unsafe archive path traversal: {member_name}")
target = (dest / raw).resolve()
dest_resolved = dest.resolve()
if target != dest_resolved and dest_resolved not in target.parents:
raise RuntimeError(f"Archive member escapes destination: {member_name}")
return target
def extract_archive(archive: Path, dest: Path) -> None:
    """Safely extract zip/tar archives without path traversal or links.

    Only plain files and directories are written; every member path is
    validated by _safe_archive_target before any filesystem write. Raises
    RuntimeError for symlinks, non-file tar members, unreadable members,
    or unsupported archive types.
    """
    dest.mkdir(parents=True, exist_ok=True)
    lower = archive.name.lower()
    if lower.endswith(".zip"):
        with zipfile.ZipFile(archive) as zf:
            for info in zf.infolist():
                # Validate the member path before creating anything.
                target = _safe_archive_target(dest, info.filename)
                # The high 16 bits of external_attr carry the Unix mode;
                # 0o120000 marks a symlink, which could redirect later writes.
                mode = (info.external_attr >> 16) & 0o170000
                if mode == 0o120000:
                    raise RuntimeError(f"Refusing symlink in zip archive: {info.filename}")
                if info.is_dir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                target.parent.mkdir(parents=True, exist_ok=True)
                # Stream member bytes manually (instead of zf.extract) so the
                # destination path is always the validated one.
                with zf.open(info, "r") as src, target.open("wb") as out:
                    shutil.copyfileobj(src, out)
    elif lower.endswith(".tar.gz") or lower.endswith(".tgz"):
        with tarfile.open(archive, "r:gz") as tf:
            for member in tf.getmembers():
                target = _safe_archive_target(dest, member.name)
                if member.isdir():
                    target.mkdir(parents=True, exist_ok=True)
                    continue
                # Reject symlinks, hardlinks, devices, FIFOs, and the rest.
                if not member.isfile():
                    raise RuntimeError(f"Refusing non-file archive member: {member.name}")
                src = tf.extractfile(member)
                if src is None:
                    raise RuntimeError(f"Could not read archive member: {member.name}")
                target.parent.mkdir(parents=True, exist_ok=True)
                with src, target.open("wb") as out:
                    shutil.copyfileobj(src, out)
    else:
        raise RuntimeError(f"Unsupported archive type: {archive.name}")
def find_extracted_executable(root: Path, exe: str) -> Optional[Path]:
    """Return the first file under *root* whose name matches the
    platform-suffixed *exe* name, compared case-insensitively."""
    wanted = executable_name(exe).lower()
    matches = (p for p in root.rglob("*") if p.is_file() and p.name.lower() == wanted)
    return next(matches, None)
def bundled_pandoc_path() -> Optional[str]:
    """Return pypandoc's bundled pandoc binary path, or None when pypandoc is
    absent, errors out, or points at a nonexistent file."""
    try:
        import pypandoc  # type: ignore
        candidate = pypandoc.get_pandoc_path()
        if candidate and Path(candidate).exists():
            return str(candidate)
    except Exception:
        return None
    return None
def pandoc_executable() -> Optional[str]:
    """Resolve pandoc: vendored copy first, then PATH, then pypandoc's bundle."""
    resolved = vendored_executable("pandoc")
    if not resolved:
        resolved = shutil.which("pandoc")
    if not resolved:
        resolved = bundled_pandoc_path()
    return resolved
def available_tex_engine() -> Optional[str]:
    """Pick the first installed TeX engine in preference order.

    Tectonic leads because it is the intended restricted engine for this
    plugin; the classic LaTeX engines are fallbacks.
    """
    preferred = ("tectonic", "xelatex", "lualatex", "pdflatex")
    return next((engine for engine in preferred if command_path(engine)), None)
def sanitize_unsafe_tex(markdown: str) -> Tuple[str, List[str]]:
    """Neutralize TeX commands that can read/write files or invoke extensions.

    Returns the sanitized text plus a (possibly empty) list of warnings
    naming the commands that were blocked.
    """
    hits: List[str] = []
    def _block(match: re.Match[str]) -> str:
        name = match.group(1)
        hits.append("\\" + name)
        return f"[blocked TeX command: \\{name}]"
    sanitized = UNSAFE_TEX_COMMAND_RE.sub(_block, markdown or "")
    notes: List[str] = []
    if hits:
        notes.append("Blocked unsafe TeX command(s) in document source: " + ", ".join(sorted(set(hits))))
    return sanitized, notes
# ---------------------------------------------------------------------------
# Runtime installer metadata. Engines are installed only through the explicit
# install_document_engines flow, never during normal document generation.
# ---------------------------------------------------------------------------
AIDOC_VERSION = "1.0.0"
USER_TOOLCHAIN_ENV = "LMSTUDIO_AI_TO_DOCUMENT_TOOLCHAIN_DIR"
PANDOC_VERSION = "3.9.0.2"
TECTONIC_VERSION = "0.16.9"
PANDOC_ARCHIVES = {
"windows-x64": f"pandoc-{PANDOC_VERSION}-windows-x86_64.zip",
"linux-x64": f"pandoc-{PANDOC_VERSION}-linux-amd64.tar.gz",
"macos-x64": f"pandoc-{PANDOC_VERSION}-x86_64-macOS.zip",
"macos-arm64": f"pandoc-{PANDOC_VERSION}-arm64-macOS.zip",
}
TECTONIC_ARCHIVES = {
"windows-x64": f"tectonic-{TECTONIC_VERSION}-x86_64-pc-windows-msvc.zip",
"linux-x64": f"tectonic-{TECTONIC_VERSION}-x86_64-unknown-linux-gnu.tar.gz",
"macos-x64": f"tectonic-{TECTONIC_VERSION}-x86_64-apple-darwin.tar.gz",
"macos-arm64": f"tectonic-{TECTONIC_VERSION}-aarch64-apple-darwin.tar.gz",
}
PANDOC_BASE_URL = f"https://github.com/jgm/pandoc/releases/download/{PANDOC_VERSION}"
TECTONIC_BASE_URL = f"https://github.com/tectonic-typesetting/tectonic/releases/download/tectonic%40{TECTONIC_VERSION}"
FONT_URLS = {
"NotoSerif-Regular.ttf": "https://github.com/notofonts/noto-fonts/raw/main/hinted/ttf/NotoSerif/NotoSerif-Regular.ttf",
"NotoSerif-Bold.ttf": "https://github.com/notofonts/noto-fonts/raw/main/hinted/ttf/NotoSerif/NotoSerif-Bold.ttf",
"NotoSerif-Italic.ttf": "https://github.com/notofonts/noto-fonts/raw/main/hinted/ttf/NotoSerif/NotoSerif-Italic.ttf",
"NotoSerif-BoldItalic.ttf": "https://github.com/notofonts/noto-fonts/raw/main/hinted/ttf/NotoSerif/NotoSerif-BoldItalic.ttf",
"NotoSansMono-Regular.ttf": "https://github.com/notofonts/noto-fonts/raw/main/hinted/ttf/NotoSansMono/NotoSansMono-Regular.ttf",
}
# STIX Two Math may be bundled with the plugin, which is preferred because
# web links rot with the dedication of museum fruit. If it is not bundled, the
# installer still tries a small list of direct upstream mirrors before giving up.
STIX_DIRECT_URLS = [
"https://ctan.math.illinois.edu/fonts/stix2-otf/STIXTwoMath-Regular.otf",
"https://mirrors.ctan.org/fonts/stix2-otf/STIXTwoMath-Regular.otf",
"https://raw.githubusercontent.com/stipub/stixfonts/master/fonts/static_otf/STIXTwoMath-Regular.otf",
"https://raw.githubusercontent.com/stipub/stixfonts/v2.13b171/fonts/static_otf/STIXTwoMath-Regular.otf",
]
STIX_ARCHIVE_URLS = [
"https://github.com/stipub/stixfonts/archive/refs/tags/v2.13b171.tar.gz",
"https://mirrors.ctan.org/fonts/stix2-otf.zip",
]
BUNDLED_STIX_FONT_FILENAMES = ("STIXTwoMath-Regular.otf", "STIX2Math.otf")
def _platform_key_for_install(requested: str = "auto") -> str:
requested = (requested or "auto").strip().lower()
if requested and requested != "auto":
return requested
return platform_key()
def _user_toolchain_base() -> Path:
    """Base directory for the user-installed toolchain (env override wins)."""
    configured = os.environ.get(USER_TOOLCHAIN_ENV, "").strip()
    if configured:
        return Path(configured).expanduser().resolve()
    default = Path.home() / ".lmstudio" / "ai-to-pdf-docx-odt-epub" / "toolchain"
    return default.resolve()
def _plugin_toolchain_base() -> Path:
    """Bundled (read-only) toolchain directory shipped inside the plugin."""
    vendor = plugin_root() / "vendor" / "toolchain"
    return vendor.resolve()
def _platform_dir(base: Path, key: Optional[str] = None) -> Path:
return base / (key or platform_key())
def _toolchain_search_dirs(key: Optional[str] = None) -> List[Path]:
    """Ordered toolchain lookup roots for platform *key* (host by default).

    Order: user-installed toolchain first, then the platform-specific bundled
    payload, then the platform-independent bundled payload — the last is where
    a manually bundled STIXTwoMath-Regular.otf lives.
    """
    platform_name = key or platform_key()
    return [
        _platform_dir(_user_toolchain_base(), platform_name),
        _platform_dir(_plugin_toolchain_base(), platform_name),
        _plugin_toolchain_base(),
    ]
def _font_search_dirs(primary: Optional[Path] = None, key: Optional[str] = None) -> List[Path]:
    """Candidate font directories, de-duplicated while preserving order.

    *primary* (when given) is searched first, followed by the fonts/
    subdirectory of every toolchain search root.
    """
    candidates: List[Path] = [] if primary is None else [primary]
    candidates.extend(base / "fonts" for base in _toolchain_search_dirs(key))
    unique: List[Path] = []
    seen = set()
    for candidate in candidates:
        try:
            marker = str(candidate.resolve())
        except Exception:
            marker = str(candidate)
        if marker not in seen:
            seen.add(marker)
            unique.append(candidate)
    return unique
def _find_required_font(names, primary: Optional[Path] = None, key: Optional[str] = None) -> Optional[Path]:
    """Return the first valid font file matching any of *names* across the
    merged font search directories, or None when nothing qualifies."""
    candidates = (
        directory / filename
        for directory in _font_search_dirs(primary, key)
        for filename in names
    )
    return next((c for c in candidates if _valid_component_file(c)), None)
def _bundled_stix_font(key: Optional[str] = None) -> Optional[Path]:
    """Find a bundled STIX math font inside the plugin payload only.

    The user toolchain is deliberately excluded: the installer uses this to
    seed STIX without ever touching the network.
    """
    platform_name = key or platform_key()
    search_dirs = (
        _platform_dir(_plugin_toolchain_base(), platform_name) / "fonts",
        _plugin_toolchain_base() / "fonts",
    )
    for directory in search_dirs:
        for filename in BUNDLED_STIX_FONT_FILENAMES:
            candidate = directory / filename
            if _valid_component_file(candidate):
                return candidate
    return None
def _find_font_in_extracted_archive(root: Path) -> Optional[Path]:
    """Search an extracted font archive for a usable STIX math font file."""
    for filename in BUNDLED_STIX_FONT_FILENAMES:
        match = next((p for p in root.rglob(filename) if _valid_component_file(p)), None)
        if match is not None:
            return match
    return None
def _try_download_stix_math_font(dest: Path, downloads_dir: Path, source: str, archives_dir: Optional[Path], force: bool, warnings: List[str]) -> bool:
    """Seed STIX Two Math from local/bundled files first, then online mirrors.

    The destination is always the per-platform user toolchain fonts directory.
    The plugin-level vendor/toolchain/fonts directory is only a read-only source.

    Returns True when a usable font ends up at *dest*; progress and failure
    details are appended to *warnings* either way. When source == "local",
    the network stages are skipped entirely.
    """
    dest.parent.mkdir(parents=True, exist_ok=True)
    # Some distributions ship the font under the legacy STIX2Math.otf name;
    # keep both filenames side by side so either spelling resolves later.
    alt_dest = dest.parent / "STIX2Math.otf"
    local_candidates: List[Path] = []
    if archives_dir:
        local_candidates.extend([archives_dir / name for name in BUNDLED_STIX_FONT_FILENAMES])
    bundled = _bundled_stix_font()
    if bundled:
        local_candidates.append(bundled)
    # Stage 1: copy from a local archive dir or the bundled plugin payload.
    for src in local_candidates:
        if _valid_component_file(src):
            try:
                shutil.copy2(src, dest)
                if src.name == "STIX2Math.otf":
                    shutil.copy2(src, alt_dest)
                warnings.append(f"STIX Two Math was copied from local/bundled font: {src}")
                return True
            except Exception as exc:
                warnings.append(f"Could not copy STIX Two Math from {src}: {exc}")
    # source=local is a hard no-network guarantee: stop here.
    if source == "local":
        warnings.append("STIX Two Math was not found in local archives or bundled plugin fonts; source=local prevents online download.")
        return False
    # Stage 2: direct single-file downloads from the mirror list.
    for url in STIX_DIRECT_URLS:
        tmp = downloads_dir / "STIXTwoMath-Regular.otf"
        try:
            if force or not _valid_component_file(tmp):
                download_file(url, tmp)
            if not _valid_component_file(tmp):
                raise RuntimeError("downloaded file is missing or too small")
            shutil.copy2(tmp, dest)
            warnings.append(f"STIX Two Math was downloaded from {url}")
            return True
        except Exception as exc:
            warnings.append(f"STIX Two Math direct download failed from {url}: {exc}")
            # Drop obviously-truncated partial files so the next mirror does
            # not mistake them for a cached font.
            try:
                if tmp.exists() and tmp.stat().st_size < 8192:
                    tmp.unlink()
            except Exception:
                pass
    # Stage 3: full font archives; extract and search for the math font.
    for url in STIX_ARCHIVE_URLS:
        archive_name = url.rstrip('/').split('/')[-1] or "stixfonts.archive"
        archive = downloads_dir / archive_name
        tmp_extract = Path(tempfile.mkdtemp(prefix="aidoc_stix_extract_"))
        try:
            if force or not _valid_component_file(archive):
                download_file(url, archive)
            extract_archive(archive, tmp_extract)
            found = _find_font_in_extracted_archive(tmp_extract)
            if not found:
                raise RuntimeError("STIXTwoMath-Regular.otf/STIX2Math.otf was not found inside archive")
            shutil.copy2(found, dest)
            if found.name == "STIX2Math.otf":
                shutil.copy2(found, alt_dest)
            warnings.append(f"STIX Two Math was extracted from {url}")
            return True
        except Exception as exc:
            warnings.append(f"STIX Two Math archive download failed from {url}: {exc}")
        finally:
            shutil.rmtree(tmp_extract, ignore_errors=True)
    return False
def toolchain_base_dir() -> Path:
    """Public alias for the user toolchain base directory."""
    base = _user_toolchain_base()
    return base
def vendored_platform_dir() -> Path:
    """User toolchain directory for the current host platform."""
    base = _user_toolchain_base()
    return _platform_dir(base)
def vendored_bin_dir() -> Path:
    """bin/ directory inside the per-platform user toolchain."""
    platform_dir = vendored_platform_dir()
    return platform_dir / "bin"
def vendored_fonts_dir() -> Path:
    """Fonts directory to report to callers.

    Prefers the user toolchain fonts dir when present, then the first existing
    fonts dir along the toolchain search path. Actual font resolution is
    merged through _font_search_dirs/_find_required_font regardless.
    """
    user_fonts = vendored_platform_dir() / "fonts"
    if user_fonts.exists():
        return user_fonts
    existing = next(
        (base / "fonts" for base in _toolchain_search_dirs() if (base / "fonts").exists()),
        None,
    )
    return existing if existing is not None else user_fonts
def vendored_tectonic_cache_dir() -> Path:
    """Tectonic bundle-cache directory inside the per-platform user toolchain."""
    platform_dir = vendored_platform_dir()
    return platform_dir / "tectonic-cache"
def vendored_executable(name: str) -> Optional[str]:
    """Find a vendored executable along the toolchain search path.

    Files smaller than 8 KiB are skipped as placeholders/corrupt copies.
    """
    filename = executable_name(name)
    for base in _toolchain_search_dirs():
        candidate = base / "bin" / filename
        if not (candidate.exists() and candidate.is_file()):
            continue
        try:
            big_enough = candidate.stat().st_size >= 8192
        except Exception:
            continue
        if big_enough:
            return str(candidate)
    return None
def command_path(name: str) -> Optional[str]:
    """Resolve *name* via the vendored toolchain first, then the system PATH."""
    vendored = vendored_executable(name)
    if vendored:
        return vendored
    return shutil.which(name)
def toolchain_status() -> Dict[str, Any]:
    """Snapshot of toolchain locations and readiness for the host platform."""
    user_platform = _platform_dir(_user_toolchain_base())
    fonts_dir = vendored_fonts_dir()
    user_pandoc = user_platform / "bin" / executable_name("pandoc")
    user_tectonic = user_platform / "bin" / executable_name("tectonic")
    return {
        "platform": platform_key(),
        "toolchain_dir": str(_user_toolchain_base()),
        "plugin_toolchain_dir": str(_plugin_toolchain_base()),
        "bin_dir": str(vendored_bin_dir()),
        "fonts_dir": str(fonts_dir),
        "required_fonts_ok": _required_fonts_exist(fonts_dir),
        "required_fonts_missing": _missing_required_fonts(fonts_dir),
        "tectonic_cache_dir": str(vendored_tectonic_cache_dir()),
        "pandoc": pandoc_executable() or "",
        "tex_engine": available_tex_engine() or "",
        "user_pandoc": str(user_pandoc) if user_pandoc.exists() else "",
        "user_tectonic": str(user_tectonic) if user_tectonic.exists() else "",
    }
def ensure_vendored_toolchain(auto_install: bool, warnings: List[str]) -> None:
    """Never installs anything; records a warning when auto-install was requested.

    Installation happens only through the consent-gated
    install_document_engines flow.
    """
    if not auto_install:
        return
    warnings.append(
        "Automatic engine installation is disabled during document generation. Use install_document_engines after explicit user consent."
    )
def _cleanup_download_cache(downloads_dir: Path, warnings: List[str], reason: str = "after successful extraction") -> None:
"""Delete cached installer archives after they have served their purpose."""
try:
if downloads_dir.exists():
shutil.rmtree(downloads_dir, ignore_errors=True)
warnings.append(f"Downloaded engine/font archives were removed from {downloads_dir} {reason}.")
except Exception as exc:
warnings.append(f"Could not remove downloaded archive cache {downloads_dir}: {exc}")
def _download_or_local(url: str, filename: str, downloads_dir: Path, source: str, archives_dir: Optional[Path], force: bool, warnings: List[str]) -> Path:
downloads_dir.mkdir(parents=True, exist_ok=True)
target = downloads_dir / filename
if source == "local":
if not archives_dir:
raise RuntimeError("source=local requires archives_dir")
local = archives_dir / filename
if not local.exists():
raise RuntimeError(f"Local archive not found: {local}")
if force or not target.exists() or target.stat().st_size != local.stat().st_size:
shutil.copy2(local, target)
return target
if force or not target.exists() or target.stat().st_size < 8192:
download_file(url, target)
return target
def _extract_executable_from_archive(archive: Path, exe_base: str, dest_bin: Path, force: bool) -> str:
    """Extract one executable from *archive* into *dest_bin*.

    A plausible existing copy (>= 8 KiB) is reused unless *force* is set.
    Extraction goes through a throwaway temp directory that is always removed.
    """
    dest_bin.mkdir(parents=True, exist_ok=True)
    target = dest_bin / executable_name(exe_base)
    if not force and target.exists() and target.stat().st_size >= 8192:
        return str(target)
    scratch = Path(tempfile.mkdtemp(prefix=f"aidoc_{exe_base}_extract_"))
    try:
        extract_archive(archive, scratch)
        found = find_extracted_executable(scratch, exe_base)
        if not found:
            raise RuntimeError(f"{executable_name(exe_base)} was not found in {archive.name}")
        shutil.copy2(found, target)
        try:
            # Best-effort: some filesystems reject chmod; the copy still counts.
            target.chmod(target.stat().st_mode | 0o755)
        except Exception:
            pass
        return str(target)
    finally:
        shutil.rmtree(scratch, ignore_errors=True)
REQUIRED_TEXT_FONT_FILES = [
"NotoSerif-Regular.ttf",
"NotoSerif-Bold.ttf",
"NotoSerif-Italic.ttf",
"NotoSerif-BoldItalic.ttf",
"NotoSansMono-Regular.ttf",
]
REQUIRED_MATH_FONT_FILES = ["STIXTwoMath-Regular.otf", "STIX2Math.otf"]
def _valid_component_file(path: Path, min_size: int = 8192) -> bool:
try:
return path.exists() and path.is_file() and path.stat().st_size > min_size
except Exception:
return False
def _missing_required_fonts(fonts_dir: Path) -> List[str]:
    """List required font files that cannot be resolved anywhere.

    User and bundled font directories are treated as one logical font set, so
    the plugin can ship STIXTwoMath-Regular.otf while the Noto fonts live in
    the normal user toolchain cache.
    """
    missing: List[str] = []
    for name in REQUIRED_TEXT_FONT_FILES:
        if _find_required_font([name], primary=fonts_dir) is None:
            missing.append(name)
    if _find_required_font(REQUIRED_MATH_FONT_FILES, primary=fonts_dir) is None:
        missing.append("STIXTwoMath-Regular.otf or STIX2Math.otf")
    return missing
def _required_fonts_exist(fonts_dir: Path) -> bool:
    """True when no required fonts are missing from the merged font set."""
    missing = _missing_required_fonts(fonts_dir)
    return len(missing) == 0
def _copy_or_download_fonts(fonts_dir: Path, downloads_dir: Path, source: str, archives_dir: Optional[Path], force: bool, warnings: List[str]) -> None:
    """Populate *fonts_dir* with the required Noto text fonts and STIX math font.

    Treats source == "local" as a strict no-network mode, matching the
    guarantee documented for the STIX seeding helper: a font missing from
    *archives_dir* now raises RuntimeError instead of silently falling back
    to a network download. Online mode is unchanged.

    Raises RuntimeError when a required font cannot be obtained.
    """
    fonts_dir.mkdir(parents=True, exist_ok=True)
    for filename, url in FONT_URLS.items():
        dest = fonts_dir / filename
        # Keep an existing plausible copy unless force is set.
        if dest.exists() and not force and dest.stat().st_size > 8192:
            continue
        if source == "local":
            local_copy = (archives_dir / filename) if archives_dir else None
            if local_copy is None or not local_copy.exists():
                raise RuntimeError(
                    f"Required font {filename} was not found in archives_dir; source=local prevents online download."
                )
            shutil.copy2(local_copy, dest)
        else:
            download_file(url, dest)
    stix_dest = fonts_dir / "STIXTwoMath-Regular.otf"
    stix_alt_dest = fonts_dir / "STIX2Math.otf"
    if (not force) and (_valid_component_file(stix_dest) or _valid_component_file(stix_alt_dest)):
        # Normalize: ensure the canonical filename exists when only the
        # legacy STIX2Math.otf name is present.
        if _valid_component_file(stix_alt_dest) and not _valid_component_file(stix_dest):
            shutil.copy2(stix_alt_dest, stix_dest)
        return
    if _try_download_stix_math_font(stix_dest, downloads_dir, source, archives_dir, force, warnings):
        return
    raise RuntimeError(
        "STIX Two Math font is missing. Tried local archives, bundled plugin fonts, "
        "and online mirrors. Put STIXTwoMath-Regular.otf into vendor/toolchain/fonts/ "
        "inside the plugin, or pass it via source=local archives_dir."
    )
def _copy_plugin_licenses(platform_dir: Path, key: str, warnings: List[str]) -> None:
    """Copy bundled license files into the staged toolchain and write
    SOURCE.txt provenance records for each third-party component.

    *warnings* is accepted for signature parity with the other install
    steps but is not currently written to.
    """
    dst = platform_dir / "licenses"
    dst.mkdir(parents=True, exist_ok=True)
    # Mirror the plugin's common license payload (if shipped) into the stage.
    src_common = _plugin_toolchain_base() / "licenses"
    if src_common.exists():
        for item in src_common.iterdir():
            target = dst / item.name
            if item.is_dir():
                # Replace directories wholesale so stale files do not linger.
                if target.exists():
                    shutil.rmtree(target, ignore_errors=True)
                shutil.copytree(item, target)
            elif item.is_file():
                shutil.copy2(item, target)
    (dst / "pandoc").mkdir(parents=True, exist_ok=True)
    (dst / "tectonic").mkdir(parents=True, exist_ok=True)
    (dst / "fonts").mkdir(parents=True, exist_ok=True)
    # Provenance records: which upstream archive/version each component
    # came from and under which license it is redistributed.
    (dst / "pandoc" / "SOURCE.txt").write_text(
        f"Component: Pandoc\nVersion: {PANDOC_VERSION}\nArchive: {PANDOC_ARCHIVES.get(key, 'unknown')}\nSource: {PANDOC_BASE_URL}\nLicense: GPL-2.0-or-later\n",
        encoding="utf-8",
    )
    (dst / "tectonic" / "SOURCE.txt").write_text(
        f"Component: Tectonic\nVersion: {TECTONIC_VERSION}\nArchive: {TECTONIC_ARCHIVES.get(key, 'unknown')}\nSource: {TECTONIC_BASE_URL}\nLicense: MIT with additional third-party notices upstream\n",
        encoding="utf-8",
    )
    (dst / "fonts" / "SOURCE.txt").write_text(
        "Component: Noto Serif, Noto Sans Mono, STIX Two Math\nSources: notofonts/noto-fonts and stipub/stixfonts\nLicense: SIL Open Font License 1.1\n",
        encoding="utf-8",
    )
def _write_toolchain_manifest(platform_dir: Path, key: str) -> None:
    """Record what was installed (versions and layout) as MANIFEST.json."""
    manifest = {
        "toolchain_version": AIDOC_VERSION,
        "installed_by": "ai-to-pdf-docx-odt-epub",
        "platform": key,
        "pandoc_version": PANDOC_VERSION,
        "tectonic_version": TECTONIC_VERSION,
        "bin": str(platform_dir / "bin"),
        "fonts": str(platform_dir / "fonts"),
        "tectonic_cache": str(platform_dir / "tectonic-cache"),
    }
    payload = json.dumps(manifest, ensure_ascii=False, indent=2)
    (platform_dir / "MANIFEST.json").write_text(payload, encoding="utf-8")
def _warm_tectonic_cache(platform_dir: Path, warnings: List[str]) -> bool:
    """Pre-fill Tectonic's bundle cache by compiling a representative document.

    Compiles a sample exercising fontspec, unicode-math, mathtools, and mhchem
    against the installed fonts so later document builds can run without
    network fetches. Returns True on success; failures are reported through
    *warnings* and never raised.
    """
    tectonic = str(platform_dir / "bin" / executable_name("tectonic"))
    if not Path(tectonic).exists():
        warnings.append("Cannot warm cache: tectonic executable is missing")
        return False
    cache_dir = platform_dir / "tectonic-cache"
    cache_dir.mkdir(parents=True, exist_ok=True)
    tmp = Path(tempfile.mkdtemp(prefix="aidoc_cache_warm_"))
    try:
        # Compile against a private copy of the installed fonts so the sample
        # matches real rendering conditions.
        (tmp / "fonts").mkdir(parents=True, exist_ok=True)
        for font in (platform_dir / "fonts").glob("*"):
            if font.is_file():
                shutil.copy2(font, tmp / "fonts" / font.name)
        tex = r'''
\documentclass{article}
\usepackage{fontspec}
\usepackage{unicode-math}
\usepackage{mathtools}
\usepackage[version=4]{mhchem}
\setmainfont[Path=fonts/,Extension=.ttf,UprightFont=NotoSerif-Regular,BoldFont=NotoSerif-Bold,ItalicFont=NotoSerif-Italic,BoldItalicFont=NotoSerif-BoldItalic]{NotoSerif}
\setsansfont[Path=fonts/,Extension=.ttf]{NotoSerif-Regular}
\setmonofont[Path=fonts/,Extension=.ttf]{NotoSansMono-Regular}
\IfFontExistsTF{fonts/STIXTwoMath-Regular.otf}{\setmathfont[Path=fonts/]{STIXTwoMath-Regular.otf}}{\setmathfont[Path=fonts/]{STIX2Math.otf}}
\begin{document}
Cyrillic, mathematics, and chemistry.
\[
i\hbar \frac{\partial}{\partial t}\Psi = \left(-\frac{\hbar^2}{2m}\nabla^2 + V\right)\Psi
\]
\[
\begin{cases}
\nabla\cdot\mathbf{E}=\rho/\varepsilon_0\\
\nabla\times\mathbf{B}=\mu_0\mathbf{J}+\mu_0\varepsilon_0\partial\mathbf{E}/\partial t
\end{cases}
\]
\[
\ce{N2 + 3H2 ->[Fe][450 ^\circ C] 2NH3}
\]
\end{document}
'''
        tex_path = tmp / "science.tex"
        tex_path.write_text(tex, encoding="utf-8")
        out = tmp / "out"
        out.mkdir(exist_ok=True)
        env = os.environ.copy()
        # Point Tectonic at the persistent per-platform cache directory.
        env["TECTONIC_CACHE_DIR"] = str(cache_dir)
        proc = subprocess.run([tectonic, str(tex_path), "--outdir", str(out)], cwd=str(tmp), env=env, text=True, capture_output=True, timeout=180)
        if proc.returncode != 0:
            # Keep only the log tail; TeX output is extremely verbose.
            warnings.append("Tectonic cache warm failed: " + (proc.stderr or proc.stdout)[-2000:])
            return False
        return True
    except Exception as exc:
        warnings.append(f"Tectonic cache warm failed: {exc}")
        return False
    finally:
        shutil.rmtree(tmp, ignore_errors=True)
def _platform_toolchain_complete(platform_dir: Path) -> bool:
    """True when pandoc, tectonic, and all required fonts are in place."""
    bin_dir = platform_dir / "bin"
    for exe in ("pandoc", "tectonic"):
        candidate = bin_dir / executable_name(exe)
        if not (candidate.exists() and candidate.is_file() and candidate.stat().st_size >= 8192):
            return False
    return _required_fonts_exist(platform_dir / "fonts")
def _existing_platform_executable(key: str, exe_base: str, final_platform_dir: Path) -> Optional[Path]:
    """Find a reusable copy of *exe_base*: a prior install first, then the
    platform-specific bundled plugin payload."""
    filename = executable_name(exe_base)
    search = (
        final_platform_dir / "bin" / filename,
        _platform_dir(_plugin_toolchain_base(), key) / "bin" / filename,
    )
    return next((p for p in search if _valid_component_file(p)), None)
def _copy_existing_executable(src: Path, exe_base: str, dest_bin: Path) -> str:
    """Copy a known-good executable into *dest_bin* and mark it executable."""
    dest_bin.mkdir(parents=True, exist_ok=True)
    target = dest_bin / executable_name(exe_base)
    shutil.copy2(src, target)
    try:
        # Best-effort chmod: some filesystems reject it; the copy still counts.
        target.chmod(target.stat().st_mode | 0o755)
    except Exception:
        pass
    return str(target)
def _copy_existing_or_download_executable(
    exe_base: str,
    url: str,
    filename: str,
    key: str,
    final_platform_dir: Path,
    downloads_dir: Path,
    source: str,
    local_dir: Optional[Path],
    staging_bin_dir: Path,
    force: bool,
    warnings: List[str],
) -> str:
    """Stage one engine executable into *staging_bin_dir*.

    Unless *force* is set, an existing copy from a prior install or the
    bundled payload is reused; otherwise the archive is staged (locally or
    via download) and the executable extracted fresh.
    """
    if not force:
        existing = _existing_platform_executable(key, exe_base, final_platform_dir)
        if existing:
            return _copy_existing_executable(existing, exe_base, staging_bin_dir)
    archive = _download_or_local(url, filename, downloads_dir, source, local_dir, force, warnings)
    # Always extract fresh into the staging area (force=True for the stage).
    return _extract_executable_from_archive(archive, exe_base, staging_bin_dir, True)
def _seed_existing_fonts(staging_fonts_dir: Path, key: str, final_platform_dir: Path, force: bool) -> None:
    """Pre-populate the staging fonts dir from prior installs and bundled payloads.

    Copies are best-effort: a failed copy simply means the installer fetches
    that font through the normal path later.
    """
    staging_fonts_dir.mkdir(parents=True, exist_ok=True)
    source_dirs = [final_platform_dir / "fonts", _platform_dir(_plugin_toolchain_base(), key) / "fonts"]
    for src_dir in source_dirs:
        if not src_dir.exists():
            continue
        for src in src_dir.iterdir():
            if not src.is_file():
                continue
            dst = staging_fonts_dir / src.name
            if not force and _valid_component_file(dst):
                continue
            try:
                shutil.copy2(src, dst)
            except Exception:
                pass
def install_toolchain_command(source: str = "online", platform_name: str = "auto", archives_dir: Optional[str] = None, warm_cache: bool = False, force: bool = False) -> Dict[str, Any]:
    """Install Pandoc, Tectonic, and fonts into the per-user toolchain.

    This is the consent-gated installer behind the explicit
    install_document_engines flow; it never runs during document generation.

    Args:
        source: "online" downloads archives; "local" uses archives_dir only.
        platform_name: target platform key, or "auto" for the host platform.
        archives_dir: optional local archive directory (allowlist-validated).
        warm_cache: also compile a sample doc to pre-fill Tectonic's cache.
        force: reinstall even when a complete toolchain is already present.

    Returns a dict with ok/operation/warnings/errors and a toolchain status
    map. The install is staged into a unique directory and only swapped into
    the final location once complete, so a failed install cannot corrupt an
    existing toolchain.
    """
    warnings: List[str] = []
    errors: List[str] = []
    source = (source or "online").lower().strip()
    key = _platform_key_for_install(platform_name)
    # Only platforms with known archive names can be installed automatically.
    if key not in PANDOC_ARCHIVES or key not in TECTONIC_ARCHIVES:
        return {
            "ok": False,
            "operation": "install_toolchain",
            "warnings": [],
            "errors": [f"Unsupported platform for automatic document engines: {key}. Supported: windows-x64, linux-x64, macos-x64, macos-arm64. iOS is not supported by LM Studio desktop plugins."],
            "toolchain": toolchain_status(),
        }
    final_platform_dir = _platform_dir(_user_toolchain_base(), key)
    final_bin_dir = final_platform_dir / "bin"
    downloads_dir = _user_toolchain_base() / "_downloads" / key
    # Path-guard the caller-supplied archives directory before any use.
    local_dir = resolve_safe_archives_dir(archives_dir)
    # Fast path: everything already installed and not forcing a reinstall.
    if (not force) and _platform_toolchain_complete(final_platform_dir):
        cache_warmed = False
        if warm_cache:
            cache_warmed = _warm_tectonic_cache(final_platform_dir, warnings)
        # toolchain_status() reports the host platform; synthesize a status
        # map when installing for a different (cross-platform) key.
        status = toolchain_status() if key == platform_key() else {
            "platform": key,
            "toolchain_dir": str(_user_toolchain_base()),
            "bin_dir": str(final_bin_dir),
            "fonts_dir": str(final_platform_dir / "fonts"),
            "tectonic_cache_dir": str(final_platform_dir / "tectonic-cache"),
            "pandoc": str(final_bin_dir / executable_name("pandoc")),
            "tex_engine": str(final_bin_dir / executable_name("tectonic")),
        }
        status["cache_warmed"] = cache_warmed
        return {"ok": True, "operation": "install_toolchain", "source": source, "warnings": warnings, "errors": [], "toolchain": status, "already_installed": True}
    # Stage into a unique directory so a failed install never corrupts an
    # existing toolchain; the finished stage is swapped into place at the end.
    staging_root = _user_toolchain_base() / "_staging"
    staging_root.mkdir(parents=True, exist_ok=True)
    staging_platform_dir = staging_root / f"{key}-{uuid.uuid4().hex}"
    staging_bin_dir = staging_platform_dir / "bin"
    try:
        _copy_existing_or_download_executable(
            "pandoc",
            f"{PANDOC_BASE_URL}/{PANDOC_ARCHIVES[key]}",
            PANDOC_ARCHIVES[key],
            key,
            final_platform_dir,
            downloads_dir,
            source,
            local_dir,
            staging_bin_dir,
            force,
            warnings,
        )
        _copy_existing_or_download_executable(
            "tectonic",
            f"{TECTONIC_BASE_URL}/{TECTONIC_ARCHIVES[key]}",
            TECTONIC_ARCHIVES[key],
            key,
            final_platform_dir,
            downloads_dir,
            source,
            local_dir,
            staging_bin_dir,
            force,
            warnings,
        )
        # Reuse any fonts already on disk, then fill gaps per *source* policy.
        _seed_existing_fonts(staging_platform_dir / "fonts", key, final_platform_dir, force)
        _copy_or_download_fonts(staging_platform_dir / "fonts", downloads_dir, source, local_dir, force, warnings)
        _copy_plugin_licenses(staging_platform_dir, key, warnings)
        cache_warmed = _warm_tectonic_cache(staging_platform_dir, warnings) if warm_cache else False
        _write_toolchain_manifest(staging_platform_dir, key)
        if not _platform_toolchain_complete(staging_platform_dir):
            raise RuntimeError("Installed toolchain is incomplete after extraction")
        # Swap: drop the old install, then move the completed stage in.
        if final_platform_dir.exists():
            shutil.rmtree(final_platform_dir, ignore_errors=True)
        final_platform_dir.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(staging_platform_dir), str(final_platform_dir))
        _cleanup_download_cache(downloads_dir, warnings, "after successful extraction")
        final_pandoc = str(final_platform_dir / "bin" / executable_name("pandoc"))
        final_tectonic = str(final_platform_dir / "bin" / executable_name("tectonic"))
        status = toolchain_status() if key == platform_key() else {
            "platform": key,
            "toolchain_dir": str(_user_toolchain_base()),
            "bin_dir": str(final_platform_dir / "bin"),
            "fonts_dir": str(final_platform_dir / "fonts"),
            "tectonic_cache_dir": str(final_platform_dir / "tectonic-cache"),
            "pandoc": final_pandoc,
            "tex_engine": final_tectonic,
        }
        status["cache_warmed"] = cache_warmed
        return {"ok": True, "operation": "install_toolchain", "source": source, "warnings": warnings, "errors": errors, "toolchain": status}
    except Exception as exc:
        # Failure path: discard the stage and cached downloads, report errors.
        shutil.rmtree(staging_platform_dir, ignore_errors=True)
        _cleanup_download_cache(downloads_dir, warnings, "after failed installation attempt")
        errors.append(str(exc))
        status = toolchain_status() if key == platform_key() else {
            "platform": key,
            "toolchain_dir": str(_user_toolchain_base()),
            "bin_dir": str(final_bin_dir),
            "fonts_dir": str(final_platform_dir / "fonts"),
            "tectonic_cache_dir": str(final_platform_dir / "tectonic-cache"),
            "pandoc": str(final_bin_dir / executable_name("pandoc")) if (final_bin_dir / executable_name("pandoc")).exists() else "",
            "tex_engine": str(final_bin_dir / executable_name("tectonic")) if (final_bin_dir / executable_name("tectonic")).exists() else "",
        }
        return {"ok": False, "operation": "install_toolchain", "source": source, "warnings": warnings, "errors": errors, "toolchain": status}