Project Files
python / extract_image_page.py
#!/usr/bin/env python3
"""
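PyMuPDF-based PDF page renderer.
Renders a single page to PNG and writes it to an output path, then prints a
one-line JSON result object to stdout (exit status 0 on success, 1 on failure).
Usage: extract_image_page.py <pdf_path> <page_number_1based> <output_path> [dpi]
The optional dpi argument defaults to 150.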
"""
import sys
import json
import traceback
def render_page(pdf_path: str, page_1based: int, output_path: str, dpi: int) -> dict:
    """Render one page of a PDF to an image file and report the outcome.

    Failures are reported through the returned dict rather than raised, so the
    caller can serialize the result directly.

    Args:
        pdf_path: Path of the PDF document to open.
        page_1based: Page to render, counted from 1.
        output_path: Destination file the rendered pixmap is saved to.
        dpi: Render resolution; must be a positive number. The page is scaled
            by ``dpi / 72`` along both axes.

    Returns:
        A dict with ``success`` set to True plus ``output_path``, ``width``,
        ``height``, ``page``, ``page_count`` and ``dpi`` when rendering
        worked; otherwise ``success`` set to False and an ``error`` message.
    """
    # Reject an unusable resolution up front: it would otherwise turn into a
    # zero or negative scale factor and fail far from the real cause.
    if not isinstance(dpi, (int, float)) or dpi <= 0:
        return {
            "success": False,
            "error": f"dpi must be a positive number, got {dpi!r}",
        }

    # Imported lazily so a missing dependency becomes a JSON error instead of
    # an import-time crash.
    try:
        import fitz  # PyMuPDF
    except ImportError:
        return {
            "success": False,
            "error": "PyMuPDF not installed. Run: pip install pymupdf",
        }

    try:
        doc = fitz.open(pdf_path)
    except Exception as e:
        return {"success": False, "error": f"Could not open PDF: {e}"}

    # From here on the document is open; the finally clause closes it on
    # every exit path (range error, render failure, success).
    try:
        page_count = doc.page_count
        page_0based = page_1based - 1
        if not 0 <= page_0based < page_count:
            return {
                "success": False,
                "error": f"Page {page_1based} out of range (document has {page_count} page(s))",
            }

        try:
            page = doc[page_0based]
            scale = dpi / 72.0  # zoom factor relative to 72 units per inch
            mat = fitz.Matrix(scale, scale)
            pix = page.get_pixmap(matrix=mat, alpha=False)
            pix.save(output_path)
            width = pix.width
            height = pix.height
        except Exception as e:
            return {
                "success": False,
                "error": f"Page render failed: {e}\n{traceback.format_exc()}",
            }

        return {
            "success": True,
            "output_path": output_path,
            "width": width,
            "height": height,
            "page": page_1based,
            "page_count": page_count,
            "dpi": dpi,
        }
    finally:
        doc.close()
def main():
    """Command-line entry point: parse argv, render the page, print JSON.

    Reads ``<pdf_path> <page_1based> <output_path> [dpi]`` from ``sys.argv``.
    Prints one JSON object to stdout and exits with status 0 when the result's
    ``success`` flag is true, 1 otherwise (usage and parse errors included).
    An unparsable ``dpi`` falls back to 150 with a warning on stderr.
    """
    if len(sys.argv) < 4:
        print(json.dumps({
            "success": False,
            "error": "Usage: extract_image_page.py <pdf_path> <page_1based> <output_path> [dpi]",
        }))
        sys.exit(1)

    pdf_path = sys.argv[1]
    try:
        page_1based = int(sys.argv[2])
    except ValueError:
        print(json.dumps({"success": False, "error": "page must be an integer"}))
        sys.exit(1)
    output_path = sys.argv[3]

    dpi = 150
    if len(sys.argv) >= 5:
        try:
            dpi = int(sys.argv[4])
        except ValueError:
            # Keep the best-effort fallback to the default, but report it.
            # The warning goes to stderr so stdout stays a single JSON object.
            print(
                f"warning: invalid dpi {sys.argv[4]!r}; using default {dpi}",
                file=sys.stderr,
            )

    result = render_page(pdf_path, page_1based, output_path, dpi)
    print(json.dumps(result))
    sys.exit(0 if result["success"] else 1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()