|
| 1 | +#!/usr/bin/env python |
| 2 | +# -*- coding: utf-8 -*- |
| 3 | + |
| 4 | +""" |
| 5 | +Example demonstrating how to convert the first page of a PDF to a base64-encoded image. |
| 6 | +This example shows how to use the PDF conversion functionality in pyfunc2. |
| 7 | +""" |
| 8 | + |
| 9 | +import os |
| 10 | +import sys |
| 11 | +import base64 |
| 12 | +from PIL import Image |
| 13 | +from pdf2image import convert_from_path |
| 14 | + |
def convert_pdf_to_base64(pdf_path, extension="png", dpi=100):
    """
    Convert the first page of a PDF to a base64-encoded image, in memory.

    Args:
        pdf_path (str): Path to the PDF file.
        extension (str): Output image format ('png' or 'jpg'). Matching is
            case-insensitive and a leading dot ('.png') is accepted.
        dpi (int): Resolution used to rasterize the page; must be positive.

    Returns:
        str: Base64 encoded (ASCII) string of the encoded image bytes.

    Raises:
        ValueError: If ``extension`` is empty, ``dpi`` is not positive, or
            the conversion produced no image.

    Requires:
        pip install pdf2image pillow
        Poppler must be installed on the system
    """
    import io

    # Validate the cheap arguments first so bad input fails with a clear
    # message instead of an unrelated error from pdf2image or Poppler.
    image_format = extension.strip().lstrip(".").lower()
    if not image_format:
        raise ValueError("Image extension must not be empty.")
    if dpi <= 0:
        raise ValueError("dpi must be a positive number.")

    # single_file=True limits the conversion to a single page, so the
    # returned list holds at most one image.
    images = convert_from_path(
        pdf_path, dpi=dpi, fmt=image_format, single_file=True
    )

    if not images:
        raise ValueError("Failed to convert PDF to image.")

    img = images[0]  # The first (and only) rendered page

    # Pillow registers its JPEG encoder under the name "JPEG", not "JPG".
    pillow_format = "JPEG" if image_format == "jpg" else image_format.upper()

    # JPEG cannot store alpha or palette data; flatten such images to RGB so
    # the save below does not fail on pages rendered with transparency.
    if pillow_format == "JPEG" and img.mode not in ("L", "RGB", "CMYK"):
        img = img.convert("RGB")

    # Encode the image into an in-memory buffer rather than a temp file.
    buffer = io.BytesIO()
    img.save(buffer, format=pillow_format)

    # getvalue() returns the whole buffer regardless of the stream position.
    return base64.b64encode(buffer.getvalue()).decode("ascii")
| 57 | + |
| 58 | + |
def main():
    """
    Command-line entry point demonstrating the PDF to base64 conversion.

    Reads ``<path_to_pdf> [extension] [dpi]`` from ``sys.argv``, converts the
    PDF with ``convert_pdf_to_base64``, prints a preview of the result, writes
    the full base64 string to ``<pdf name>_base64.txt`` next to the PDF, and
    prints an HTML ``<img>`` usage example.

    Returns:
        None. Problems (missing arguments, invalid dpi, missing file,
        conversion errors) are reported on stdout rather than raised.
    """
    # Check if a PDF path was provided
    if len(sys.argv) < 2:
        print("Usage: python pdf_to_base64_example.py <path_to_pdf> [extension] [dpi]")
        print("Example: python pdf_to_base64_example.py sample.pdf png 150")
        return

    # Get parameters from command line arguments
    pdf_path = sys.argv[1]
    extension = sys.argv[2] if len(sys.argv) > 2 else "png"

    dpi = 100
    if len(sys.argv) > 3:
        # A non-numeric dpi is a usage error; report it instead of crashing
        # with a traceback.
        try:
            dpi = int(sys.argv[3])
        except ValueError:
            print(f"Error: dpi must be an integer, got '{sys.argv[3]}'.")
            return

    if not os.path.exists(pdf_path):
        print(f"Error: File '{pdf_path}' does not exist.")
        return

    try:
        # Convert PDF to base64
        base64_result = convert_pdf_to_base64(pdf_path, extension, dpi)

        # Print the first 100 characters of the base64 string
        print(f"Base64 encoded {extension.upper()} (first 100 chars):")
        print(base64_result[:100] + "...")

        # Save the full base64 string next to the source PDF. Base64 output
        # is pure ASCII, so the encoding is pinned explicitly.
        output_file = f"{os.path.splitext(pdf_path)[0]}_base64.txt"
        with open(output_file, "w", encoding="ascii") as f:
            f.write(base64_result)

        print(f"\nFull base64 string saved to: {output_file}")

        # The registered media type for JPEG is "image/jpeg"; "image/jpg" is
        # not a valid MIME type, so map the file extension accordingly.
        mime_subtype = extension.lstrip(".").lower()
        if mime_subtype == "jpg":
            mime_subtype = "jpeg"

        # Show how to use the base64 string in HTML
        print("\nHTML usage example:")
        print(f'<img src="data:image/{mime_subtype};base64,{base64_result[:20]}..." />')

    except Exception as e:
        # Top-level boundary of the example script: report any failure and
        # remind the user of the external requirements.
        print(f"Error: {str(e)}")
        print("\nMake sure you have installed the required dependencies:")
        print("pip install pdf2image pillow")
        print("And Poppler is installed on your system.")
| 102 | + |
| 103 | + |
# Run the demonstration only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
| 106 | + |
| 107 | +# python examples/pdf_to_base64_example.py example_invoice.pdf png 150 |
0 commit comments