Skip to content

Commit 6933524

Browse files
author
Tom Softreck
committed
Release version 0.1.45
### Added - Changes in examples/pdf_to_base64_example.py
1 parent 9da3890 commit 6933524

9 files changed

+129
-29
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## [0.1.45] - 2025-04-16
6+
7+
### Added
8+
- Changes in examples/pdf_to_base64_example.py
9+
510
## [0.1.44] - 2025-04-16
611

712
## [0.1.43] - 2025-04-16

examples/pdf_to_base64_example.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*-
3+
4+
"""
5+
Example demonstrating how to convert a PDF to base64 encoded image.
6+
This example shows how to use the PDF conversion functionality in pyfunc2.
7+
"""
8+
9+
import os
10+
import sys
11+
import base64
12+
from PIL import Image
13+
from pdf2image import convert_from_path
14+
15+
def convert_pdf_to_base64(pdf_path, extension="png", dpi=100):
    """
    Convert the first page of a PDF to a base64-encoded image, in memory.

    Args:
        pdf_path (str): Path to the PDF file.
        extension (str): Output image format, e.g. 'png' or 'jpg'. Case and a
            leading dot are ignored, so '.PNG' is treated like 'png'.
        dpi (int): Resolution used when rasterizing the page.

    Returns:
        str: Base64 encoded (ASCII) string of the rendered image bytes.

    Raises:
        ValueError: If ``extension`` is empty or no image was produced.

    Requires:
        pip install pdf2image pillow
        Poppler must be installed on the system
    """
    # Function-scope import: the module's top-level imports do not include io.
    import io

    # Normalize once so pdf2image and Pillow are both handed the same format.
    ext = extension.strip().lstrip(".").lower()
    if not ext:
        raise ValueError("Image extension must not be empty.")

    # single_file=True asks pdf2image to render only the first page, so the
    # returned list holds at most one image.
    images = convert_from_path(pdf_path, dpi=dpi, fmt=ext, single_file=True)

    if not images:
        raise ValueError("Failed to convert PDF to image.")

    img = images[0]  # The first (and only) rendered page

    # Pillow looks formats up by canonical name, not by file extension, so the
    # common short extensions must be mapped explicitly.
    pil_format = {"jpg": "JPEG", "tif": "TIFF"}.get(ext, ext.upper())

    # Encode the image into an in-memory buffer instead of a temporary file.
    buffer = io.BytesIO()
    img.save(buffer, format=pil_format)

    # getvalue() returns the whole buffer regardless of the stream position.
    return base64.b64encode(buffer.getvalue()).decode("ascii")
57+
58+
59+
def main():
    """
    Command-line demo of the PDF to base64 conversion.

    Reads ``<path_to_pdf> [extension] [dpi]`` from ``sys.argv``, prints a
    preview of the base64 string, writes the full string to
    ``<pdf name>_base64.txt`` next to the PDF, and prints an HTML ``<img>``
    usage snippet. Problems are reported on stdout; the function always
    returns ``None``.
    """
    # Check if a PDF path was provided
    if len(sys.argv) < 2:
        print("Usage: python pdf_to_base64_example.py <path_to_pdf> [extension] [dpi]")
        print("Example: python pdf_to_base64_example.py sample.pdf png 150")
        return

    # Get parameters from command line arguments
    pdf_path = sys.argv[1]
    extension = sys.argv[2] if len(sys.argv) > 2 else "png"

    # Validate dpi here so a typo yields a readable message, not a traceback.
    try:
        dpi = int(sys.argv[3]) if len(sys.argv) > 3 else 100
    except ValueError:
        print(f"Error: dpi must be an integer, got '{sys.argv[3]}'.")
        return
    if dpi <= 0:
        print(f"Error: dpi must be a positive integer, got {dpi}.")
        return

    if not os.path.exists(pdf_path):
        print(f"Error: File '{pdf_path}' does not exist.")
        return

    try:
        # Convert PDF to base64
        base64_result = convert_pdf_to_base64(pdf_path, extension, dpi)

        # Print the first 100 characters of the base64 string
        print(f"Base64 encoded {extension.upper()} (first 100 chars):")
        print(base64_result[:100] + "...")

        # Save the full base64 string next to the source PDF
        output_file = f"{os.path.splitext(pdf_path)[0]}_base64.txt"
        with open(output_file, "w", encoding="ascii") as f:
            f.write(base64_result)

        print(f"\nFull base64 string saved to: {output_file}")

        # Data URIs need a registered media type: 'jpg' must become
        # 'image/jpeg' (and 'tif' 'image/tiff'), not 'image/jpg'.
        subtype = extension.strip().lstrip(".").lower()
        mime_subtype = {"jpg": "jpeg", "tif": "tiff"}.get(subtype, subtype)

        # Show how to use the base64 string in HTML
        print("\nHTML usage example:")
        print(f'<img src="data:image/{mime_subtype};base64,{base64_result[:20]}..." />')

    except Exception as e:
        # Top-level boundary of a demo script: report the error and hint at
        # the most common cause rather than dumping a traceback.
        print(f"Error: {str(e)}")
        print("\nMake sure you have installed the required dependencies:")
        print("pip install pdf2image pillow")
        print("And Poppler is installed on your system.")
102+
103+
104+
if __name__ == "__main__":
105+
main()
106+
107+
# python examples/pdf_to_base64_example.py example_invoice.pdf png 150

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
# Configuration setup
1919
setup(
2020
name="pyfunc2",
21-
version="0.1.42",
21+
version="0.1.43",
2222
description="libs for cameramonit, ocr, fin-officer, cfo, and other projects",
2323
long_description=LONG_DESCRIPTION,
2424
long_description_content_type="text/markdown",

setup.py.bak

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ except FileNotFoundError:
1818
# Configuration setup
1919
setup(
2020
name="pyfunc2",
21-
version="0.1.41",
21+
version="0.1.42",
2222
description="libs for cameramonit, ocr, fin-officer, cfo, and other projects",
2323
long_description=LONG_DESCRIPTION,
2424
long_description_content_type="text/markdown",

src/pyfunc2/markdown/create_dir_structure_from_headers.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@
22
import os
33
import sys
44

5-
sys.path.append('../')
6-
from pyfunc.markdown.get_url_list import get_url_list
7-
from pyfunc.markdown.get_header_list import get_header_list
8-
9-
105
# markdown_file
116
# source - path to source folder
127
# pattern - regular expression pattern for Markdown headers
@@ -27,3 +22,6 @@ def create_dir_structure_from_headers(markdown_file="",
2722
os.makedirs(path_folder)
2823

2924

25+
26+
from .get_url_list import get_url_list
27+
from .get_header_list import get_header_list

src/pyfunc2/markdown/create_folders_files.py

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,10 @@
33
import sys
44
import string
55

6-
sys.path.append('../')
7-
from pyfunc.markdown.get_url_list import get_url_list
8-
from pyfunc.markdown.get_dictionary_structure_from_headers_content import get_dictionary_structure_from_headers_content
9-
from pyfunc.markdown.get_dictionary_structure_by_separator_list import get_dictionary_structure_by_separator_list
10-
from pyfunc.markdown.get_code_extension_dict import get_code_extension_dict
6+
from .get_url_list import get_url_list
7+
from .get_dictionary_structure_from_headers_content import get_dictionary_structure_from_headers_content
8+
from .get_dictionary_structure_by_separator_list import get_dictionary_structure_by_separator_list
9+
from .get_code_extension_dict import get_code_extension_dict
1110

1211

1312
# markdown_file
@@ -70,5 +69,3 @@ def create_folders_files(markdown_file="",
7069
except Exception as e:
7170
print(e)
7271
continue
73-
74-

src/pyfunc2/markdown/get_code_extension_dict.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,9 @@
33
import sys
44
import string
55

6-
sys.path.append('../')
7-
from pyfunc.markdown.get_url_list import get_url_list
8-
from pyfunc.markdown.get_dictionary_structure_from_headers_content import get_dictionary_structure_from_headers_content
9-
from pyfunc.markdown.get_dictionary_structure_by_separator_list import get_dictionary_structure_by_separator_list
6+
from .get_url_list import get_url_list
7+
from .get_dictionary_structure_from_headers_content import get_dictionary_structure_from_headers_content
8+
from .get_dictionary_structure_by_separator_list import get_dictionary_structure_by_separator_list
109

1110

1211
def get_code_extension_dict(

src/pyfunc2/markdown/get_dictionary_structure_by_separator_list.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@
22
import os
33
import sys
44

5-
sys.path.append('../')
6-
from pyfunc.markdown.get_url_list import get_url_list
7-
from pyfunc.markdown.get_header_list import get_header_list
8-
9-
105
# markdown_file
116
# source - path to source folder
127
# pattern - regular expression pattern for Markdown headers
@@ -41,4 +36,5 @@ def get_dictionary_structure_by_separator_list(markdown = "", separator_list=['`
4136

4237
return code_blocks
4338

44-
39+
from .get_url_list import get_url_list
40+
from .get_header_list import get_header_list

src/pyfunc2/markdown/get_dictionary_structure_from_headers_content.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,6 @@
22
import os
33
import sys
44

5-
sys.path.append('../')
6-
from pyfunc.markdown.get_url_list import get_url_list
7-
from pyfunc.markdown.get_header_list import get_header_list
8-
9-
105
# markdown_file
116
# source - path to source folder
127
# pattern - regular expression pattern for Markdown headers
@@ -30,3 +25,6 @@ def get_dictionary_structure_from_headers_content(markdown_file="", separator_li
3025
data[current_section] += line
3126

3227
return data
28+
29+
from .get_url_list import get_url_list
30+
from .get_header_list import get_header_list

0 commit comments

Comments
 (0)