-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy paths3ObjectToText.py
More file actions
40 lines (26 loc) · 945 Bytes
/
s3ObjectToText.py
File metadata and controls
40 lines (26 loc) · 945 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
import boto3
import io
from PIL import Image
import pytesseract
def handler(event, context):
    """Read an image object from S3 and return the text OCR finds in it.

    The (event, context) signature matches an AWS Lambda handler
    (NOTE(review): presumably deployed as one -- confirm).

    Args:
        event: Invocation payload. When it is a dict, its optional
            ``"bucket"`` and ``"key"`` entries choose the S3 object to
            read. Each one independently falls back to the built-in
            default when it is missing or empty. Any non-dict payload is
            ignored and the defaults are used.
        context: Runtime context object; not used.

    Returns:
        The string returned by ``pytesseract.image_to_string`` for the
        downloaded image.
    """
    # Defaults preserve the object this handler has always processed, so an
    # invocation without overrides behaves exactly as before.
    default_bucket = "avengers-initiative"
    default_key = "2318727.jpg"

    overrides = event if isinstance(event, dict) else {}
    bucket_name = overrides.get("bucket") or default_bucket
    object_key = overrides.get("key") or default_key

    # Location of the Tesseract executable; adjust for the target system.
    pytesseract.pytesseract.tesseract_cmd = '/usr/bin/tesseract'

    s3 = boto3.client('s3')
    response = s3.get_object(Bucket=bucket_name, Key=object_key)

    # Always release the streaming body, even if the read fails part-way.
    body = response["Body"]
    try:
        file_content = body.read()
    finally:
        body.close()

    # Decode from memory; the context manager closes the image when done.
    with Image.open(io.BytesIO(file_content)) as img:
        return pytesseract.image_to_string(img)