forked from tomaszleszczynski1980/InvoiceProcessing
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain2.py
More file actions
45 lines (34 loc) · 1.2 KB
/
main2.py
File metadata and controls
45 lines (34 loc) · 1.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# This is the simplest example of using Tesseract with Python.
import sys

import cv2
import pytesseract
from pytesseract import Output
from PIL import Image

try:
    from PIL import Image
except ImportError:
    import Image
# Image to run OCR on; the path is resolved relative to the current working
# directory.
filename = 'images/DRP90461344.pdf.png'

# Decode the image with OpenCV. `img` is only consumed by the word-level
# example further down. Note that cv2.imread() does not raise when the file
# is missing or unreadable -- it returns None instead.
img = cv2.imread(filename)

# --- Example 1: plain-text OCR ----------------------------------------------
# The file path is handed to pytesseract directly. Alternative calls kept for
# reference:
#   French text:
#     print(pytesseract.image_to_string(Image.open('test-european.jpg'), lang='fra'))
#   Default language:
#     print(pytesseract.image_to_string(filename))
print(pytesseract.image_to_string(filename, lang='eng'))

# Orientation and script detection, if needed:
#   print(pytesseract.image_to_osd(Image.open(filename)))

# Stop after the plain-text example. sys.exit() is used rather than the bare
# exit() helper, which is injected by the `site` module for interactive use
# and is not guaranteed to exist (e.g. under `python -S`).
# NOTE(review): everything below is unreachable while this early exit is in
# place -- presumably left over from experimenting; remove it to run the
# word-level example as well.
sys.exit()

# --- Example 2: word-level OCR data -----------------------------------------
# Fail with a clear message instead of an obscure pytesseract error when
# OpenCV could not load the image.
if img is None:
    raise FileNotFoundError('Could not read image: ' + filename)

# All OCR results are returned as a dictionary. To force a specific language:
#   content = pytesseract.image_to_data(img, lang='pol', output_type=Output.DICT)
content = pytesseract.image_to_data(img, output_type=Output.DICT)
print(content.keys())
# print(content)

# content['text'] is a list, so a particular expression is easy to search for.
print(content['text'])

# Alternatively, join the list into a single string.
output = " ".join(content['text'])
print(output)
# custom_config = r'--oem 3 --psm 6'
# txt_content = (pytesseract.image_to_string(img, config=custom_config))
#
#
# with open('results/invoice.txt', 'w') as file:
# file.write(txt_content)