-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathapp.py
More file actions
101 lines (86 loc) · 2.79 KB
/
app.py
File metadata and controls
101 lines (86 loc) · 2.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from flask import Flask, jsonify, request
import inference_pdf
import inference_tabula
from async_pdf import save_pdf
import os
from shutil import rmtree
import logging
import pdb
# Append log records to ./app.log as "<logger> - <LEVEL> - <message>".
# No level is passed, so the root logger keeps its default threshold.
logging.basicConfig(
    filename='./app.log',
    filemode='a',
    format='%(name)s - %(levelname)s - %(message)s',
)

# The Flask application object that the route decorators in this module
# register their handlers on.
app = Flask(__name__)
@app.route('/', methods=['GET'])
def getJsonHandlerHealth():
    """Health-check endpoint: answer GET / with the JSON body {"response": 200}."""
    payload = {'response': 200}
    return jsonify(payload)
def _pdf_workdir():
    """Return the absolute path of the ./pdf scratch directory."""
    return os.path.join(os.path.abspath('.'), 'pdf')


def _failure(message):
    """Build the JSON error envelope ({'code': 404, 'message': ...})."""
    return jsonify({
        'code': 404,
        'message': message,
    })


def _extract_all(pdf_paths, extract):
    """Run *extract* on the local copy of each PDF, keyed by the original entry."""
    workdir = _pdf_workdir()
    tables = {}
    for source in pdf_paths:
        # Local file name: last '/'-segment, cut at its first dot, plus '.pdf'.
        # NOTE(review): presumably mirrors how save_pdf names the files it
        # writes into ./pdf -- confirm against async_pdf.
        stem = source.split('/')[-1].split('.')[0]
        tables[source] = extract(os.path.join(workdir, stem + '.pdf'))
    return tables


@app.route('/get_pdf_tables', methods=['POST'])
def PostJsonHandlerLineTable():
    """Extract tables from the PDFs listed in a JSON request body.

    Request JSON:
        pdf_links: non-empty list of at most 10 entries, handed to save_pdf.
        lines:     optional; 1 selects inference_pdf.get_tables, 0 (the
                   default) selects inference_tabula.get_tables_without_lines.
                   Any other value yields an empty "not supported" reply.

    Response JSON:
        Success:    {'code': 200, 'data': {<pdf path>: <tables>, ...}}
        Bad input or download failure: {'code': 404, 'message': <reason>}
        Extraction failure:            {'code': 404, 'message': 'Error!'}

    The ./pdf scratch directory is created for the request and is always
    removed again once it has been created, whatever the outcome.
    """
    content = request.get_json()
    logging.debug("get_pdf_tables payload: %s", content)

    # --- validate input before touching the filesystem -------------------
    if not isinstance(content, dict) or 'pdf_links' not in content:
        return _failure('No pdf found')

    pdf_links = content['pdf_links']
    if not isinstance(pdf_links, list):
        return _failure('pdf in list not found ; try with ["xyz.pdf","abc.pdf"]')
    if not pdf_links or len(pdf_links) > 10:
        return _failure('pdf list is empty or greater than 10')

    # --- create the scratch directory ------------------------------------
    workdir = _pdf_workdir()
    try:
        # mkdir (not makedirs/exist_ok) on purpose: an already existing
        # directory may belong to a request still in flight, and cleaning it
        # up here would delete that request's files.
        os.mkdir(workdir)
    except OSError:
        logging.exception("Could not create pdf scratch directory")
        return _failure('No pdf found')

    # From here on the directory is ours: remove it on every exit path so a
    # failed download or extraction cannot block later requests.
    try:
        try:
            pdf_paths = save_pdf(pdf_links)
        except Exception:
            logging.exception("Could not save pdfs")
            return _failure('No pdf found')

        lines = content.get('lines', 0)

        method = 'google_vision'  # alternative OCR backend: 'tesseract'
        debug = 0

        if lines == 1:
            def extract(pdf_path):
                return inference_pdf.get_tables(pdf_path, debug, method)
        elif lines == 0:
            extract = inference_tabula.get_tables_without_lines
        else:
            return jsonify({
                'code': 200,
                'data': {},
                'message': 'Not supported for tables without ocr and lines'
            })

        try:
            # jsonify stays inside the try so unserialisable results are
            # reported as an extraction error too.
            return jsonify({
                'code': 200,
                'data': _extract_all(pdf_paths, extract)
            })
        except Exception:
            logging.exception("Exception occurred")
            return _failure('Error!')
    finally:
        # ignore_errors: cleanup must never replace the response with a 500.
        rmtree(workdir, ignore_errors=True)
if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on all interfaces.
    # The port is passed as an int, which is the type Flask.run documents.
    app.run(host='0.0.0.0', port=3002)