# getmarkdown.py
# Forked from Azure-Samples/document-intelligence-code-samples.
def get_markdown_format(
    path_to_input_document: str = "input3.pdf",
    path_to_output_document: str = "output-layout2.md",
) -> None:
    """Analyze a local document with the "prebuilt-layout" model and save it as Markdown.

    The service endpoint and key are read from the environment variables
    ``AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT`` and
    ``AZURE_DOCUMENT_INTELLIGENCE_KEY``.

    Args:
        path_to_input_document: Path of the document to upload for analysis.
        path_to_output_document: Path of the Markdown file to write. Any
            existing file at this path is overwritten.

    Raises:
        KeyError: If either environment variable is not set.
        OSError: If the input file cannot be read or the output file cannot
            be written.
    """
    import os

    from azure.core.credentials import AzureKeyCredential
    from azure.ai.documentintelligence import DocumentIntelligenceClient
    from azure.ai.documentintelligence.models import AnalyzeResult, ContentFormat

    # get parameters from environment variables
    endpoint = os.environ["AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT"]
    key = os.environ["AZURE_DOCUMENT_INTELLIGENCE_KEY"]

    document_intelligence_client = DocumentIntelligenceClient(
        endpoint=endpoint, credential=AzureKeyCredential(key)
    )

    # NOTE(review): the keyword names below (analyze_request,
    # output_content_format) and ContentFormat are kept exactly as written;
    # confirm they match the installed azure-ai-documentintelligence version.
    with open(path_to_input_document, "rb") as f:
        poller = document_intelligence_client.begin_analyze_document(
            "prebuilt-layout",
            analyze_request=f,
            content_type="application/octet-stream",
            output_content_format=ContentFormat.MARKDOWN,
        )
        result: AnalyzeResult = poller.result()

    # Save the result in a Markdown file. Mode "w" truncates any previous
    # output in one step, and only once the analysis has succeeded, so a
    # failed run no longer destroys the earlier file. The encoding is pinned
    # to UTF-8 so the write does not depend on the platform default, which
    # may be unable to encode non-ASCII characters in the extracted text.
    with open(path_to_output_document, "w", encoding="utf-8") as out:
        out.write(result.content)
if __name__ == "__main__":
    from azure.core.exceptions import HttpResponseError
    from dotenv import find_dotenv, load_dotenv

    try:
        # Load variables from the .env file located by find_dotenv(), then run the sample.
        load_dotenv(find_dotenv())
        get_markdown_format()
    except HttpResponseError as error:
        # Examples of how to inspect an HttpResponseError before re-raising it.
        inner = error.error
        if inner is None:
            # No inner error object: fall back to checking the message text.
            if "Invalid request".casefold() in error.message.casefold():
                print(f"Uh-oh! Seems there was an invalid request: {error}")
        elif inner.code == "InvalidImage":
            print(f"Received an invalid image error: {error.error}")
        elif inner.code == "InvalidRequest":
            print(f"Received an invalid request error: {error.error}")
        # Every path propagates the original error after any diagnostic print.
        raise
# Next steps:
# Learn more about Layout model: https://aka.ms/di-layout
# Find more sample code: https://aka.ms/doc-intelligence-samples