-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathDocumentClass.py
More file actions
180 lines (130 loc) · 5.34 KB
/
DocumentClass.py
File metadata and controls
180 lines (130 loc) · 5.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
import json
from Table import Table
"""
Description: a unified class representing a text document, its properties and content
Parameters:
----------
__owner - attribute specifies the owner of a document,
__time - attribute specifies document creation time,
__content - attribute specifies the content of a document
Methods
----------
addContent(id, paragraph)
Adds a paragraph to the content list
ptToSm(value)
Converts typographical points to centimeters
dmToSm(value)
Converts inches to centimeters
createJsonToClasifier(listOfAttr)
Creates and returns the JSON payload (already parsed into Python objects), which will later be sent to the classifier
requestToClasify(jsonText, api="http://127.0.0.1:8001/clasify")
Sends a request to the classification module
writeCSV(path):
Generates csv file based on content
"""
class Class:
    """
    A text document: its owner, its creation time and its content.

    The content is a dict that maps a paragraph id to a content item. Items
    that are ``Table`` instances are handled as tables; every other item is
    handled as a paragraph whose features are read from its public attributes.
    """

    # Paragraph attributes exported to the classifier when the caller does not
    # supply its own list. Kept as an immutable tuple so that the default can
    # never be modified between calls.
    _DEFAULT_CLASSIFIER_ATTRS = (
        "countOfSpSbl", "countSbl", "lowercase", "uppercase", "lastSbl",
        "firstkey", "prevEl", "curEl", "nexEl", "bold", "italics",
        "keepLinesTogether", "keepWithNext", "outlineLevel",
        "pageBreakBefore",
    )

    # Columns of the CSV export. The same tuple is used for the header row and
    # for reading the values of each paragraph, so the two cannot drift apart.
    _CSV_COLUMNS = (
        "text", "countOfSpSbl", "countSbl", "uppercase", "lowercase",
        "fontName", "lastSbl", "firstkey", "indent", "lineSpacing",
        "textSize",
    )

    def __init__(self, owner, time):
        """
        Creates a document with no content
        :param owner: The owner of the document
        :param time: The creation time of the document
        """
        self.__owner = owner
        self.__time = time
        self.__content = {}

    def addContent(self, paragraph_id, paragraph):
        """
        Adds a paragraph to the content, replacing any item stored under the same id
        :param paragraph_id: Paragraph number to be added
        :param paragraph: The paragraph to be added as a Paragraph class
        """
        self.content[paragraph_id] = paragraph

    # Points to centimeters
    @classmethod
    def ptToSm(cls, value):
        """
        Converts typographical points to centimeters
        :param value: Value in points
        :return: The resulting value in centimeters
        """
        return value / 28.346

    # Inches to centimeters
    @classmethod
    def dmToSm(cls, value):
        """
        Converts inches to centimeters
        :param value: Value in inches
        :return: The resulting value in centimeters
        """
        return value * 2.54

    def createJsonToClasifier(self, listOfAttr=None):
        """
        Creates and returns the payload which will later be sent to the classifier

        The result has the keys "owner" and "time" (both converted with
        ``str``) and "paragraphs", which maps ``str(paragraph_id)`` to a dict
        of the requested public attributes of that paragraph, each converted
        with ``str``. Table items are skipped.

        :param listOfAttr: Names of the paragraph attributes to include;
            when omitted, the default classifier attribute set is used
        :return jsonText: The payload as a dict that can be serialized to JSON
        """
        wanted = self._DEFAULT_CLASSIFIER_ATTRS if listOfAttr is None else listOfAttr

        # The structure is built directly instead of concatenating JSON text:
        # values containing quotes, backslashes or newlines stay intact, and an
        # empty document or a paragraph without matching attributes simply
        # yields an empty dict.
        paragraphs = {}
        for paragraph_id, item in self.content.items():
            if isinstance(item, Table):
                continue
            paragraphs[str(paragraph_id)] = {
                name: str(getattr(item, name))
                for name in dir(item)
                if not name.startswith('_') and name in wanted
            }

        jsonText = {
            "owner": str(self.owner),
            "time": str(self.time),
            "paragraphs": paragraphs,
        }
        return jsonText

    @classmethod
    def requestToClasify(cls, jsonText, api="http://127.0.0.1:8001/clasify", timeout=None):
        """
        Sends a POST request to the classification module
        :param jsonText: The payload to send as the JSON body of the request
        :param api: API where the request is sent
        :param timeout: Timeout in seconds handed to ``requests.post``;
            ``None`` (the default) sets no timeout
        :return response: Response received from the API
        """
        # Imported here, so ``requests`` is only needed when a request is sent.
        import requests

        response = requests.post(api, json=jsonText, timeout=timeout)
        return response

    def writeCSV(self, path='pdftocsv.csv'):
        """
        Generates csv file based on content

        The first row is the header. Every paragraph produces one row with all
        columns; a table produces a row that holds only its text.

        :param path: Path to save csv file
        """
        import csv

        with open(path, 'w', newline='', encoding="utf-8") as csvfile:
            filewriter = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
            filewriter.writerow(self._CSV_COLUMNS)
            for item in self.content.values():
                if isinstance(item, Table):
                    filewriter.writerow([item.text])
                else:
                    filewriter.writerow([getattr(item, column) for column in self._CSV_COLUMNS])

    @property
    def content(self):
        """The dict that maps paragraph ids to content items"""
        return self.__content

    @content.setter
    def content(self, content):
        self.__content = content

    @property
    def time(self):
        """The creation time of the document"""
        return self.__time

    @time.setter
    def time(self, time):
        self.__time = time

    @property
    def owner(self):
        """The owner of the document"""
        return self.__owner

    @owner.setter
    def owner(self, owner):
        self.__owner = owner