Skip to content
1 change: 1 addition & 0 deletions app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
["empty_task_page_check"],
["water_in_the_text_check"],
["report_task_tracker"],
["references_in_chapter_check"],
]

DEFAULT_TYPE = 'pres'
Expand Down
1 change: 1 addition & 0 deletions app/main/checks/report_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@
from .task_tracker import ReportTaskTracker
from .paragraphs_count_check import ReportParagraphsCountCheck
from .template_name import ReportTemplateNameCheck
from .lit_ref_in_spec_chapter import LitRefInChapter
45 changes: 27 additions & 18 deletions app/main/checks/report_checks/banned_words_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

class ReportBannedWordsCheck(BaseReportCriterion):
label = "Проверка наличия запретных слов в тексте отчёта"
description = 'Запрещено упоминание слова "мы"'
description = 'Запрещено упоминание определенных "опасных" слов'
id = 'banned_words_check'

def __init__(self, file_info, headers_map=None):
super().__init__(file_info)
self.words = []
self.warned_words = []
self.min_count = 0
self.max_count = 0
if headers_map:
Expand All @@ -21,42 +22,50 @@ def __init__(self, file_info, headers_map=None):
def late_init(self):
    """Load the banned/warned word sets and the score thresholds from the style config.

    The settings are taken from the entry of ``StyleCheckSettings.CONFIGS[self.config]``
    keyed by the document's main headers; when there is no such entry the
    ``'any_header'`` entry is used instead. If neither exists (or the config
    name itself is unknown) the defaults assigned in ``__init__`` are kept.

    Every configured word is reduced to its first normal form returned by
    ``morph.normal_forms`` so it can be intersected with normalised text.
    """
    self.headers_main = self.file.get_main_headers(self.file_type['report_type'])
    # Resolve the configuration once instead of repeating the lookup per field.
    configs = StyleCheckSettings.CONFIGS.get(self.config) or {}
    if self.headers_main in configs:
        settings = configs[self.headers_main]
    elif 'any_header' in configs:
        settings = configs['any_header']
    else:
        return

    # Sets: membership is tested via set intersection in check(), and the
    # list that used to be assigned first was immediately overwritten.
    self.words = {morph.normal_forms(word)[0] for word in settings['banned_words']}
    # 'warned_words' is an optional, informational list: configuration
    # entries that do not define it simply produce no warnings.
    self.warned_words = {morph.normal_forms(word)[0] for word in settings.get('warned_words', ())}
    self.min_count = settings['min_count_for_banned_words_check']
    self.max_count = settings['max_count_for_banned_words_check']

def check(self):
if self.file.page_counter() < 4:
return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
self.late_init()
detected_lines = {}
result_str = f'<b>Запрещенные слова: {"; ".join(self.words)}</b><br>'
count = 0
banned_counter = {'words': self.words, 'detected_lines': {}, 'count': 0}
warned_counter = {'words': self.warned_words,'detected_lines': {}, 'count': 0}
for k, v in self.file.pdf_file.get_text_on_page().items():
lines_on_page = re.split(r'\n', v)
for index, line in enumerate(lines_on_page):
words_on_line = re.split(r'[^\w-]+', line)
words_on_line = [morph.normal_forms(word)[0] for word in words_on_line]
count_banned_words = set(words_on_line).intersection(self.words)
if count_banned_words:
count += len(count_banned_words)
if k not in detected_lines.keys():
detected_lines[k] = []
detected_lines[k].append(f'Строка {index + 1}: {line} <b>[{"; ".join(count_banned_words)}]</b>')
if len(detected_lines):
words_on_line = {morph.normal_forms(word)[0] for word in re.split(r'[^\w-]+', line)}
for counter in (banned_counter, warned_counter):
count_banned_words = words_on_line.intersection(counter['words'])
if count_banned_words:
counter['count'] += len(count_banned_words)
if k not in counter['detected_lines'].keys():
counter['detected_lines'][k] = []
counter['detected_lines'][k].append(f'Строка {index + 1}: {line} <b>[{"; ".join(count_banned_words)}]</b>')
if len(banned_counter['detected_lines']):
result_str += 'Обнаружены запретные слова! <br><br>'
for k, v in detected_lines.items():
result_str += f'Страница №{k}:<br>{"<br>".join(detected_lines[k])}<br><br>'
for k, v in banned_counter['detected_lines'].items():
result_str += f'Страница №{k}:<br>{"<br>".join(banned_counter['detected_lines'][k])}<br><br>'
else:
result_str = 'Пройдена!'

if len(warned_counter['detected_lines']):
result_str += f'<br><br>Обнаружены потенциально опасные слова (не влияют на результат проверки)!<br>Обратите внимание, что их использование возможно только в подтвержденных случаях: {"; ".join(self.warned_words)}<br><br>'
for k, v in warned_counter['detected_lines'].items():
result_str += f'Страница №{k}:<br>{"<br>".join(warned_counter['detected_lines'][k])}<br><br>'

result_score = 1
if count > self.min_count:
if count <= self.max_count:
if banned_counter['count'] > self.min_count:
if banned_counter['count'] <= self.max_count:
result_score = 0.5
else:
result_score = 0
Expand Down
96 changes: 96 additions & 0 deletions app/main/checks/report_checks/lit_ref_in_spec_chapter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import re
from .style_check_settings import StyleCheckSettings
from ..base_check import BaseReportCriterion, answer


class LitRefInChapter(BaseReportCriterion):
    """Check that selected chapters contain an allowed number of literature references.

    The chapters to inspect and their ``[min, max]`` limits come from the
    ``'chapters_for_lit_ref'`` entry of the style configuration. A reference
    is a bracketed group of numbers in a paragraph, e.g. ``[3]``, ``[1, 4]``
    or ``[2-5]``; ranges are expanded, so ``[2-5]`` counts as four references.
    """

    label = "Проверка количества ссылок на источники в определенном разделе"
    description = ''
    id = 'references_in_chapter_check'

    # Lower-cased chapter titles that must not contain any literature reference.
    _ANNOTATION_HEADERS = ('аннотация', 'annotation')
    # A bracketed group made only of digits, spaces, hyphens and commas.
    _REFERENCE_RE = re.compile(r'\[[\d \-,]+\]')
    # "3-5", "3 - 5" (and, as before, "3 5") denote the inclusive range 3..5.
    _RANGE_RE = re.compile(r'(\d+)[ \-]+(\d+)')

    def __init__(self, file_info, min_ref_value=0.5, max_ref_value=1, headers_map=None):
        """Store the score thresholds and pick the header configuration name.

        Args:
            file_info: passed through to ``BaseReportCriterion.__init__``.
            min_ref_value: lowest share of compliant chapters that still gives
                a partial pass.
            max_ref_value: share of compliant chapters required for a full pass.
            headers_map: explicit configuration name; when falsy it is chosen
                from the report type (``'VKR_HEADERS'`` or ``'LR_HEADERS'``).
        """
        super().__init__(file_info)
        self.chapters_for_lit_ref = {}
        self.lit_ref_count = {}
        self.min_ref_value = min_ref_value
        self.max_ref_value = max_ref_value
        if headers_map:
            self.config = headers_map
        else:
            self.config = 'VKR_HEADERS' if (self.file_type['report_type'] == 'VKR') else 'LR_HEADERS'

    def late_init(self):
        """Build the chapter list and load the per-chapter reference limits.

        Limits are read from the configuration entry matching the document's
        main headers, falling back to ``'any_header'``. When no limits are
        configured ``self.chapters_for_lit_ref`` stays empty and ``check``
        reports the check as not applicable.
        """
        report_type = self.file_type['report_type']
        self.chapters = self.file.make_chapters(report_type)
        self.headers_main = self.file.get_main_headers(report_type)
        # A missing config or a missing 'chapters_for_lit_ref' key means the
        # check is simply not configured for this document, not an error.
        configs = StyleCheckSettings.CONFIGS.get(self.config) or {}
        if self.headers_main in configs:
            self.chapters_for_lit_ref = configs[self.headers_main].get('chapters_for_lit_ref', {})
        elif 'any_header' in configs:
            self.chapters_for_lit_ref = configs['any_header'].get('chapters_for_lit_ref', {})

    def check(self):
        """Run the check and return the result produced by ``answer``.

        Each chapter spans the paragraphs from its own ``'number'`` up to the
        ``'number'`` of the next chapter. For every chapter named in the
        configuration the references are counted and compared with the
        configured limits. The score is the share of compliant chapters;
        any reference found in an annotation chapter fails the check.
        """
        if self.file.page_counter() < 4:
            return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
        self.late_init()
        if not self.chapters_for_lit_ref:
            return answer(True, 'Для загруженной работы данная проверка не предусмотрена.')

        # Start from a clean map so a repeated run does not reuse stale bounds.
        self.lit_ref_count = {}
        chapters = list(self.chapters)
        document_end = len(self.file.paragraphs)
        violations = []
        checked_chapters = 0
        ref_in_annotation = False
        for position, chapter in enumerate(chapters):
            header = chapter['text'].lower()
            start = chapter['number']
            # The last chapter has no following header, so it runs to the end
            # of the document (previously it was never evaluated at all).
            is_last = position + 1 == len(chapters)
            end = document_end if is_last else chapters[position + 1]['number']
            self.lit_ref_count[header] = [start, end]
            if not header or header not in self.chapters_for_lit_ref:
                continue

            checked_chapters += 1
            limits = self.chapters_for_lit_ref[header]
            ref_count = len(self.search_references(start, end))
            if ref_count < limits[0] or ref_count > limits[1]:
                violations.append(f'«{header[0].upper() + header[1:]}» : {ref_count}')
            # Flag the annotation only when references were actually found in it.
            if ref_count and header in self._ANNOTATION_HEADERS:
                ref_in_annotation = True

        if violations:
            # violations is non-empty, hence checked_chapters >= 1.
            ref_value = round((checked_chapters - len(violations)) / checked_chapters, 2)
            violations_html = "<br>".join(violations)
            limits_html = "<br>".join(
                f"«{title.capitalize()}»: от {limit[0]} до {limit[1]}"
                for title, limit in self.chapters_for_lit_ref.items()
            )
            result_str = (f'Доля соответствия количества ссылок необходимому в требуемых разделах равна {ref_value}'
                          f'<br><b>Количество ссылок на источники не удовлетворяет допустимому в следующих разделах:</b> <br> {violations_html}'
                          f'<br><b> Допустимые пороги количества ссылок:</b> <br>'
                          f'{limits_html}')
            if ref_in_annotation:
                result_str += '<b>В аннотации не должно быть ссылок на литературу.</b>'
            if ref_value >= self.max_ref_value and not ref_in_annotation:
                return answer(1, 'Пройдена!')
            if ref_value >= self.min_ref_value and not ref_in_annotation:
                return answer(ref_value, f'Частично пройдена! {result_str}')
            return answer(0, f'Не пройдена! {result_str}')
        if ref_in_annotation:
            return answer(0, 'В аннотации не должно быть ссылок на литературу.')
        return answer(1, 'Пройдена!')

    def search_references(self, start_par, end_par):
        """Collect the reference numbers cited in paragraphs ``[start_par, end_par)``.

        Args:
            start_par: index of the first paragraph to scan.
            end_par: index one past the last paragraph to scan.

        Returns:
            A list of ints, one per cited source, in order of appearance and
            with duplicates kept. ``[1, 3-5]`` yields ``[1, 3, 4, 5]``.
            Tokens that are neither a number nor a range are ignored.
        """
        references = []
        for index in range(start_par, end_par):
            paragraph = self.file.paragraphs[index]
            # Paragraphs are either plain strings or objects exposing paragraph_text.
            text = paragraph if isinstance(paragraph, str) else paragraph.paragraph_text
            for group in self._REFERENCE_RE.findall(text):
                # Drop the surrounding brackets, then handle each comma-separated token.
                for token in group[1:-1].split(','):
                    references.extend(self._expand_reference_token(token))
        return references

    @classmethod
    def _expand_reference_token(cls, token):
        """Turn one token of a bracketed group into the list of numbers it denotes."""
        # Tokens after a comma usually carry a leading space (" 3-5").
        token = token.strip()
        bounds = cls._RANGE_RE.fullmatch(token)
        if bounds:
            return list(range(int(bounds.group(1)), int(bounds.group(2)) + 1))
        if token.isdigit():
            return [int(token)]
        # Empty or malformed token ("", "-", "1-3-5"): not a usable reference.
        return []
11 changes: 1 addition & 10 deletions app/main/checks/report_checks/main_character_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,12 +82,6 @@ def extract_table_contents(self, table):
contents.append("|".join(row_text))
return contents

def calculate_find_value(self, table, index):
    """Return half of the rows left in ``table`` after ``index`` (less two), floored at zero."""
    rows_left = len(table) - index - 2
    half = int(rows_left / 2)
    # A negative result means there are not enough rows left; report none.
    return half if half >= 0 else 0

def check_table(self, check_list, table, table_num):
for item in check_list:
for i, line in enumerate(table):
Expand All @@ -105,10 +99,7 @@ def check_table(self, check_list, table, table_num):
continue

elif item["key"] in ["Зав. кафедрой", "Консультант"] and item["found_key"] > 0:
if item["key"] == "Консультант":
if item["found_key"] == 1:
item["find"] += self.calculate_find_value(table, i)
for value in item["value"]:
if re.search(value, line):
if "Руководитель" not in line and re.search(value, line): # исключаем из поиска строки с рукодителем
item["found_value"] += 1
item["logs"] += f"'{item['key']}': значение компоненты '{value}' найдено в строке '{line}' в таблице №{table_num}<br>"
Loading