From 731625778dae41fa37cb4962f87b823b711cc5c4 Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Fri, 21 Nov 2025 10:32:50 +0300 Subject: [PATCH 01/28] Checks abbreviations press and reports --- app/main/check_packs/pack_config.py | 2 + .../checks/presentation_checks/__init__.py | 1 + .../abbreviations_presentation.py | 106 ++++++++++++++++++ app/main/checks/report_checks/__init__.py | 1 + .../report_checks/abbreviations_check.py | 100 +++++++++++++++++ 5 files changed, 210 insertions(+) create mode 100644 app/main/checks/presentation_checks/abbreviations_presentation.py create mode 100644 app/main/checks/report_checks/abbreviations_check.py diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 91e08134..4f8d66ea 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -22,6 +22,7 @@ ['pres_image_capture'], ['task_tracker'], ['overview_in_tasks'], + ['PresAbbreviationsCheck'], ] BASE_REPORT_CRITERION = [ ["simple_check"], @@ -50,6 +51,7 @@ ["empty_task_page_check"], ["water_in_the_text_check"], ["report_task_tracker"], + ["abbreviations_check"], ] DEFAULT_TYPE = 'pres' diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py index 52bd5f73..db54ae53 100644 --- a/app/main/checks/presentation_checks/__init__.py +++ b/app/main/checks/presentation_checks/__init__.py @@ -17,3 +17,4 @@ from .name_of_image_check import PresImageCaptureCheck from .task_tracker import TaskTracker from .overview_in_tasks import OverviewInTasks +from .abbreviations_presentation import PresAbbreviationsCheck diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py new file mode 100644 index 00000000..fea02c03 --- /dev/null +++ b/app/main/checks/presentation_checks/abbreviations_presentation.py @@ -0,0 +1,106 @@ +import re +from ..base_check import BasePresCriterion, answer + + +class PresAbbreviationsCheck(BasePresCriterion): + label = "Проверка расшифровки аббревиатур в презентации" + description = "Все аббревиатуры должны быть расшифрованы при первом использовании" + id = 'pres_abbreviations_check' + + def __init__(self, file_info): + super().__init__(file_info) + + def check(self): + try: + slides_text = self.file.get_text_from_slides() + + if not slides_text: + return answer(False, "Не удалось получить текст презентации") + + full_text = " ".join(slides_text) + + abbreviations = self._find_abbreviations(full_text) + + if not abbreviations: + return answer(True, "Аббревиатуры не найдены в презентации") + + unexplained_abbr_with_slides = [] + for abbr in abbreviations: + slides_with_abbr = self._find_abbreviation_slides(abbr, slides_text) + + if not self._is_abbreviation_explained(abbr, full_text): + unexplained_abbr_with_slides.append({ + 'abbr': abbr, + 'slides': slides_with_abbr + }) + + + if unexplained_abbr_with_slides: + result_str = "Найдены нерасшифрованные аббревиатуры:
" + + for item in unexplained_abbr_with_slides: + slide_links = self.format_page_link(item['slides']) + result_str += f"- {item['abbr']} (слайды: {', '.join(slide_links)})
" + + result_str += "
Каждая аббревиатура должна быть расшифрована при первом использовании в презентации." + return answer(False, result_str) + + else: + return answer(True, "Все аббревиатуры правильно расшифрованы") + + except Exception as e: + return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}") + + def _find_abbreviation_slides(self, abbr: str, slides_text: list) -> list: + found_slides = [] + + for slide_num, slide_text in enumerate(slides_text, 1): + pattern = rf'\b{re.escape(abbr)}\b' + if re.search(pattern, slide_text, re.IGNORECASE): + found_slides.append(slide_num) + + return found_slides + + def _find_abbreviations(self, text: str): + pattern = r'\b[А-ЯA-Z]{2,5}\b' + abbreviations = re.findall(pattern, text) + + common_abbr = { + 'СССР', 'РФ', 'США', 'ВКР', 'ИТ', 'ПО', 'ООО', 'ЗАО', 'ОАО', 'HTML', 'CSS', + 'JS', 'ЛЭТИ', 'МОЕВМ', 'ЭВМ', 'DVD', 'SSD', 'PC', 'HDD', + 'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', + 'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL', + 'CS', 'DS', 'ES', 'SS', 'FS', 'GS', + 'IP', 'EIP', 'RIP', + 'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', + 'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP', + 'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP', + 'DOS', 'OS', 'BIOS', 'UEFI', 'MBR', 'GPT', + 'ASCII', 'UTF', 'UNICODE', 'ANSI', + 'ЭВМ', 'МОЭВМ', + 'CPU', 'GPU', 'APU', 'RAM', 'ROM', 'PROM', 'EPROM', 'EEPROM', + 'USB', 'SATA', 'PCI', 'PCIe', 'AGP', 'ISA', 'VGA', 'HDMI', 'DP', + 'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP', 'IP', + 'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS', + 'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML', + 'MAC', 'IBM' + } + filtered_abbr = [abbr for abbr in abbreviations if abbr not in common_abbr] + + return list(set(filtered_abbr)) + + def _is_abbreviation_explained(self, abbr: str, text: str) -> bool: + patterns = [ + rf'{abbr}\s*\([^)]+\)', # АААА (расшифровка) + rf'\([^)]+\)\s*{abbr}', # (расшифровка) АААА + rf'{abbr}\s*—\s*[^.,;!?]+', # АААА — расшифровка + rf'{abbr}\s*-\s*[^.,;!?]+', # АААА - расшифровка + rf'[^.,;!?]+\s*—\s*{abbr}', # расшифровка — АААА + rf'[^.,;!?]+\s*-\s*{abbr}' # расшифровка - АААА + ] + + for pattern in patterns: + if re.search(pattern, text): + return True + + return False \ No newline at end of file diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py index 0ed2a8dc..4cda7b53 100644 --- a/app/main/checks/report_checks/__init__.py +++ b/app/main/checks/report_checks/__init__.py @@ -32,3 +32,4 @@ from .sw_section_size import SWSectionSizeCheck from .sw_keywords_check import SWKeywordsCheck from .task_tracker import ReportTaskTracker +from .abbreviations_check import AbbreviationsCheck diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py new file mode 100644 index 00000000..593d6d6d --- /dev/null +++ b/app/main/checks/report_checks/abbreviations_check.py @@ -0,0 +1,100 @@ +import re +from ..base_check import BaseReportCriterion, answer + + +class AbbreviationsCheck(BaseReportCriterion): + label = "Проверка расшифровки аббревиатур" + description = "Все аббревиатуры должны быть расшифрованы при первом использовании" + id = 'abbreviations_check' + + def __init__(self, file_info): + super().__init__(file_info) + + + def check(self): + try: + text = self._get_document_text() + + if not text: + return answer(False, "Не удалось получить текст документа") + + abbreviations = self._find_abbreviations(text) + + if not abbreviations: + return answer(True, "Аббревиатуры не найдены в документе") + + unexplained_abbr = [] + for abbr in abbreviations: + if not self._is_abbreviation_explained(abbr, text): + unexplained_abbr.append(abbr) + + if unexplained_abbr: + result_str = f"Найдены нерасшифрованные аббревиатуры: {', '.join(unexplained_abbr)}
" + result_str += "Каждая аббревиатура должна быть расшифрована при первом использовании в тексте." + return answer(False, result_str) + else: + return answer(True, "Все аббревиатуры правильно расшифрованы") + + except Exception as e: + return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}") + + + def _get_document_text(self): + + if hasattr(self.file, 'pdf_file'): + page_texts = self.file.pdf_file.get_text_on_page() + return " ".join(page_texts.values()) + elif hasattr(self.file, 'paragraphs'): + text_parts = [] + for paragraph in self.file.paragraphs: + text = paragraph.to_string() + if '\n' in text: + text = text.split('\n')[1] + text_parts.append(text) + return "\n".join(text_parts) + return None + + def _find_abbreviations(self, text: str): + pattern = r'\b[А-ЯA-Z]{2,5}\b' + abbreviations = re.findall(pattern, text) + + common_abbr = { + 'СССР', 'РФ', 'США', 'ВКР', 'ИТ', 'ПО', 'ООО', 'ЗАО', 'ОАО', 'HTML', 'CSS', + 'JS', 'ЛЭТИ', 'МОЕВМ', 'ЭВМ', 'DVD', 'SSD', 'PC', 'HDD', + 'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', + 'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL', + 'CS', 'DS', 'ES', 'SS', 'FS', 'GS', + 'IP', 'EIP', 'RIP', + 'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', + 'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP', + 'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP', + 'DOS', 'OS', 'BIOS', 'UEFI', 'MBR', 'GPT', + 'ASCII', 'UTF', 'UNICODE', 'ANSI', + 'ЭВМ', 'МОЭВМ', + 'CPU', 'GPU', 'APU', 'RAM', 'ROM', 'PROM', 'EPROM', 'EEPROM', + 'USB', 'SATA', 'PCI', 'PCIe', 'AGP', 'ISA', 'VGA', 'HDMI', 'DP', + 'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP', 'IP', + 'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS', + 'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML', + 'MAC', 'IBM' + } + filtered_abbr = [abbr for abbr in abbreviations if abbr not in common_abbr] + + return list(set(filtered_abbr)) + + + def _is_abbreviation_explained(self, abbr: str, text: str) -> bool: + patterns = [ + rf'{abbr}\s*\([^)]+\)', # АААА (расшифровка) + rf'\([^)]+\)\s*{abbr}', # (расшифровка) АААА + rf'{abbr}\s*—\s*[^.,;!?]+', # АААА — расшифровка + rf'{abbr}\s*-\s*[^.,;!?]+', # АААА - расшифровка + rf'[^.,;!?]+\s*—\s*{abbr}', # расшифровка — АААА + rf'[^.,;!?]+\s*-\s*{abbr}' # расшифровка - АААА + ] + + for pattern in patterns: + if re.search(pattern, text, re.IGNORECASE): + return True + + return False From c89d506e4ecf73d97627da2bb013e111a8941a14 Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Fri, 21 Nov 2025 14:24:10 +0300 Subject: [PATCH 02/28] Fixed if present check --- app/main/check_packs/pack_config.py | 1 + .../checks/presentation_checks/abbreviations_presentation.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 4f8d66ea..01f6509a 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -23,6 +23,7 @@ ['task_tracker'], ['overview_in_tasks'], ['PresAbbreviationsCheck'], + ['abbreviations_check'] ] BASE_REPORT_CRITERION = [ ["simple_check"], diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py index fea02c03..5d2b4dc2 100644 --- a/app/main/checks/presentation_checks/abbreviations_presentation.py +++ b/app/main/checks/presentation_checks/abbreviations_presentation.py @@ -5,7 +5,7 @@ class PresAbbreviationsCheck(BasePresCriterion): label = "Проверка расшифровки аббревиатур в презентации" description = "Все аббревиатуры должны быть расшифрованы при первом использовании" - id = 'pres_abbreviations_check' + id = 'abbreviations_check_pres' def __init__(self, file_info): super().__init__(file_info) From 8394b5a2e00c9d9f3105682e28b516e57a24844f Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Mon, 1 Dec 2025 11:41:13 +0300 Subject: [PATCH 03/28] abbr taken out --- app/main/check_packs/pack_config.py | 3 +- app/main/checks/check_abbreviations.py | 62 ++++++++++++++ .../abbreviations_presentation.py | 83 ++++-------------- .../report_checks/abbreviations_check.py | 85 ++++++------------- 4 files changed, 105 insertions(+), 128 deletions(-) create mode 100644 app/main/checks/check_abbreviations.py diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 01f6509a..008d3c99 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -22,8 +22,7 @@ ['pres_image_capture'], ['task_tracker'], ['overview_in_tasks'], - ['PresAbbreviationsCheck'], - ['abbreviations_check'] + ['abbreviations_check_pres'], ] BASE_REPORT_CRITERION = [ ["simple_check"], diff --git a/app/main/checks/check_abbreviations.py b/app/main/checks/check_abbreviations.py new file mode 100644 index 00000000..791b12a9 --- /dev/null +++ b/app/main/checks/check_abbreviations.py @@ -0,0 +1,62 @@ +import re + +def get_unexplained_abbrev(text): + abbreviations = find_abbreviations(text) + + if not abbreviations: + return False, None + + unexplained_abbr = [] + for abbr in abbreviations: + if not is_abbreviation_explained(abbr, text): + unexplained_abbr.append(abbr) + + return True, unexplained_abbr + + + + +def find_abbreviations(text: str): + pattern = r'\b[А-ЯA-Z]{2,5}\b' + abbreviations = re.findall(pattern, text) + + common_abbr = { + 'СССР', 'РФ', 'США', 'ВКР', 'ИТ', 'ПО', 'ООО', 'ЗАО', 'ОАО', 'HTML', 'CSS', + 'JS', 'ЛЭТИ', 'МОЕВМ', 'ЭВМ', 'DVD', 'SSD', 'PC', 'HDD', + 'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', + 'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL', + 'CS', 'DS', 'ES', 'SS', 'FS', 'GS', + 'IP', 'EIP', 'RIP', + 'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', + 'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP', + 'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP', + 'DOS', 'OS', 'BIOS', 'UEFI', 'MBR', 'GPT', + 'ASCII', 'UTF', 'UNICODE', 'ANSI', + 'ЭВМ', 'МОЭВМ', + 'CPU', 'GPU', 'APU', 'RAM', 'ROM', 'PROM', 'EPROM', 'EEPROM', + 'USB', 'SATA', 'PCI', 'PCIe', 'AGP', 'ISA', 'VGA', 'HDMI', 'DP', + 'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP', 'IP', + 'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS', + 'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML', + 'MAC', 'IBM' + } + filtered_abbr = [abbr for abbr in abbreviations if abbr not in common_abbr] + + return list(set(filtered_abbr)) + + +def is_abbreviation_explained(abbr: str, text: str) -> bool: + patterns = [ + rf'{abbr}\s*\([^)]+\)', # АААА (расшифровка) + rf'\([^)]+\)\s*{abbr}', # (расшифровка) АААА + rf'{abbr}\s*—\s*[^.,;!?]+', # АААА — расшифровка + rf'{abbr}\s*-\s*[^.,;!?]+', # АААА - расшифровка + rf'[^.,;!?]+\s*—\s*{abbr}', # расшифровка — АААА + rf'[^.,;!?]+\s*-\s*{abbr}' # расшифровка - АААА + ] + + for pattern in patterns: + if re.search(pattern, text, re.IGNORECASE): + return True + + return False diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py index 5d2b4dc2..6444919b 100644 --- a/app/main/checks/presentation_checks/abbreviations_presentation.py +++ b/app/main/checks/presentation_checks/abbreviations_presentation.py @@ -1,5 +1,6 @@ import re from ..base_check import BasePresCriterion, answer +from ..check_abbreviations import get_unexplained_abbrev class PresAbbreviationsCheck(BasePresCriterion): @@ -18,35 +19,29 @@ def check(self): return answer(False, "Не удалось получить текст презентации") full_text = " ".join(slides_text) + + abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=full_text) - abbreviations = self._find_abbreviations(full_text) - - if not abbreviations: + if not abbr_is_finding: return answer(True, "Аббревиатуры не найдены в презентации") - unexplained_abbr_with_slides = [] - for abbr in abbreviations: - slides_with_abbr = self._find_abbreviation_slides(abbr, slides_text) + if not unexplained_abbr: + return answer(True, "Все аббревиатуры правильно расшифрованы") - if not self._is_abbreviation_explained(abbr, full_text): - unexplained_abbr_with_slides.append({ - 'abbr': abbr, - 'slides': slides_with_abbr - }) - + unexplained_abbr_with_slides = {} - if unexplained_abbr_with_slides: - result_str = "Найдены нерасшифрованные аббревиатуры:
" + for slide_num, slide_text in enumerate(slides_text, 1): + for abbr in unexplained_abbr: + if abbr in slide_text and abbr not in unexplained_abbr_with_slides: + unexplained_abbr_with_slides[abbr] = slide_num - for item in unexplained_abbr_with_slides: - slide_links = self.format_page_link(item['slides']) - result_str += f"- {item['abbr']} (слайды: {', '.join(slide_links)})
" - - result_str += "
Каждая аббревиатура должна быть расшифрована при первом использовании в презентации." - return answer(False, result_str) + result_str = "Найдены нерасшифрованные аббревиатуры при первом использовании:
" + slide_links = self.format_page_link(list(unexplained_abbr_with_slides.values())) + for index_links, abbr in enumerate(unexplained_abbr_with_slides): + result_str += f"- {abbr} на слайде {slide_links[index_links]}
" - else: - return answer(True, "Все аббревиатуры правильно расшифрованы") + result_str += "
Каждая аббревиатура должна быть расшифрована при первом использовании в презентации.
" + return answer(False, result_str) except Exception as e: return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}") @@ -60,47 +55,3 @@ def _find_abbreviation_slides(self, abbr: str, slides_text: list) -> list: found_slides.append(slide_num) return found_slides - - def _find_abbreviations(self, text: str): - pattern = r'\b[А-ЯA-Z]{2,5}\b' - abbreviations = re.findall(pattern, text) - - common_abbr = { - 'СССР', 'РФ', 'США', 'ВКР', 'ИТ', 'ПО', 'ООО', 'ЗАО', 'ОАО', 'HTML', 'CSS', - 'JS', 'ЛЭТИ', 'МОЕВМ', 'ЭВМ', 'DVD', 'SSD', 'PC', 'HDD', - 'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', - 'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL', - 'CS', 'DS', 'ES', 'SS', 'FS', 'GS', - 'IP', 'EIP', 'RIP', - 'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', - 'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP', - 'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP', - 'DOS', 'OS', 'BIOS', 'UEFI', 'MBR', 'GPT', - 'ASCII', 'UTF', 'UNICODE', 'ANSI', - 'ЭВМ', 'МОЭВМ', - 'CPU', 'GPU', 'APU', 'RAM', 'ROM', 'PROM', 'EPROM', 'EEPROM', - 'USB', 'SATA', 'PCI', 'PCIe', 'AGP', 'ISA', 'VGA', 'HDMI', 'DP', - 'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP', 'IP', - 'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS', - 'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML', - 'MAC', 'IBM' - } - filtered_abbr = [abbr for abbr in abbreviations if abbr not in common_abbr] - - return list(set(filtered_abbr)) - - def _is_abbreviation_explained(self, abbr: str, text: str) -> bool: - patterns = [ - rf'{abbr}\s*\([^)]+\)', # АААА (расшифровка) - rf'\([^)]+\)\s*{abbr}', # (расшифровка) АААА - rf'{abbr}\s*—\s*[^.,;!?]+', # АААА — расшифровка - rf'{abbr}\s*-\s*[^.,;!?]+', # АААА - расшифровка - rf'[^.,;!?]+\s*—\s*{abbr}', # расшифровка — АААА - rf'[^.,;!?]+\s*-\s*{abbr}' # расшифровка - АААА - ] - - for pattern in patterns: - if re.search(pattern, text): - return True - - return False \ No newline at end of file diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py index 593d6d6d..eeb0dc1b 100644 --- a/app/main/checks/report_checks/abbreviations_check.py +++ b/app/main/checks/report_checks/abbreviations_check.py @@ -1,6 +1,6 @@ import re from ..base_check import BaseReportCriterion, answer - +from ..check_abbreviations import get_unexplained_abbrev class AbbreviationsCheck(BaseReportCriterion): label = "Проверка расшифровки аббревиатур" @@ -17,26 +17,36 @@ def check(self): if not text: return answer(False, "Не удалось получить текст документа") - - abbreviations = self._find_abbreviations(text) - if not abbreviations: - return answer(True, "Аббревиатуры не найдены в документе") + abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=text) - unexplained_abbr = [] - for abbr in abbreviations: - if not self._is_abbreviation_explained(abbr, text): - unexplained_abbr.append(abbr) + if not abbr_is_finding: + return answer(True, "Аббревиатуры не найдены в документе") - if unexplained_abbr: - result_str = f"Найдены нерасшифрованные аббревиатуры: {', '.join(unexplained_abbr)}
" - result_str += "Каждая аббревиатура должна быть расшифрована при первом использовании в тексте." - return answer(False, result_str) - else: + if not unexplained_abbr: return answer(True, "Все аббревиатуры правильно расшифрованы") - + + unexplained_abbr_with_page = {} + + for page_num in range(1, self.file.page_counter() + 1): + text_on_page = self.file.pdf_file.text_on_page[page_num] + + for abbr in unexplained_abbr: + if abbr in text_on_page and abbr not in unexplained_abbr_with_page: + unexplained_abbr_with_page[abbr] = page_num + + + result_str = "Найдены нерасшифрованные аббревиатуры при первом использовании:" + page_links = self.format_page_link(list(unexplained_abbr_with_page.values())) + for index_links, abbr in enumerate(unexplained_abbr_with_page): + result_str += f"- {abbr} на странице {page_links[index_links]}
" + result_str += "Каждая аббревиатура должна быть расшифрована при первом использовании в тексте.
" + + return answer(False, result_str) + except Exception as e: return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}") + def _get_document_text(self): @@ -53,48 +63,3 @@ def _get_document_text(self): text_parts.append(text) return "\n".join(text_parts) return None - - def _find_abbreviations(self, text: str): - pattern = r'\b[А-ЯA-Z]{2,5}\b' - abbreviations = re.findall(pattern, text) - - common_abbr = { - 'СССР', 'РФ', 'США', 'ВКР', 'ИТ', 'ПО', 'ООО', 'ЗАО', 'ОАО', 'HTML', 'CSS', - 'JS', 'ЛЭТИ', 'МОЕВМ', 'ЭВМ', 'DVD', 'SSD', 'PC', 'HDD', - 'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', - 'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL', - 'CS', 'DS', 'ES', 'SS', 'FS', 'GS', - 'IP', 'EIP', 'RIP', - 'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', - 'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP', - 'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP', - 'DOS', 'OS', 'BIOS', 'UEFI', 'MBR', 'GPT', - 'ASCII', 'UTF', 'UNICODE', 'ANSI', - 'ЭВМ', 'МОЭВМ', - 'CPU', 'GPU', 'APU', 'RAM', 'ROM', 'PROM', 'EPROM', 'EEPROM', - 'USB', 'SATA', 'PCI', 'PCIe', 'AGP', 'ISA', 'VGA', 'HDMI', 'DP', - 'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP', 'IP', - 'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS', - 'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML', - 'MAC', 'IBM' - } - filtered_abbr = [abbr for abbr in abbreviations if abbr not in common_abbr] - - return list(set(filtered_abbr)) - - - def _is_abbreviation_explained(self, abbr: str, text: str) -> bool: - patterns = [ - rf'{abbr}\s*\([^)]+\)', # АААА (расшифровка) - rf'\([^)]+\)\s*{abbr}', # (расшифровка) АААА - rf'{abbr}\s*—\s*[^.,;!?]+', # АААА — расшифровка - rf'{abbr}\s*-\s*[^.,;!?]+', # АААА - расшифровка - rf'[^.,;!?]+\s*—\s*{abbr}', # расшифровка — АААА - rf'[^.,;!?]+\s*-\s*{abbr}' # расшифровка - АААА - ] - - for pattern in patterns: - if re.search(pattern, text, re.IGNORECASE): - return True - - return False From 975b4d2effa64f7ba007ffb30cf4aa20fef8a090 Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Mon, 1 Dec 2025 11:41:13 +0300 Subject: [PATCH 04/28] correct check abbr --- app/main/check_packs/pack_config.py | 4 ++ app/main/checks/check_abbreviations.py | 38 ++++++++++++++----- .../abbreviations_presentation.py | 2 + .../report_checks/abbreviations_check.py | 4 +- 4 files changed, 36 insertions(+), 12 deletions(-) diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 008d3c99..fb428d7e 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -22,7 +22,11 @@ ['pres_image_capture'], ['task_tracker'], ['overview_in_tasks'], +<<<<<<< HEAD ['abbreviations_check_pres'], +======= + ['pres_abbreviations_check'], +>>>>>>> 2c581e8 (correct check abbr) ] BASE_REPORT_CRITERION = [ ["simple_check"], diff --git a/app/main/checks/check_abbreviations.py b/app/main/checks/check_abbreviations.py index 791b12a9..f810b1b1 100644 --- a/app/main/checks/check_abbreviations.py +++ b/app/main/checks/check_abbreviations.py @@ -1,4 +1,7 @@ import re +from pymorphy2 import MorphAnalyzer +morph = MorphAnalyzer() + def get_unexplained_abbrev(text): abbreviations = find_abbreviations(text) @@ -22,7 +25,9 @@ def find_abbreviations(text: str): common_abbr = { 'СССР', 'РФ', 'США', 'ВКР', 'ИТ', 'ПО', 'ООО', 'ЗАО', 'ОАО', 'HTML', 'CSS', - 'JS', 'ЛЭТИ', 'МОЕВМ', 'ЭВМ', 'DVD', 'SSD', 'PC', 'HDD', + 'JS', 'ЛЭТИ', 'МОЕВМ', 'ЭВМ', 'ГОСТ', 'DVD' + + 'SSD', 'PC', 'HDD', 'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', 'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL', 'CS', 'DS', 'ES', 'SS', 'FS', 'GS', @@ -38,25 +43,38 @@ def find_abbreviations(text: str): 'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP', 'IP', 'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS', 'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML', - 'MAC', 'IBM' + 'MAC', 'IBM', 'ГОСТ' } - filtered_abbr = [abbr for abbr in abbreviations if abbr not in common_abbr] + filtered_abbr = [abbr for abbr in abbreviations if abbr not in common_abbr and morph.parse(abbr.lower())[0].score != 0] return list(set(filtered_abbr)) def is_abbreviation_explained(abbr: str, text: str) -> bool: patterns = [ - rf'{abbr}\s*\([^)]+\)', # АААА (расшифровка) - rf'\([^)]+\)\s*{abbr}', # (расшифровка) АААА - rf'{abbr}\s*—\s*[^.,;!?]+', # АААА — расшифровка - rf'{abbr}\s*-\s*[^.,;!?]+', # АААА - расшифровка - rf'[^.,;!?]+\s*—\s*{abbr}', # расшифровка — АААА - rf'[^.,;!?]+\s*-\s*{abbr}' # расшифровка - АААА + rf'{abbr}\s*\(([^)]+)\)', # АААА (расшифровка) + rf'\(([^)]+)\)\s*{abbr}', # (расшифровка) АААА + rf'{abbr}\s*[—\-]\s*([^.,;!?]+)', # АААА — расшифровка + rf'{abbr}\s*-\s*([^.,;!?]+)', # АААА - расшифровка + rf'([^.,;!?]+)\s*[—\-]\s*{abbr}', # расшифровка — АААА + rf'([^.,;!?]+)\s*-\s*{abbr}' # расшифровка - АААА ] + for pattern in patterns: - if re.search(pattern, text, re.IGNORECASE): + match = re.search(pattern, text, re.IGNORECASE) + if match and correctly_explained(abbr, match.group(1)): return True return False + +def correctly_explained(abbr, explan): + words = explan.split() + + first_letter = "" + for word in words: + first_letter += word[0].upper() + + if(first_letter == abbr[len(first_letter)]): + return True + return False diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py index 6444919b..17c4a672 100644 --- a/app/main/checks/presentation_checks/abbreviations_presentation.py +++ b/app/main/checks/presentation_checks/abbreviations_presentation.py @@ -41,6 +41,8 @@ def check(self): result_str += f"- {abbr} на слайде {slide_links[index_links]}
" result_str += "
Каждая аббревиатура должна быть расшифрована при первом использовании в презентации.
" + result_str += "Расшифровка должны быть по первыми буквам, например, МВД - Министерство внутренних дел.
" + return answer(False, result_str) except Exception as e: diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py index eeb0dc1b..33a97d0c 100644 --- a/app/main/checks/report_checks/abbreviations_check.py +++ b/app/main/checks/report_checks/abbreviations_check.py @@ -1,4 +1,3 @@ -import re from ..base_check import BaseReportCriterion, answer from ..check_abbreviations import get_unexplained_abbrev @@ -36,11 +35,12 @@ def check(self): unexplained_abbr_with_page[abbr] = page_num - result_str = "Найдены нерасшифрованные аббревиатуры при первом использовании:" + result_str = "Найдены нерасшифрованные аббревиатуры при первом использовании:
" page_links = self.format_page_link(list(unexplained_abbr_with_page.values())) for index_links, abbr in enumerate(unexplained_abbr_with_page): result_str += f"- {abbr} на странице {page_links[index_links]}
" result_str += "Каждая аббревиатура должна быть расшифрована при первом использовании в тексте.
" + result_str += "Расшифровка должны быть по первыми буквам, например, МВД - Министерство внутренних дел.
" return answer(False, result_str) From 280d9cd65fa911297822e2ba3d673d76b1ff39f2 Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Thu, 4 Dec 2025 11:03:47 +0300 Subject: [PATCH 05/28] fixed checks 1 --- app/main/check_packs/pack_config.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index fb428d7e..14ddcd51 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -22,11 +22,7 @@ ['pres_image_capture'], ['task_tracker'], ['overview_in_tasks'], -<<<<<<< HEAD - ['abbreviations_check_pres'], -======= ['pres_abbreviations_check'], ->>>>>>> 2c581e8 (correct check abbr) ] BASE_REPORT_CRITERION = [ ["simple_check"], From 7fe11be3deb3c239f485beb167f844afd7461914 Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Thu, 4 Dec 2025 11:48:10 +0300 Subject: [PATCH 06/28] fixed checks 2 --- app/main/check_packs/pack_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 14ddcd51..008d3c99 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -22,7 +22,7 @@ ['pres_image_capture'], ['task_tracker'], ['overview_in_tasks'], - ['pres_abbreviations_check'], + ['abbreviations_check_pres'], ] BASE_REPORT_CRITERION = [ ["simple_check"], From edd07ce6ba794f67ae5347f7894dbd7ae7ee15fd Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Thu, 4 Dec 2025 11:52:20 +0300 Subject: [PATCH 07/28] fixed checks 3 --- app/main/checks/report_checks/__init__.py | 2 +- app/main/checks/report_checks/abbreviations_check.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py index 4cda7b53..2eebac6b 100644 --- a/app/main/checks/report_checks/__init__.py +++ b/app/main/checks/report_checks/__init__.py @@ -32,4 +32,4 @@ from .sw_section_size import SWSectionSizeCheck from .sw_keywords_check import SWKeywordsCheck from .task_tracker import ReportTaskTracker -from .abbreviations_check import AbbreviationsCheck +from .abbreviations_check import AbbreviationsCheckPres diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py index 33a97d0c..20db6fac 100644 --- a/app/main/checks/report_checks/abbreviations_check.py +++ b/app/main/checks/report_checks/abbreviations_check.py @@ -1,7 +1,7 @@ from ..base_check import BaseReportCriterion, answer from ..check_abbreviations import get_unexplained_abbrev -class AbbreviationsCheck(BaseReportCriterion): +class AbbreviationsCheckPres(BaseReportCriterion): label = "Проверка расшифровки аббревиатур" description = "Все аббревиатуры должны быть расшифрованы при первом использовании" id = 'abbreviations_check' From ab8f2f77f22f2426c0566cfb10f54c11c9846cda Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 5 Dec 2025 14:52:31 +0300 Subject: [PATCH 08/28] Update banned_words_in_literature.py --- .../checks/report_checks/banned_words_in_literature.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/app/main/checks/report_checks/banned_words_in_literature.py b/app/main/checks/report_checks/banned_words_in_literature.py index 9763e59f..498afa56 100644 --- a/app/main/checks/report_checks/banned_words_in_literature.py +++ b/app/main/checks/report_checks/banned_words_in_literature.py @@ -8,11 +8,14 @@ class BannedWordsInLiteratureCheck(BaseReportCriterion): description = 'Запрещено упоминание слова "wikipedia"' id = 'banned_words_in_literature' - def __init__(self, file_info, banned_words=["wikipedia"]): + def __init__(self, file_info, banned_words=None): super().__init__(file_info) + self.banned_words = ["habr", "medium", "stackoverflow", "sky.pro", "geeksforgeeks", "wikipedia"] + if banned_words: + self.banned_words += banned_words + self.banned_words = [morph.normal_forms(word)[0] for word in self.banned_words] self.headers_page = 1 self.literature_header = [] - self.banned_words = [morph.normal_forms(word)[0] for word in banned_words] self.name_pattern = r'список[ \t]*(использованных|использованной|)[ \t]*(источников|литературы)' def late_init_vkr(self): From 331a1210c58862531ce37f9c3fe274b01e86a306 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 20 Nov 2025 19:36:10 +0300 Subject: [PATCH 09/28] fix svg size and background color --- app/main/check_packs/base_criterion_pack.py | 2 +- app/routes/results.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/app/main/check_packs/base_criterion_pack.py b/app/main/check_packs/base_criterion_pack.py index c9437e5a..431b519d 100644 --- a/app/main/check_packs/base_criterion_pack.py +++ b/app/main/check_packs/base_criterion_pack.py @@ -72,5 +72,5 @@ def get_proportion(result): score = 0. for check in result: score += float(check['score']) - return score, len(result) + return round(score, 2), len(result) diff --git a/app/routes/results.py b/app/routes/results.py index 71eb3ecb..956ca17b 100644 --- a/app/routes/results.py +++ b/app/routes/results.py @@ -45,15 +45,15 @@ def results_svg(_id): result_proportion = check.get_proportion() if check.is_ended: svg_text = f""" - + Результат: {result_proportion[0]}/{result_proportion[1]} - {'' if check.is_passed else 'не '}пройдена + {'' if check.is_passed else 'не '}пройдена """ else: svg_text = f""" - + Работа проверяется """ From ac65e061bd435395a86647a08c91834f8af678a9 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 20 Nov 2025 19:36:24 +0300 Subject: [PATCH 10/28] fix dev docker compose --- docker-compose-dev.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index d993798a..8eeb833d 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -39,3 +39,4 @@ volumes: files: redis_data: flower_data: + language_tool_cache: From c69548c1d3f5697d203faa6fb2f435b355749749 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 20 Nov 2025 19:38:43 +0300 Subject: [PATCH 11/28] print traceback to logs (and check result) --- app/main/check_packs/base_criterion_pack.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/main/check_packs/base_criterion_pack.py b/app/main/check_packs/base_criterion_pack.py index 431b519d..6b4e136b 100644 --- a/app/main/check_packs/base_criterion_pack.py +++ b/app/main/check_packs/base_criterion_pack.py @@ -1,4 +1,5 @@ import logging +import traceback from .utils import init_criterions logger = logging.getLogger('root_logger') @@ -26,8 +27,10 @@ def check(self): try: criterion_check_result = criterion.check() except Exception as e: - err_msg = f'{criterion.id}: oшибка во время проверки: {e}' + trace_msg = traceback.format_exc() + err_msg = f'{criterion.id}: oшибка во время проверки: {e} ({trace_msg[len(trace_msg)/2:]})' logger.error(err_msg) + logger.error(trace_msg) criterion_check_result = {'score': 0, 'verdict': [UNEXPECTED_CHECK_FAIL_MSG, f"Информация об ошибке для администратора: {err_msg}"]} if criterion.priority and not criterion_check_result['score']: failed_priority_check = True From bbbf47de74cf63f5694c17ea886a27d4ae3a3bcc Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 20 Nov 2025 19:43:47 +0300 Subject: [PATCH 12/28] little change for svg size --- app/routes/results.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/routes/results.py b/app/routes/results.py index 956ca17b..91cc5b7f 100644 --- a/app/routes/results.py +++ b/app/routes/results.py @@ -45,15 +45,15 @@ def results_svg(_id): result_proportion = check.get_proportion() if check.is_ended: svg_text = f""" - + Результат: - {result_proportion[0]}/{result_proportion[1]} + {result_proportion[0]}/{result_proportion[1]} {'' if check.is_passed else 'не '}пройдена """ else: svg_text = f""" - + Работа проверяется """ From 31371af7b43042f3787f6931805399288fd977c1 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 20 Nov 2025 19:47:48 +0300 Subject: [PATCH 13/28] add more feedback for UNEXPECTED_CHECK_FAIL_MSG --- app/main/check_packs/base_criterion_pack.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/main/check_packs/base_criterion_pack.py b/app/main/check_packs/base_criterion_pack.py index 6b4e136b..b48e811c 100644 --- a/app/main/check_packs/base_criterion_pack.py +++ b/app/main/check_packs/base_criterion_pack.py @@ -5,7 +5,7 @@ logger = logging.getLogger('root_logger') PRIORITY_CHECK_FAILED_MSG = "Данный критерий является обязательным для прохождения.
Результат всей проверки обнулен, но вы можете ознакомиться с результатами каждого критерия.

" -UNEXPECTED_CHECK_FAIL_MSG = "Во время проверки произошла ошибка, попробуйте позже или обратитесь к администратору системы." +UNEXPECTED_CHECK_FAIL_MSG = "Во время проверки произошла ошибка: проверьте соответствия оформления файла шаблона (в том числе разделов и уровней заголовков), попробуйте позже или обратитесь к администратору системы." class BaseCriterionPack: @@ -28,7 +28,7 @@ def check(self): criterion_check_result = criterion.check() except Exception as e: trace_msg = traceback.format_exc() - err_msg = f'{criterion.id}: oшибка во время проверки: {e} ({trace_msg[len(trace_msg)/2:]})' + err_msg = f'{criterion.id}: oшибка во время проверки: {e} ({trace_msg[len(trace_msg)//2:]})' logger.error(err_msg) logger.error(trace_msg) criterion_check_result = {'score': 0, 'verdict': [UNEXPECTED_CHECK_FAIL_MSG, f"Информация об ошибке для администратора: {err_msg}"]} From df61abdd54d2135dae7ecd9d8fcec8ffa1aed95b Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Thu, 20 Nov 2025 20:13:25 +0300 Subject: [PATCH 14/28] little kostil' --- app/main/reports/md_uploader/md_uploader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main/reports/md_uploader/md_uploader.py b/app/main/reports/md_uploader/md_uploader.py index da48f32c..735a8b68 100644 --- a/app/main/reports/md_uploader/md_uploader.py +++ b/app/main/reports/md_uploader/md_uploader.py @@ -176,7 +176,7 @@ def build_chapter_tree(self, chapters): while len(stack) > level: stack.pop() - parent = stack[-1] + parent = stack[-1] if stack else [] new_chapter = { 'name': chapter['name'], 'text': chapter['text'], From c90d82d8dd38244b7ae26c1d557627d06b4f637f Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 28 Nov 2025 12:39:23 +0300 Subject: [PATCH 15/28] update template results.html --- app/templates/results.html | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/app/templates/results.html b/app/templates/results.html index b049b1ba..b858d90c 100644 --- a/app/templates/results.html +++ b/app/templates/results.html @@ -54,11 +54,16 @@

Список всех загрузок пользователя - Cписок загрузок пользователя по критерию "{{current_user.criteria}}" + Cписок загрузок пользователя по критерию "{{results.criteria}}" {% endif %} {% if results.is_ended and not results.is_failed %} +
+
+

Подробности проверки по каждому критерию доступны по нажатию в левом столбце.

+
+
@@ -102,7 +107,7 @@

- Слайд из + Страница из From 3269e5303fa62013bbde177de20466837bc9d803 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 7 Jul 2024 17:55:52 +0300 Subject: [PATCH 16/28] 569: fix big files in webpack --- app/db/clear_users.py | 5 +++-- app/templates/404.html | 2 ++ app/templates/admin_criterions.html | 1 + app/templates/admin_pages_list.html | 1 + app/templates/check_list.html | 3 ++- app/templates/criteria_pack.html | 3 +++ app/templates/intro_page.html | 2 ++ app/templates/login.html | 2 ++ app/templates/logs.html | 2 ++ app/templates/one_user_info.html | 1 + app/templates/pack_list.html | 1 + app/templates/profile.html | 2 +- app/templates/results.html | 1 + app/templates/root.html | 4 +++- app/templates/signup.html | 2 ++ app/templates/upload.html | 4 +++- app/templates/user_list.html | 1 + app/templates/version.html | 1 + assets/scripts/check_list.js | 2 ++ assets/scripts/general.js | 21 +++++++++++++++++++++ assets/scripts/general_imports.js | 21 +++++++++++++++++++++ assets/scripts/login.js | 3 +-- assets/scripts/{main.js => main.js-bat} | 0 assets/scripts/signup.js | 2 +- webpack.config.js | 20 +++++++++++++++++++- 25 files changed, 97 insertions(+), 10 deletions(-) create mode 100644 assets/scripts/general.js create mode 100644 assets/scripts/general_imports.js rename assets/scripts/{main.js => main.js-bat} (100%) diff --git a/app/db/clear_users.py b/app/db/clear_users.py index 7ef8e585..d0dff801 100644 --- a/app/db/clear_users.py +++ b/app/db/clear_users.py @@ -4,5 +4,6 @@ logger = logging.getLogger('root_logger') -client.drop_database('dis-db') -logger.info("Вся информация очищена!") + +def drop_database(): + client.drop_database('pres-parser-db') diff --git a/app/templates/404.html b/app/templates/404.html index 86940cb3..bf8140dd 100644 --- a/app/templates/404.html +++ b/app/templates/404.html @@ -6,4 +6,6 @@
Страница не найдена!
+ + {% endblock %} diff --git a/app/templates/admin_criterions.html b/app/templates/admin_criterions.html index 05d0bf9f..4ad45716 100644 --- a/app/templates/admin_criterions.html +++ b/app/templates/admin_criterions.html @@ -46,6 +46,7 @@

+ diff --git a/app/templates/admin_pages_list.html b/app/templates/admin_pages_list.html index de17fb02..67a1e339 100644 --- a/app/templates/admin_pages_list.html +++ b/app/templates/admin_pages_list.html @@ -11,5 +11,6 @@

Список страниц для администра
  • Таблица с информацией о пользователях
  • + {% endblock main %} diff --git a/app/templates/check_list.html b/app/templates/check_list.html index 3ce0b59c..e1f1f156 100644 --- a/app/templates/check_list.html +++ b/app/templates/check_list.html @@ -69,9 +69,10 @@ {% endblock main %} - {% block script %} + {% endblock %} + diff --git a/app/templates/criteria_pack.html b/app/templates/criteria_pack.html index 7b666b84..b5ee8af0 100644 --- a/app/templates/criteria_pack.html +++ b/app/templates/criteria_pack.html @@ -54,4 +54,7 @@ + + + {% endblock %} \ No newline at end of file diff --git a/app/templates/intro_page.html b/app/templates/intro_page.html index f54be326..cd02c8d9 100644 --- a/app/templates/intro_page.html +++ b/app/templates/intro_page.html @@ -20,4 +20,6 @@

    Добро пожаловать в Doc + + {% endblock %} diff --git a/app/templates/login.html b/app/templates/login.html index 5a8aea1d..025b5d2d 100644 --- a/app/templates/login.html +++ b/app/templates/login.html @@ -43,4 +43,6 @@

    Вход

    + + {% endblock %} diff --git a/app/templates/logs.html b/app/templates/logs.html index 0ef51a0a..44ed6bb3 100644 --- a/app/templates/logs.html +++ b/app/templates/logs.html @@ -62,4 +62,6 @@ + + {% endblock main %} diff --git a/app/templates/one_user_info.html b/app/templates/one_user_info.html index ac3be452..5266bfa5 100644 --- a/app/templates/one_user_info.html +++ b/app/templates/one_user_info.html @@ -60,5 +60,6 @@

    + {% endblock main %} diff --git a/app/templates/pack_list.html b/app/templates/pack_list.html index 71edc8e7..43d7ba15 100644 --- a/app/templates/pack_list.html +++ b/app/templates/pack_list.html @@ -76,5 +76,6 @@ + {% endblock %} \ No newline at end of file diff --git a/app/templates/profile.html b/app/templates/profile.html index 0fd8652f..22f5c3dc 100644 --- a/app/templates/profile.html +++ b/app/templates/profile.html @@ -39,5 +39,5 @@

    {{ user.name }}

    {% endif %} - + {% endblock %} diff --git a/app/templates/results.html b/app/templates/results.html index b858d90c..1738168d 100644 --- a/app/templates/results.html +++ b/app/templates/results.html @@ -116,4 +116,5 @@

    + {% endblock %} diff --git a/app/templates/root.html b/app/templates/root.html index 38719c8b..e15fd63a 100644 --- a/app/templates/root.html +++ b/app/templates/root.html @@ -17,6 +17,8 @@ - + + + {% block script %}{% endblock %} diff --git a/app/templates/signup.html b/app/templates/signup.html index ec4e9440..633cf9e5 100644 --- a/app/templates/signup.html +++ b/app/templates/signup.html @@ -54,4 +54,6 @@

    Создать аккаунт

    + + {% endblock %} diff --git a/app/templates/upload.html b/app/templates/upload.html index 09632d45..f4a140d0 100644 --- a/app/templates/upload.html +++ b/app/templates/upload.html @@ -73,7 +73,7 @@
    {{ uploading_label }}
    - Критерии: + Критерии: {% for criterion_name, criterion_descrpt in list_of_check.items() %} @@ -91,4 +91,6 @@
    {{ uploading_label }}
    + + {% endblock %} diff --git a/app/templates/user_list.html b/app/templates/user_list.html index 0ce9b511..69c73c7c 100644 --- a/app/templates/user_list.html +++ b/app/templates/user_list.html @@ -58,6 +58,7 @@

    + {% endblock main %} diff --git a/app/templates/version.html b/app/templates/version.html index 3f6b6192..46ea45b5 100644 --- a/app/templates/version.html +++ b/app/templates/version.html @@ -45,5 +45,6 @@ {% endif %} + {% endblock main %} diff --git a/assets/scripts/check_list.js b/assets/scripts/check_list.js index 39d5e4b3..2fcc2c14 100644 --- a/assets/scripts/check_list.js +++ b/assets/scripts/check_list.js @@ -1,3 +1,5 @@ +import {collect_values_if_possible, hash} from "./general"; + import { debounce, isFloat, resetTable, ajaxRequest, onPopState } from "./utils" let $table; diff --git a/assets/scripts/general.js b/assets/scripts/general.js new file mode 100644 index 00000000..2a94488f --- /dev/null +++ b/assets/scripts/general.js @@ -0,0 +1,21 @@ +import * as md5 from "md5"; + +export function hash(password) { + return md5(password) +} + +export function collect_values_if_possible(...ids) { + const id_array = [...ids]; + const necessary_fields = $(id_array.map(el => "#" + el).join(", ")); + let valid = true; + necessary_fields.map(function () { + $(this).toggleClass("is-invalid", this.value === ""); + valid &= (this.value !== ""); + return this; + }); + if (valid) { + const result = Object(); + for (const field of necessary_fields) result[field.id] = field.value; + return result; + } +} diff --git a/assets/scripts/general_imports.js b/assets/scripts/general_imports.js new file mode 100644 index 00000000..cfe1c19f --- /dev/null +++ b/assets/scripts/general_imports.js @@ -0,0 +1,21 @@ +import 'bootstrap'; +import 'bootstrap/dist/css/bootstrap.min.css'; + +import 'bootstrap-table'; +import 'bootstrap-table/dist/bootstrap-table.min.css' + +import 'bootstrap-table/dist/extensions/filter-control/bootstrap-table-filter-control' +import 'bootstrap-table/dist/extensions/filter-control/bootstrap-table-filter-control.min.css' + +import 'bootstrap-table/dist/extensions/auto-refresh/bootstrap-table-auto-refresh.js' + +import 'bootstrap-icons/font/bootstrap-icons.css' + +import 'bootstrap-datepicker'; +import 'bootstrap-datepicker/dist/css/bootstrap-datepicker.min.css' + + +import '../styles/main.css'; + +import '../favicon.ico'; +import '../styles/404.css'; diff --git a/assets/scripts/login.js b/assets/scripts/login.js index 08e65f6d..647c30ea 100644 --- a/assets/scripts/login.js +++ b/assets/scripts/login.js @@ -1,8 +1,7 @@ -import {collect_values_if_possible, hash} from "./main"; +import {collect_values_if_possible, hash} from "./general"; import '../styles/login.css'; - $("#login_button").click(async () => { const params = collect_values_if_possible("login_text_field", "password_text_field"); diff --git a/assets/scripts/main.js b/assets/scripts/main.js-bat similarity index 100% rename from assets/scripts/main.js rename to assets/scripts/main.js-bat diff --git a/assets/scripts/signup.js b/assets/scripts/signup.js index 27fb0cf4..7d6e98cd 100644 --- a/assets/scripts/signup.js +++ b/assets/scripts/signup.js @@ -1,4 +1,4 @@ -import {collect_values_if_possible, hash} from "./main"; +import {collect_values_if_possible, hash} from "./general"; import '../styles/signup.css'; diff --git a/webpack.config.js b/webpack.config.js index 77928871..161f4314 100644 --- a/webpack.config.js +++ b/webpack.config.js @@ -4,7 +4,25 @@ const { CleanWebpackPlugin } = require('clean-webpack-plugin'); module.exports = { mode: 'production', - entry: ['core-js/stable', 'regenerator-runtime/runtime', "./assets/scripts/main.js"], +// entry: ['core-js/stable', 'regenerator-runtime/runtime', "./assets/scripts/main.js"], + entry: { +// stable: 'core-js/stable', +// runtime: 'regenerator-runtime/runtime', +// main: ['core-js/stable', 'regenerator-runtime/runtime',"./assets/scripts/main.js"], + admin_criterions: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/admin_criterions.js'], + check_list: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/check_list.js'], + criterion_pack: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/criterion_pack.js'], + general: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/general.js'], + login: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/login.js'], + logs: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/logs.js'], + one_user_info: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/one_user_info.js'], + profile: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/profile.js'], + results: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/results.js'], + signup: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/signup.js'], + upload: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/upload.js'], + user_list: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/user_list.js'], + version: ['core-js/stable', 'regenerator-runtime/runtime', './assets/scripts/general_imports.js', './assets/scripts/version.js'], + }, output: { path: path.join(__dirname, './src/'), filename: "./[name].js" From bdfefdf1fbafedefb8d619b1ce9c69f74903c2d3 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sun, 30 Nov 2025 15:47:16 +0300 Subject: [PATCH 17/28] update some html-templates and styles --- app/templates/check_list.html | 1 - app/templates/logs.html | 1 - app/templates/results.html | 35 +++++++++++++++-------------------- app/templates/root.html | 2 +- assets/styles/main.css | 2 +- 5 files changed, 17 insertions(+), 24 deletions(-) diff --git a/app/templates/check_list.html b/app/templates/check_list.html index e1f1f156..0590e037 100644 --- a/app/templates/check_list.html +++ b/app/templates/check_list.html @@ -25,7 +25,6 @@ border: 1px solid #a1a1a1; } -
    -
    {% include 'header.html' %}
    - {% if results.is_ended %} - {% if results.is_failed %} -

    - Проверка завершилась с ошибкой, попробуйте загрузить файл заново -

    +
    +

    + {% if results.is_ended %} + {% if results.is_failed %} + Проверка завершилась с ошибкой, попробуйте загрузить файл заново + {% else %} + Результат проверки: {{ "" if results.correct() else "не" }} пройдена! + {% endif %} {% else %} -

    - Результат проверки: {{ "" if results.correct() else "не" }} пройдена! -

    - {% endif %} - {% else %} -

    Производится проверка файла, страница перезагрузится автоматически. Примерное время: {{ avg_process_time }} сек. -

    {% endif %} +
    @@ -42,10 +38,10 @@

    - + {% if results.is_ended and not results.is_failed %} -
    -
    -

    Подробности проверки по каждому критерию доступны по нажатию в левом столбце.

    -
    +
    + Подробности проверки по каждому критерию доступны по нажатию в левом столбце.
    + diff --git a/app/templates/root.html b/app/templates/root.html index e15fd63a..ef64cdb4 100644 --- a/app/templates/root.html +++ b/app/templates/root.html @@ -11,7 +11,7 @@
    {% include "header.html" %}
    -
    +
    {% block main %}{% endblock %}
    diff --git a/assets/styles/main.css b/assets/styles/main.css index ce807efa..890092a2 100644 --- a/assets/styles/main.css +++ b/assets/styles/main.css @@ -45,7 +45,7 @@ body { .holder { background: var(--background-color); - height: 95vh; + height: 96vh; overflow-y: auto; overflow-x: hidden; } From eece7ea203fd61bf086b1149f78e0875764aff2f Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sun, 30 Nov 2025 15:49:09 +0300 Subject: [PATCH 18/28] update Dockerfiles (base and main), requirements and some python-libs --- Dockerfile | 11 ++-- Dockerfile_base | 9 ++-- app/main/checks/base_check.py | 4 +- .../presentation_checks/find_theme_in_pres.py | 2 +- .../report_checks/find_theme_in_report.py | 2 +- .../checks/report_checks/sw_keywords_check.py | 2 +- .../pdf_document/pdf_document_manager.py | 8 +-- app/nlp/stemming.py | 2 +- requirements.txt | 52 +++++++++---------- 9 files changed, 45 insertions(+), 47 deletions(-) diff --git a/Dockerfile b/Dockerfile index 00795845..3babcdd1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM node:20-alpine as frontend_build +FROM node:20-alpine AS frontend_build WORKDIR /app ADD package.json webpack.config.js ./ @@ -7,10 +7,11 @@ RUN npm install && npm install webpack ADD ./assets ./assets RUN npm run build -FROM dvivanov/dis-base:v0.4 +FROM dvivanov/dis-base:v0.5 LABEL project='dis' -LABEL version='0.4' +LABEL version='0.5' +ENV PYTHONPATH="${PYTHONPATH}:/usr/src/project/app" WORKDIR /usr/src/project @@ -19,6 +20,4 @@ ADD ./db_versioning ./db_versioning/ ADD ./app ./app/ COPY --from=frontend_build /app/src ./src/ -ENV PYTHONPATH "${PYTHONPATH}:/usr/src/project/app" - -CMD ./scripts/local_start.sh +CMD ["./scripts/local_start.sh"] diff --git a/Dockerfile_base b/Dockerfile_base index 0f196f92..fb414224 100644 --- a/Dockerfile_base +++ b/Dockerfile_base @@ -1,14 +1,13 @@ -FROM python:3.10-slim-bullseye +FROM python:3.12-slim-bullseye LABEL project='dis' -LABEL version='0.4-base' +LABEL version='0.5-base' -ENV LANG en_US.UTF-8 +ENV LANG=en_US.UTF-8 ENV TZ=Europe/Moscow RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone - -RUN apt update && apt install -y libreoffice-writer libreoffice-impress default-jre +RUN apt update && apt install -y g++ gcc libreoffice-writer libreoffice-impress default-jre ADD requirements.txt . RUN python3 -m pip install -r requirements.txt --no-cache-dir diff --git a/app/main/checks/base_check.py b/app/main/checks/base_check.py index 0dcb0f90..5e431d10 100644 --- a/app/main/checks/base_check.py +++ b/app/main/checks/base_check.py @@ -1,6 +1,6 @@ -import pymorphy2 +import pymorphy3 -morph = pymorphy2.MorphAnalyzer() +morph = pymorphy3.MorphAnalyzer() def answer(mod, *args): diff --git a/app/main/checks/presentation_checks/find_theme_in_pres.py b/app/main/checks/presentation_checks/find_theme_in_pres.py index 02388602..00ef9ba1 100644 --- a/app/main/checks/presentation_checks/find_theme_in_pres.py +++ b/app/main/checks/presentation_checks/find_theme_in_pres.py @@ -7,7 +7,7 @@ import nltk from nltk.tokenize import word_tokenize, sent_tokenize from nltk.corpus import stopwords -from pymorphy2 import MorphAnalyzer +from pymorphy3 import MorphAnalyzer MORPH_ANALYZER = MorphAnalyzer() diff --git a/app/main/checks/report_checks/find_theme_in_report.py b/app/main/checks/report_checks/find_theme_in_report.py index 9602e44f..17184ff3 100644 --- a/app/main/checks/report_checks/find_theme_in_report.py +++ b/app/main/checks/report_checks/find_theme_in_report.py @@ -6,7 +6,7 @@ import string from nltk.tokenize import word_tokenize, sent_tokenize from nltk.corpus import stopwords -from pymorphy2 import MorphAnalyzer +from pymorphy3 import MorphAnalyzer MORPH_ANALYZER = MorphAnalyzer() diff --git a/app/main/checks/report_checks/sw_keywords_check.py b/app/main/checks/report_checks/sw_keywords_check.py index 8ed5eb7d..a89452c5 100644 --- a/app/main/checks/report_checks/sw_keywords_check.py +++ b/app/main/checks/report_checks/sw_keywords_check.py @@ -3,7 +3,7 @@ from nltk.tokenize import word_tokenize from nltk.corpus import stopwords -from pymorphy2 import MorphAnalyzer +from pymorphy3 import MorphAnalyzer from ..base_check import BaseReportCriterion, answer diff --git a/app/main/reports/pdf_document/pdf_document_manager.py b/app/main/reports/pdf_document/pdf_document_manager.py index 4ad951b5..1407cece 100644 --- a/app/main/reports/pdf_document/pdf_document_manager.py +++ b/app/main/reports/pdf_document/pdf_document_manager.py @@ -1,6 +1,6 @@ # import pdfplumber -import fitz +import pymupdf from app.utils import convert_to @@ -9,10 +9,10 @@ class PdfDocumentManager: def __init__(self, path_to_file, pdf_filepath): if not pdf_filepath: # self.pdf_file = pdfplumber.open(convert_to(path_to_file, target_format='pdf')) - self.pdf_file = fitz.open(convert_to(path_to_file, target_format='pdf')) + self.pdf_file = pymupdf.open(convert_to(path_to_file, target_format='pdf')) else: # self.pdf_file = pdfplumber.open(pdf_filepath) - self.pdf_file = fitz.open(pdf_filepath) + self.pdf_file = pymupdf.open(pdf_filepath) self.pages = [self.pdf_file.load_page(page_num) for page_num in range(self.pdf_file.page_count)] self.page_count_all = self.pdf_file.page_count # self.page_count = len(self.pages) @@ -34,7 +34,7 @@ def page_images(self, page_without_pril): total_height = 0 for page_num in range(page_without_pril): page = self.pdf_file[page_num] - images = self.pdf_file.get_page_images(page) + images = self.pdf_file.get_page_images(page_num) for image in images: image_coord = page.get_image_bbox(image[7], transform=0) # might be [1.0, 1.0, -1.0, -1.0] image_height = image_coord[3] - image_coord[1] diff --git a/app/nlp/stemming.py b/app/nlp/stemming.py index 4f6efa60..996e45ab 100644 --- a/app/nlp/stemming.py +++ b/app/nlp/stemming.py @@ -1,7 +1,7 @@ import itertools from nltk.corpus import stopwords from nltk.tokenize import word_tokenize, sent_tokenize -from pymorphy2 import MorphAnalyzer +from pymorphy3 import MorphAnalyzer MORPH_ANALYZER = MorphAnalyzer() diff --git a/requirements.txt b/requirements.txt index 8710f80b..ca44fa76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,37 +1,37 @@ -werkzeug==2.0.0 +argparse~=1.4.0 +celery==5.5.3 +configparser~=5.3.0 +docx2python~=2.0.4 +filetype==1.2.0 Flask==2.0.3 -jinja2==3.0.0 -requests~=2.31.0 -python-pptx==0.6.18 -odfpy==1.4.1 -pymongo==3.11.1 flask-login==0.5.0 -numpy==1.22 -scipy~=1.10.1 -pymorphy2==0.9.1 -nltk==3.6.6 flask-recaptcha==0.4.2 -lti==0.9.5 flask-security==3.0.0 -celery==5.2.2 flower==1.2.0 -redis==3.5.3 -pandas~=2.0.3 fsspec==2022.2.0 -python-docx==0.8.11 -odfpy==1.4.1 -argparse~=1.4.0 -docx2python~=2.0.4 +jinja2==3.0.0 +language-tool-python==2.8.1 +lti==0.9.5 +lxml~=4.9.2 +markdown==3.4.4 +md2pdf==1.0.1 +nltk==3.6.6 +numpy==1.26.4 oauthlib~=3.1.0 +odfpy==1.4.1 +odfpy==1.4.1 +pandas~=2.0.3 pdfplumber==0.6.1 -pytest~=7.1.2 -PyMuPDF~=1.22.5 +PyMuPDF==1.26.6 PyPDF2~=3.0.1 -configparser~=5.3.0 +pymongo==3.11.1 +pymorphy3==2.0.6 +pytest~=7.1.2 +python-docx==0.8.11 +python-pptx==0.6.18 pytz~=2023.3 -lxml~=4.9.2 +redis==6.1.0 +requests~=2.31.0 +scipy~=1.11.1 urllib3~=2.0.3 -filetype==1.2.0 -language-tool-python==2.8.1 -markdown==3.4.4 -md2pdf==1.0.1 +werkzeug==2.0.0 \ No newline at end of file From 8280103238c4d40a180f35641f9f8635fb4348a3 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 8 Dec 2025 11:53:45 +0300 Subject: [PATCH 19/28] update main_character_check --- app/main/checks/report_checks/main_character_check.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/app/main/checks/report_checks/main_character_check.py b/app/main/checks/report_checks/main_character_check.py index c596e999..47914a6f 100644 --- a/app/main/checks/report_checks/main_character_check.py +++ b/app/main/checks/report_checks/main_character_check.py @@ -82,12 +82,6 @@ def extract_table_contents(self, table): contents.append("|".join(row_text)) return contents - def calculate_find_value(self, table, index): - count = int((len(table) - index - 2) / 2) - if count >= 0: - return count - return 0 - def check_table(self, check_list, table, table_num): for item in check_list: for i, line in enumerate(table): @@ -105,10 +99,7 @@ def check_table(self, check_list, table, table_num): continue elif item["key"] in ["Зав. кафедрой", "Консультант"] and item["found_key"] > 0: - if item["key"] == "Консультант": - if item["found_key"] == 1: - item["find"] += self.calculate_find_value(table, i) for value in item["value"]: - if re.search(value, line): + if "Руководитель" not in line and re.search(value, line): # исключаем из поиска строки с рукодителем item["found_value"] += 1 item["logs"] += f"'{item['key']}': значение компоненты '{value}' найдено в строке '{line}' в таблице №{table_num}
    " From c88f47e5371c9465abc7d0848c117c5aeeefc8c6 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Mon, 8 Dec 2025 11:54:16 +0300 Subject: [PATCH 20/28] add recheck test --- tests/test_recheck.py | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 tests/test_recheck.py diff --git a/tests/test_recheck.py b/tests/test_recheck.py new file mode 100644 index 00000000..4fc8135c --- /dev/null +++ b/tests/test_recheck.py @@ -0,0 +1,36 @@ +import time +from basic_selenium_test import BasicSeleniumTest +from selenium.webdriver.common.by import By +from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.support import expected_conditions as EC + + +class RecheckTestSelenium(BasicSeleniumTest): + + def test_recheck_file(self): + check_id = self.open_statistic() + if check_id: + URL = self.get_url(f"/recheck/{check_id}") + self.get_driver().get(URL) + obj = WebDriverWait(self.driver, 10).until( + EC.presence_of_element_located((By.ID, "results_title")) + ) + if "Производится проверка файла" in obj.text: + start_time = time.time() + max_time = 240 + while (time.time() - start_time) < max_time: + time.sleep(10) + try: + obj = WebDriverWait(self.driver, 10).until( + EC.presence_of_element_located((By.ID, "results_table")) + ) + if obj is not None: + self.assertNotEquals(obj, None) + return + except: + continue + self.fail("Result of check is not found") + else: + self.fail("No checking status after /recheck") + else: + self.skipTest("No check in system for testing recheck") From 495b68d4bee7a1769793548ceb55baef2084609e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 10 Dec 2025 15:05:19 +0300 Subject: [PATCH 21/28] Update style_check_settings.py --- app/main/checks/report_checks/style_check_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/main/checks/report_checks/style_check_settings.py b/app/main/checks/report_checks/style_check_settings.py index 7c3a61b5..36912a9f 100644 --- a/app/main/checks/report_checks/style_check_settings.py +++ b/app/main/checks/report_checks/style_check_settings.py @@ -12,7 +12,7 @@ class StyleCheckSettings: HEADER_2_REGEX = "^()([\\w\\s]+)\\.$" STD_BANNED_WORDS = ['мы', 'моя', 'мои', 'моё', 'наш', 'наши', 'аттач', 'билдить', 'бинарник', 'валидный', 'дебаг', 'деплоить', 'десктопное', 'железо', - 'исходники', 'картинка', 'консольное', 'конфиг', 'кусок', 'либа', 'лог', 'мануал', 'машина', + 'исходники', 'картинка', 'консольное', 'конфиг', 'кусок', 'либа', 'лог', 'мануал', 'отнаследованный', 'парсинг', 'пост', 'распаковать', 'сбоит', 'скачать', 'склонировать', 'скрипт', 'тестить', 'тул', 'тула', 'тулза', 'фиксить', 'флажок', 'флаг', 'юзкейс', 'продакт', 'продакшн', 'прод', 'фидбек', 'дедлайн', 'дэдлайн', 'оптимально', 'оптимальный', 'надежный', 'интуитивный', From dcad677d3a8d4460de834e8a85314302609b9cc0 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Wed, 17 Dec 2025 08:32:21 +0300 Subject: [PATCH 22/28] add warned_words for banned_words_check --- .../report_checks/banned_words_check.py | 45 +++++++++++-------- .../report_checks/style_check_settings.py | 23 +++++++--- 2 files changed, 44 insertions(+), 24 deletions(-) diff --git a/app/main/checks/report_checks/banned_words_check.py b/app/main/checks/report_checks/banned_words_check.py index 351b403e..58c67cd3 100644 --- a/app/main/checks/report_checks/banned_words_check.py +++ b/app/main/checks/report_checks/banned_words_check.py @@ -5,12 +5,13 @@ class ReportBannedWordsCheck(BaseReportCriterion): label = "Проверка наличия запретных слов в тексте отчёта" - description = 'Запрещено упоминание слова "мы"' + description = 'Запрещено упоминание определенных "опасных" слов' id = 'banned_words_check' def __init__(self, file_info, headers_map=None): super().__init__(file_info) self.words = [] + self.warned_words = [] self.min_count = 0 self.max_count = 0 if headers_map: @@ -21,12 +22,14 @@ def __init__(self, file_info, headers_map=None): def late_init(self): self.headers_main = self.file.get_main_headers(self.file_type['report_type']) if self.headers_main in StyleCheckSettings.CONFIGS.get(self.config): - self.words = [morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['banned_words']] + self.words = {morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['banned_words']} + self.warned_words = {morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['warned_words']} self.min_count = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['min_count_for_banned_words_check'] self.max_count = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['max_count_for_banned_words_check'] else: if 'any_header' in StyleCheckSettings.CONFIGS.get(self.config): - self.words = [morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)['any_header']['banned_words']] + self.words = {morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)['any_header']['banned_words']} + self.warned_words = {morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)['any_header']['warned_words']} self.min_count = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['min_count_for_banned_words_check'] self.max_count = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['max_count_for_banned_words_check'] @@ -34,29 +37,35 @@ def check(self): if self.file.page_counter() < 4: return answer(False, "В отчете недостаточно страниц. Нечего проверять.") self.late_init() - detected_lines = {} result_str = f'Запрещенные слова: {"; ".join(self.words)}
    ' - count = 0 + banned_counter = {'words': self.words, 'detected_lines': {}, 'count': 0} + warned_counter = {'words': self.warned_words,'detected_lines': {}, 'count': 0} for k, v in self.file.pdf_file.get_text_on_page().items(): lines_on_page = re.split(r'\n', v) for index, line in enumerate(lines_on_page): - words_on_line = re.split(r'[^\w-]+', line) - words_on_line = [morph.normal_forms(word)[0] for word in words_on_line] - count_banned_words = set(words_on_line).intersection(self.words) - if count_banned_words: - count += len(count_banned_words) - if k not in detected_lines.keys(): - detected_lines[k] = [] - detected_lines[k].append(f'Строка {index + 1}: {line} [{"; ".join(count_banned_words)}]') - if len(detected_lines): + words_on_line = {morph.normal_forms(word)[0] for word in re.split(r'[^\w-]+', line)} + for counter in (banned_counter, warned_counter): + count_banned_words = words_on_line.intersection(counter['words']) + if count_banned_words: + counter['count'] += len(count_banned_words) + if k not in counter['detected_lines'].keys(): + counter['detected_lines'][k] = [] + counter['detected_lines'][k].append(f'Строка {index + 1}: {line} [{"; ".join(count_banned_words)}]') + if len(banned_counter['detected_lines']): result_str += 'Обнаружены запретные слова!

    ' - for k, v in detected_lines.items(): - result_str += f'Страница №{k}:
    {"
    ".join(detected_lines[k])}

    ' + for k, v in banned_counter['detected_lines'].items(): + result_str += f'Страница №{k}:
    {"
    ".join(banned_counter['detected_lines'][k])}

    ' else: result_str = 'Пройдена!' + + if len(warned_counter['detected_lines']): + result_str += f'

    Обнаружены потенциально опасные слова (не влияют на результат проверки)!
    Обратите внимание, что их использование возможно только в подтвержденных случаях: {"; ".join(self.warned_words)}

    ' + for k, v in warned_counter['detected_lines'].items(): + result_str += f'Страница №{k}:
    {"
    ".join(warned_counter['detected_lines'][k])}

    ' + result_score = 1 - if count > self.min_count: - if count <= self.max_count: + if banned_counter['count'] > self.min_count: + if banned_counter['count'] <= self.max_count: result_score = 0.5 else: result_score = 0 diff --git a/app/main/checks/report_checks/style_check_settings.py b/app/main/checks/report_checks/style_check_settings.py index 36912a9f..40a7cd21 100644 --- a/app/main/checks/report_checks/style_check_settings.py +++ b/app/main/checks/report_checks/style_check_settings.py @@ -10,14 +10,15 @@ class StyleCheckSettings: HEADER_REGEX = "^\\D+.+$" HEADER_1_REGEX = "^()([\\w\\s]+)$" HEADER_2_REGEX = "^()([\\w\\s]+)\\.$" - STD_BANNED_WORDS = ['мы', 'моя', 'мои', 'моё', 'наш', 'наши', + STD_BANNED_WORDS = ('мы', 'моя', 'мои', 'моё', 'наш', 'наши', 'аттач', 'билдить', 'бинарник', 'валидный', 'дебаг', 'деплоить', 'десктопное', 'железо', 'исходники', 'картинка', 'консольное', 'конфиг', 'кусок', 'либа', 'лог', 'мануал', 'отнаследованный', 'парсинг', 'пост', 'распаковать', 'сбоит', 'скачать', 'склонировать', 'скрипт', 'тестить', 'тул', 'тула', 'тулза', 'фиксить', 'флажок', 'флаг', 'юзкейс', 'продакт', 'продакшн', - 'прод', 'фидбек', 'дедлайн', 'дэдлайн', 'оптимально', 'оптимальный', 'надежный', 'интуитивный', + 'прод', 'фидбек', 'дедлайн', 'дэдлайн', 'оптимально', 'надежный', 'интуитивный', 'хороший', 'плохой', 'идеальный', 'быстро', 'медленно', 'какой-нибудь', 'некоторый', 'почти' - ] # TODO: list of "warning" words + ) + STD_WARNED_WORDS = ('машина', 'оптимальный') # TODO: list of "warning" words STD_MIN_LIT_REF = 1 STD_MAX_LIT_REF = 1000 #just in case for future edit HEADER_1_STYLE = { @@ -101,6 +102,7 @@ class StyleCheckSettings: "unify_regex": APPENDIX_UNIFY_REGEX, "regex": APPENDIX_REGEX, "banned_words": STD_BANNED_WORDS, + "warned_words": STD_WARNED_WORDS, 'min_count_for_banned_words_check': 3, 'max_count_for_banned_words_check': 6, 'min_ref_for_literature_references_check': STD_MIN_LIT_REF, @@ -125,6 +127,7 @@ class StyleCheckSettings: "unify_regex": None, "regex": HEADER_REGEX, "banned_words": STD_BANNED_WORDS, + "warned_words": STD_WARNED_WORDS, 'min_count_for_banned_words_check': 3, 'max_count_for_banned_words_check': 6, 'min_ref_for_literature_references_check': STD_MIN_LIT_REF, @@ -149,7 +152,8 @@ class StyleCheckSettings: "ПЛАН РАБОТЫ НА ВЕСЕННИЙ СЕМЕСТР", "ОТЗЫВ РУКОВОДИТЕЛЯ", "СПИСОК ИСПОЛЬЗОВАННЫХ ИСТОЧНИКОВ"], "unify_regex": None, "regex": HEADER_REGEX, - "banned_words": STD_BANNED_WORDS + ['доработать', 'доработка', 'переписать', 'рефакторинг', 'исправление'] + "banned_words": STD_BANNED_WORDS + ('доработать', 'доработка', 'переписать', 'рефакторинг', 'исправление'), + "warned_words": STD_WARNED_WORDS }, } @@ -162,7 +166,8 @@ class StyleCheckSettings: "ПЛАН РАБОТЫ НА ОСЕННИЙ СЕМЕСТР", "СПИСОК ИСПОЛЬЗОВАННЫХ ИСТОЧНИКОВ"], "unify_regex": None, "regex": HEADER_REGEX, - "banned_words": STD_BANNED_WORDS + ['доработать', 'доработка', 'переписать', 'рефакторинг', 'исправление'] + "banned_words": STD_BANNED_WORDS + ('доработать', 'доработка', 'переписать', 'рефакторинг', 'исправление'), + "warned_words": STD_WARNED_WORDS }, } @@ -175,7 +180,8 @@ class StyleCheckSettings: "ПЛАН РАБОТЫ НА ВЕСЕННИЙ СЕМЕСТР", "ОТЗЫВ РУКОВОДИТЕЛЯ", "СПИСОК ИСПОЛЬЗОВАННЫХ ИСТОЧНИКОВ"], "unify_regex": None, "regex": HEADER_REGEX, - "banned_words": STD_BANNED_WORDS + ['доработать', 'доработка', 'переписать', 'рефакторинг', 'исправление'] + "banned_words": STD_BANNED_WORDS + ('доработать', 'доработка', 'переписать', 'рефакторинг', 'исправление'), + "warned_words": STD_WARNED_WORDS }, } @@ -193,6 +199,7 @@ class StyleCheckSettings: "unify_regex": None, "regex": HEADER_REGEX, "banned_words": STD_BANNED_WORDS, + "warned_words": STD_WARNED_WORDS, 'min_count_for_banned_words_check': 3, 'max_count_for_banned_words_check': 6, }, @@ -208,6 +215,7 @@ class StyleCheckSettings: "unify_regex": None, "regex": HEADER_REGEX, "banned_words": STD_BANNED_WORDS, + "warned_words": STD_WARNED_WORDS, 'min_count_for_banned_words_check': 3, 'max_count_for_banned_words_check': 6, } @@ -230,6 +238,7 @@ class StyleCheckSettings: "unify_regex": None, "regex": HEADER_REGEX, "banned_words": STD_BANNED_WORDS, + "warned_words": STD_WARNED_WORDS, 'min_ref_for_literature_references_check': 1, 'mах_ref_for_literature_references_check': 1000, #just for future possible edit 'min_count_for_banned_words_check': 2, @@ -249,6 +258,7 @@ class StyleCheckSettings: "unify_regex": None, "regex": HEADER_REGEX, "banned_words": STD_BANNED_WORDS, + "warned_words": STD_WARNED_WORDS, 'min_ref_for_literature_references_check': 3, 'mах_ref_for_literature_references_check': 1000, #just for future possible edit 'min_count_for_banned_words_check': 2, @@ -268,6 +278,7 @@ class StyleCheckSettings: "unify_regex": None, "regex": HEADER_REGEX, "banned_words": STD_BANNED_WORDS, + "warned_words": STD_WARNED_WORDS, 'min_ref_for_literature_references_check': 5, 'mах_ref_for_literature_references_check': 1000, #just for future possible edit 'min_count_for_banned_words_check': 2, From 8b5865e958de31830543226927b5e3fb2a6ce310 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 26 Dec 2025 12:08:34 +0300 Subject: [PATCH 23/28] add login_required and author check for result page --- app/routes/results.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/app/routes/results.py b/app/routes/results.py index 91cc5b7f..95f4f508 100644 --- a/app/routes/results.py +++ b/app/routes/results.py @@ -3,6 +3,7 @@ from time import time from flask import Blueprint, Response, render_template +from flask_login import current_user, login_required from wsgiref.handlers import format_date_time as format_date from app.db import db_methods @@ -16,6 +17,7 @@ @results_bp.route("/", methods=["GET"]) +@login_required def results_main(_id): try: oid = ObjectId(_id) @@ -24,11 +26,15 @@ def results_main(_id): return render_template("./404.html") check = db_methods.get_check(oid) if check is not None: - # show processing time for user - avg_process_time = None if check.is_ended else db_methods.get_average_processing_time() - return render_template("./results.html", navi_upload=True, results=check, - columns=TABLE_COLUMNS, avg_process_time=avg_process_time, - stats=format_check(check.pack())) + # show check only for author or admin + if current_user.is_admin or current_user.username == check.user: + # show processing time for user + avg_process_time = None if check.is_ended else db_methods.get_average_processing_time() + return render_template("./results.html", navi_upload=True, results=check, + columns=TABLE_COLUMNS, avg_process_time=avg_process_time, + stats=format_check(check.pack())) + else: + return "У вас нет прав на просмотр результатов чужих проверок", 403 else: logger.info("Запрошенная проверка не найдена: " + _id) return render_template("./404.html") From 02b70baeb6917a924658df632dfb3216814f8eff Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 26 Dec 2025 12:11:51 +0300 Subject: [PATCH 24/28] little update for 404 page --- app/templates/404.html | 2 +- assets/styles/404.css | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/app/templates/404.html b/app/templates/404.html index bf8140dd..ad648890 100644 --- a/app/templates/404.html +++ b/app/templates/404.html @@ -4,7 +4,7 @@ {% block main %}
    - Страница не найдена! +

    Запрашиваемый ресурс не найден

    diff --git a/assets/styles/404.css b/assets/styles/404.css index c833cc62..384902ba 100644 --- a/assets/styles/404.css +++ b/assets/styles/404.css @@ -1,3 +1,3 @@ #middle-container { - background-color: black; + background-color: rgb(255, 255, 255); } From 9a54360fabc015c23703400f656c804a24ab4541 Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Mon, 26 Jan 2026 16:34:52 +0300 Subject: [PATCH 25/28] all requier applyied-2 --- app/main/check_packs/pack_config.py | 4 +- app/main/checks/check_abbreviations.py | 39 ++++++++++++++++--- .../abbreviations_presentation.py | 20 ++-------- app/main/checks/report_checks/__init__.py | 5 +-- .../report_checks/abbreviations_check.py | 4 +- 5 files changed, 41 insertions(+), 31 deletions(-) diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py index 008d3c99..db2e4ef2 100644 --- a/app/main/check_packs/pack_config.py +++ b/app/main/check_packs/pack_config.py @@ -22,7 +22,7 @@ ['pres_image_capture'], ['task_tracker'], ['overview_in_tasks'], - ['abbreviations_check_pres'], + ['pres_abbreviations_check'], ] BASE_REPORT_CRITERION = [ ["simple_check"], @@ -51,7 +51,7 @@ ["empty_task_page_check"], ["water_in_the_text_check"], ["report_task_tracker"], - ["abbreviations_check"], + ["report_abbreviations_check"], ] DEFAULT_TYPE = 'pres' diff --git a/app/main/checks/check_abbreviations.py b/app/main/checks/check_abbreviations.py index f810b1b1..15750293 100644 --- a/app/main/checks/check_abbreviations.py +++ b/app/main/checks/check_abbreviations.py @@ -1,5 +1,5 @@ import re -from pymorphy2 import MorphAnalyzer +from pymorphy3 import MorphAnalyzer morph = MorphAnalyzer() @@ -71,10 +71,37 @@ def is_abbreviation_explained(abbr: str, text: str) -> bool: def correctly_explained(abbr, explan): words = explan.split() - first_letter = "" + first_letters = "" for word in words: - first_letter += word[0].upper() + first_letters += word[0].upper() - if(first_letter == abbr[len(first_letter)]): - return True - return False + return first_letters == abbr.upper() + +def main_check(text: str): + try: + continue_check = True + res_str = "" + if not text: + continue_check, res_str = False, "Не удалось получить текст" + + abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=text) + + if not abbr_is_finding: + continue_check, res_str = False, "Аббревиатуры не найдены в представленном документе" + + if not unexplained_abbr: + continue_check, res_str = False, "Все аббревиатуры правильно расшифрованы" + + return continue_check, res_str, unexplained_abbr + + except Exception as e: + return False, f"Ошибка при проверке аббревиатур: {str(e)}", {} + +def forming_response(unexplained_abbr_with_page, format_page_link): + result_str = "Найдены нерасшифрованные аббревиатуры при первом использовании:
    " + page_links = format_page_link(list(unexplained_abbr_with_page.values())) + for index_links, abbr in enumerate(unexplained_abbr_with_page): + result_str += f"- {abbr} на {page_links[index_links]} странице/слайде
    " + result_str += "Каждая аббревиатура должна быть расшифрована при первом использовании в тексте.
    " + result_str += "Расшифровка должны быть по первыми буквам, например, МВД - Министерство внутренних дел.
    " + return result_str diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py index 17c4a672..4dad0cc8 100644 --- a/app/main/checks/presentation_checks/abbreviations_presentation.py +++ b/app/main/checks/presentation_checks/abbreviations_presentation.py @@ -6,7 +6,7 @@ class PresAbbreviationsCheck(BasePresCriterion): label = "Проверка расшифровки аббревиатур в презентации" description = "Все аббревиатуры должны быть расшифрованы при первом использовании" - id = 'abbreviations_check_pres' + id = 'pres_abbreviations_check' def __init__(self, file_info): super().__init__(file_info) @@ -22,12 +22,7 @@ def check(self): abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=full_text) - if not abbr_is_finding: - return answer(True, "Аббревиатуры не найдены в презентации") - - if not unexplained_abbr: - return answer(True, "Все аббревиатуры правильно расшифрованы") - + unexplained_abbr_with_slides = {} for slide_num, slide_text in enumerate(slides_text, 1): @@ -47,13 +42,4 @@ def check(self): except Exception as e: return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}") - - def _find_abbreviation_slides(self, abbr: str, slides_text: list) -> list: - found_slides = [] - - for slide_num, slide_text in enumerate(slides_text, 1): - pattern = rf'\b{re.escape(abbr)}\b' - if re.search(pattern, slide_text, re.IGNORECASE): - found_slides.append(slide_num) - - return found_slides + \ No newline at end of file diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py index bf5d475c..d66f5cac 100644 --- a/app/main/checks/report_checks/__init__.py +++ b/app/main/checks/report_checks/__init__.py @@ -13,12 +13,10 @@ from .page_counter import ReportPageCounter from .right_words_check import ReportRightWordsCheck from .section_component import ReportSectionComponent -from .sections_check import LRReportSectionCheck from .short_sections_check import ReportShortSectionsCheck from .simple_check import ReportSimpleCheck from .style_check_settings import StyleCheckSettings from .find_theme_in_report import FindThemeInReport -from .headers_at_page_top_check import ReportHeadersAtPageTopCheck from .sections_check import LRReportSectionCheck from .style_check import ReportStyleCheck from .spelling_check import SpellingCheck @@ -32,6 +30,5 @@ from .sw_section_size import SWSectionSizeCheck from .sw_keywords_check import SWKeywordsCheck from .task_tracker import ReportTaskTracker -from .abbreviations_check import AbbreviationsCheckPres from .paragraphs_count_check import ReportParagraphsCountCheck -from .template_name import ReportTemplateNameCheck +from .abbreviations_check import ReportAbbreviationsCheck diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py index 20db6fac..593aa810 100644 --- a/app/main/checks/report_checks/abbreviations_check.py +++ b/app/main/checks/report_checks/abbreviations_check.py @@ -1,10 +1,10 @@ from ..base_check import BaseReportCriterion, answer from ..check_abbreviations import get_unexplained_abbrev -class AbbreviationsCheckPres(BaseReportCriterion): +class ReportAbbreviationsCheck(BaseReportCriterion): label = "Проверка расшифровки аббревиатур" description = "Все аббревиатуры должны быть расшифрованы при первом использовании" - id = 'abbreviations_check' + id = 'report_abbreviations_check' def __init__(self, file_info): super().__init__(file_info) From 96cb746b2cc637ddfca18ac2fbaaee875cb740f7 Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Mon, 26 Jan 2026 18:06:37 +0300 Subject: [PATCH 26/28] all requier applyied-3 --- app/main/checks/check_abbreviations.py | 13 ++++----- .../abbreviations_presentation.py | 22 +++++---------- .../report_checks/abbreviations_check.py | 27 +++++-------------- 3 files changed, 18 insertions(+), 44 deletions(-) diff --git a/app/main/checks/check_abbreviations.py b/app/main/checks/check_abbreviations.py index 15750293..cd2579ff 100644 --- a/app/main/checks/check_abbreviations.py +++ b/app/main/checks/check_abbreviations.py @@ -16,9 +16,6 @@ def get_unexplained_abbrev(text): return True, unexplained_abbr - - - def find_abbreviations(text: str): pattern = r'\b[А-ЯA-Z]{2,5}\b' abbreviations = re.findall(pattern, text) @@ -43,12 +40,11 @@ def find_abbreviations(text: str): 'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP', 'IP', 'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS', 'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML', - 'MAC', 'IBM', 'ГОСТ' + 'MAC', 'IBM', 'ГОСТ', 'ООП', 'ЛР', 'КР', 'ОТЧЕТ' } - filtered_abbr = [abbr for abbr in abbreviations if abbr not in common_abbr and morph.parse(abbr.lower())[0].score != 0] + filtered_abbr = {abbr for abbr in abbreviations if abbr not in common_abbr and morph.parse(abbr.lower())[0].score != 0} - return list(set(filtered_abbr)) - + return list(filtered_abbr) def is_abbreviation_explained(abbr: str, text: str) -> bool: patterns = [ @@ -73,7 +69,8 @@ def correctly_explained(abbr, explan): first_letters = "" for word in words: - first_letters += word[0].upper() + if word: + first_letters += word[0].upper() return first_letters == abbr.upper() diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py index 4dad0cc8..2cb9d4d3 100644 --- a/app/main/checks/presentation_checks/abbreviations_presentation.py +++ b/app/main/checks/presentation_checks/abbreviations_presentation.py @@ -1,6 +1,6 @@ import re from ..base_check import BasePresCriterion, answer -from ..check_abbreviations import get_unexplained_abbrev +from ..check_abbreviations import main_check, forming_response class PresAbbreviationsCheck(BasePresCriterion): @@ -14,15 +14,13 @@ def __init__(self, file_info): def check(self): try: slides_text = self.file.get_text_from_slides() - - if not slides_text: - return answer(False, "Не удалось получить текст презентации") - full_text = " ".join(slides_text) - abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=full_text) + continue_check, res_str, unexplained_abbr = main_check(text=full_text) + if not continue_check: + return answer(True, res_str) + - unexplained_abbr_with_slides = {} for slide_num, slide_text in enumerate(slides_text, 1): @@ -30,16 +28,8 @@ def check(self): if abbr in slide_text and abbr not in unexplained_abbr_with_slides: unexplained_abbr_with_slides[abbr] = slide_num - result_str = "Найдены нерасшифрованные аббревиатуры при первом использовании:
    " - slide_links = self.format_page_link(list(unexplained_abbr_with_slides.values())) - for index_links, abbr in enumerate(unexplained_abbr_with_slides): - result_str += f"- {abbr} на слайде {slide_links[index_links]}
    " - - result_str += "
    Каждая аббревиатура должна быть расшифрована при первом использовании в презентации.
    " - result_str += "Расшифровка должны быть по первыми буквам, например, МВД - Министерство внутренних дел.
    " - + result_str = forming_response(unexplained_abbr_with_slides, lambda pages: self.format_page_link(pages)) return answer(False, result_str) except Exception as e: return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}") - \ No newline at end of file diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py index 593aa810..49560721 100644 --- a/app/main/checks/report_checks/abbreviations_check.py +++ b/app/main/checks/report_checks/abbreviations_check.py @@ -1,5 +1,6 @@ from ..base_check import BaseReportCriterion, answer -from ..check_abbreviations import get_unexplained_abbrev +from ..check_abbreviations import main_check, forming_response + class ReportAbbreviationsCheck(BaseReportCriterion): label = "Проверка расшифровки аббревиатур" @@ -14,17 +15,10 @@ def check(self): try: text = self._get_document_text() - if not text: - return answer(False, "Не удалось получить текст документа") - - abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=text) + continue_check, res_str, unexplained_abbr = main_check(text=text) + if not continue_check: + return answer(True, res_str) - if not abbr_is_finding: - return answer(True, "Аббревиатуры не найдены в документе") - - if not unexplained_abbr: - return answer(True, "Все аббревиатуры правильно расшифрованы") - unexplained_abbr_with_page = {} for page_num in range(1, self.file.page_counter() + 1): @@ -33,15 +27,7 @@ def check(self): for abbr in unexplained_abbr: if abbr in text_on_page and abbr not in unexplained_abbr_with_page: unexplained_abbr_with_page[abbr] = page_num - - - result_str = "Найдены нерасшифрованные аббревиатуры при первом использовании:
    " - page_links = self.format_page_link(list(unexplained_abbr_with_page.values())) - for index_links, abbr in enumerate(unexplained_abbr_with_page): - result_str += f"- {abbr} на странице {page_links[index_links]}
    " - result_str += "Каждая аббревиатура должна быть расшифрована при первом использовании в тексте.
    " - result_str += "Расшифровка должны быть по первыми буквам, например, МВД - Министерство внутренних дел.
    " - + result_str = forming_response(unexplained_abbr_with_page, lambda pages: self.format_page_link(pages)) return answer(False, result_str) except Exception as e: @@ -63,3 +49,4 @@ def _get_document_text(self): text_parts.append(text) return "\n".join(text_parts) return None + \ No newline at end of file From d26fcd38972bef27f2609c8bfbf82c94e7d996a9 Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Sat, 7 Feb 2026 12:01:24 +0300 Subject: [PATCH 27/28] add case for title page --- app/main/checks/check_abbreviations.py | 8 ++++---- .../presentation_checks/abbreviations_presentation.py | 3 ++- app/main/checks/report_checks/abbreviations_check.py | 5 +++-- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/app/main/checks/check_abbreviations.py b/app/main/checks/check_abbreviations.py index cd2579ff..fe05e9cb 100644 --- a/app/main/checks/check_abbreviations.py +++ b/app/main/checks/check_abbreviations.py @@ -3,7 +3,7 @@ morph = MorphAnalyzer() -def get_unexplained_abbrev(text): +def get_unexplained_abbrev(text, title_page): abbreviations = find_abbreviations(text) if not abbreviations: @@ -11,7 +11,7 @@ def get_unexplained_abbrev(text): unexplained_abbr = [] for abbr in abbreviations: - if not is_abbreviation_explained(abbr, text): + if not is_abbreviation_explained(abbr, text) and not abbr in title_page: unexplained_abbr.append(abbr) return True, unexplained_abbr @@ -74,14 +74,14 @@ def correctly_explained(abbr, explan): return first_letters == abbr.upper() -def main_check(text: str): +def main_check(text: str, title_page: str): try: continue_check = True res_str = "" if not text: continue_check, res_str = False, "Не удалось получить текст" - abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=text) + abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=text, title_page=title_page) if not abbr_is_finding: continue_check, res_str = False, "Аббревиатуры не найдены в представленном документе" diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py index 2cb9d4d3..0ed75905 100644 --- a/app/main/checks/presentation_checks/abbreviations_presentation.py +++ b/app/main/checks/presentation_checks/abbreviations_presentation.py @@ -14,9 +14,10 @@ def __init__(self, file_info): def check(self): try: slides_text = self.file.get_text_from_slides() + title_page = slides_text[0] full_text = " ".join(slides_text) - continue_check, res_str, unexplained_abbr = main_check(text=full_text) + continue_check, res_str, unexplained_abbr = main_check(text=full_text, title_page=title_page) if not continue_check: return answer(True, res_str) diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py index 49560721..b09f165a 100644 --- a/app/main/checks/report_checks/abbreviations_check.py +++ b/app/main/checks/report_checks/abbreviations_check.py @@ -14,8 +14,9 @@ def __init__(self, file_info): def check(self): try: text = self._get_document_text() - - continue_check, res_str, unexplained_abbr = main_check(text=text) + title_page = self.file.pdf_file.text_on_page[0] + + continue_check, res_str, unexplained_abbr = main_check(text=text, title_page=title_page) if not continue_check: return answer(True, res_str) From fd19063079e02beda3faae638f9e06e7b99a988d Mon Sep 17 00:00:00 2001 From: LapshinAE0 Date: Tue, 10 Feb 2026 12:36:32 +0300 Subject: [PATCH 28/28] dont check abbr title page --- app/main/checks/check_abbreviations.py | 13 +++++++------ .../abbreviations_presentation.py | 5 ++++- .../checks/report_checks/abbreviations_check.py | 5 ++++- 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/app/main/checks/check_abbreviations.py b/app/main/checks/check_abbreviations.py index fe05e9cb..0fd0ff4d 100644 --- a/app/main/checks/check_abbreviations.py +++ b/app/main/checks/check_abbreviations.py @@ -4,19 +4,19 @@ def get_unexplained_abbrev(text, title_page): - abbreviations = find_abbreviations(text) + abbreviations = find_abbreviations(text, title_page) if not abbreviations: - return False, None + return False, [] unexplained_abbr = [] for abbr in abbreviations: - if not is_abbreviation_explained(abbr, text) and not abbr in title_page: + if not is_abbreviation_explained(abbr, text): unexplained_abbr.append(abbr) return True, unexplained_abbr -def find_abbreviations(text: str): +def find_abbreviations(text: str, title_page: str): pattern = r'\b[А-ЯA-Z]{2,5}\b' abbreviations = re.findall(pattern, text) @@ -28,7 +28,7 @@ def find_abbreviations(text: str): 'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP', 'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL', 'CS', 'DS', 'ES', 'SS', 'FS', 'GS', - 'IP', 'EIP', 'RIP', + 'IP', 'EIP', 'RIP', 'URL', 'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF', 'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP', 'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP', @@ -42,7 +42,8 @@ def find_abbreviations(text: str): 'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML', 'MAC', 'IBM', 'ГОСТ', 'ООП', 'ЛР', 'КР', 'ОТЧЕТ' } - filtered_abbr = {abbr for abbr in abbreviations if abbr not in common_abbr and morph.parse(abbr.lower())[0].score != 0} + filtered_abbr = {abbr for abbr in abbreviations if abbr not in common_abbr \ + and abbr not in title_page and morph.parse(abbr.lower())[0].score != 0} return list(filtered_abbr) diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py index 0ed75905..1dafefdd 100644 --- a/app/main/checks/presentation_checks/abbreviations_presentation.py +++ b/app/main/checks/presentation_checks/abbreviations_presentation.py @@ -28,7 +28,10 @@ def check(self): for abbr in unexplained_abbr: if abbr in slide_text and abbr not in unexplained_abbr_with_slides: unexplained_abbr_with_slides[abbr] = slide_num - + + if not unexplained_abbr_with_slides: + return answer(True, "Все аббревиатуры правильно расшифрованы") + result_str = forming_response(unexplained_abbr_with_slides, lambda pages: self.format_page_link(pages)) return answer(False, result_str) diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py index b09f165a..906378e0 100644 --- a/app/main/checks/report_checks/abbreviations_check.py +++ b/app/main/checks/report_checks/abbreviations_check.py @@ -14,7 +14,7 @@ def __init__(self, file_info): def check(self): try: text = self._get_document_text() - title_page = self.file.pdf_file.text_on_page[0] + title_page = self.file.pdf_file.text_on_page[1] continue_check, res_str, unexplained_abbr = main_check(text=text, title_page=title_page) if not continue_check: @@ -28,6 +28,9 @@ def check(self): for abbr in unexplained_abbr: if abbr in text_on_page and abbr not in unexplained_abbr_with_page: unexplained_abbr_with_page[abbr] = page_num + + if not unexplained_abbr_with_page: + return answer(True, "Все аббревиатуры правильно расшифрованы") result_str = forming_response(unexplained_abbr_with_page, lambda pages: self.format_page_link(pages)) return answer(False, result_str)