diff --git a/app/main/check_packs/pack_config.py b/app/main/check_packs/pack_config.py
index 91e08134..db2e4ef2 100644
--- a/app/main/check_packs/pack_config.py
+++ b/app/main/check_packs/pack_config.py
@@ -22,6 +22,7 @@
['pres_image_capture'],
['task_tracker'],
['overview_in_tasks'],
+ ['pres_abbreviations_check'],
]
BASE_REPORT_CRITERION = [
["simple_check"],
@@ -50,6 +51,7 @@
["empty_task_page_check"],
["water_in_the_text_check"],
["report_task_tracker"],
+ ["report_abbreviations_check"],
]
DEFAULT_TYPE = 'pres'
diff --git a/app/main/checks/check_abbreviations.py b/app/main/checks/check_abbreviations.py
new file mode 100644
index 00000000..0fd0ff4d
--- /dev/null
+++ b/app/main/checks/check_abbreviations.py
@@ -0,0 +1,105 @@
+import re
+from pymorphy3 import MorphAnalyzer
+morph = MorphAnalyzer()
+
+
def get_unexplained_abbrev(text, title_page):
    """Collect the abbreviations in ``text`` that have no matching expansion.

    Candidates are produced by ``find_abbreviations(text, title_page)`` and
    each one is then tested with ``is_abbreviation_explained``.

    Returns:
        A ``(found, unexplained)`` pair. ``found`` is ``False`` (paired with
        an empty list) when no candidate abbreviation was detected at all.
        Otherwise ``found`` is ``True`` and ``unexplained`` holds the
        candidates for which no expansion was found (it may be empty).
    """
    candidates = find_abbreviations(text, title_page)
    if candidates:
        missing = [
            item for item in candidates
            if not is_abbreviation_explained(item, text)
        ]
        return True, missing
    return False, []
+
# Abbreviations that never need an expansion in a report or presentation.
# Built once at import time instead of on every call.
# Entries that are mixed-case or longer than five letters ('PCIe', 'NoSQL',
# 'EEPROM', 'UNICODE') cannot be produced by the candidate pattern used in
# find_abbreviations; they are kept so the list stays complete if that
# pattern is ever widened.
_COMMON_ABBREVIATIONS = frozenset({
    'СССР', 'РФ', 'США', 'ВКР', 'ИТ', 'ПО', 'ООО', 'ЗАО', 'ОАО', 'HTML', 'CSS',
    'JS', 'ЛЭТИ', 'МОЕВМ', 'ЭВМ', 'ГОСТ', 'DVD',
    'SSD', 'PC', 'HDD',
    'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP',
    'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL',
    'CS', 'DS', 'ES', 'SS', 'FS', 'GS',
    'IP', 'EIP', 'RIP', 'URL',
    'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF',
    'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP',
    'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP',
    'DOS', 'OS', 'BIOS', 'UEFI', 'MBR', 'GPT',
    'ASCII', 'UTF', 'UNICODE', 'ANSI',
    'МОЭВМ',
    'CPU', 'GPU', 'APU', 'RAM', 'ROM', 'PROM', 'EPROM', 'EEPROM',
    'USB', 'SATA', 'PCI', 'PCIe', 'AGP', 'ISA', 'VGA', 'HDMI', 'DP',
    'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP',
    'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS',
    'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML',
    'MAC', 'IBM', 'ООП', 'ЛР', 'КР', 'ОТЧЕТ',
})


def find_abbreviations(text: str, title_page: str):
    """Return the candidate abbreviations of ``text`` that need an expansion.

    A candidate is a standalone run of 2-5 uppercase Cyrillic/Latin letters.
    A candidate is dropped when any of the following holds:

    * it is listed in the common-abbreviation whitelist;
    * it occurs in ``title_page`` (plain substring test);
    * the first parse returned by the module-level ``morph`` analyzer for its
      lower-cased form has a score of 0.
      NOTE(review): the intent of the score filter is not visible here —
      confirm against the pymorphy documentation.

    Args:
        text: Full document text to scan.
        title_page: Text of the title page/slide.

    Returns:
        A sorted list of unique candidates, so the output order is stable
        between runs.
    """
    pattern = r'\b[А-ЯA-Z]{2,5}\b'
    candidates = set(re.findall(pattern, text))

    # The whitelist test comes first: it is the cheapest check and it spares
    # a morphological parse for every well-known abbreviation.
    filtered_abbr = {
        abbr for abbr in candidates
        if abbr not in _COMMON_ABBREVIATIONS
        and abbr not in title_page
        and morph.parse(abbr.lower())[0].score != 0
    }

    return sorted(filtered_abbr)
+
def is_abbreviation_explained(abbr: str, text: str) -> bool:
    """Tell whether ``text`` contains an expansion of ``abbr``.

    Four layouts are recognised (dash means hyphen, en dash or em dash):

    * ``ABBR (expansion)``
    * ``(expansion) ABBR``
    * ``ABBR - expansion``
    * ``expansion - ABBR``

    Every occurrence of each layout is examined, not only the first one, so
    an unrelated early hit cannot hide a valid expansion later in the text.
    Only the ``len(abbr)`` words adjacent to the abbreviation are compared,
    which lets the rest of the sentence follow or precede the expansion.

    Args:
        abbr: The abbreviation to look for; matched as a whole token and
            case-insensitively.
        text: The text to search.

    Returns:
        ``True`` if at least one occurrence is accompanied by words whose
        initials spell ``abbr``; ``False`` otherwise (including empty input).
    """
    if not abbr or not text:
        return False

    # Escape the abbreviation and forbid neighbouring word characters so that
    # e.g. "ИС" is not found inside "АИС".
    token = rf'(?<!\w){re.escape(abbr)}(?!\w)'
    dash = r'[—–\-]'

    # Each entry: (pattern, True when the expansion follows the abbreviation).
    layouts = (
        (rf'{token}\s*\(([^)]+)\)', True),              # ABBR (expansion)
        (rf'\(([^)]+)\)\s*{token}', False),             # (expansion) ABBR
        (rf'{token}\s*{dash}\s*([^.,;!?]+)', True),     # ABBR - expansion
        (rf'([^.,;!?]+)\s*{dash}\s*{token}', False),    # expansion - ABBR
    )

    word_count = len(abbr)
    for pattern, expansion_follows in layouts:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            words = match.group(1).split()
            # Keep only the words next to the abbreviation: the captured
            # fragment may run on to the end (or back to the start) of the
            # sentence.
            nearby = words[:word_count] if expansion_follows else words[-word_count:]
            if correctly_explained(abbr, " ".join(nearby)):
                return True

    return False


def correctly_explained(abbr, explan):
    """Check that the initials of the words in ``explan`` spell ``abbr``.

    The first alphanumeric character of every whitespace-separated word is
    taken, so leading quotes or brackets do not count as an initial; words
    without any alphanumeric character are ignored. The comparison is
    case-insensitive.

    Args:
        abbr: The abbreviation, e.g. ``"МВД"``.
        explan: The proposed expansion, e.g. ``"Министерство внутренних дел"``.

    Returns:
        ``True`` when the concatenated initials equal ``abbr``.
    """
    initials = []
    for word in explan.split():
        first_char = next((char for char in word if char.isalnum()), None)
        if first_char is not None:
            initials.append(first_char.upper())

    return "".join(initials) == abbr.upper()
+
def main_check(text: str, title_page: str):
    """Run the abbreviation check and report whether details are needed.

    Args:
        text: Full text of the document or presentation; may be empty/None.
        title_page: Text of the title page/slide, forwarded to the
            abbreviation search.

    Returns:
        A ``(continue_check, message, unexplained)`` triple.

        * ``continue_check`` is ``True`` only when unexplained abbreviations
          were found and the caller should locate them on pages/slides;
          ``message`` is then empty and ``unexplained`` lists them.
        * Otherwise ``continue_check`` is ``False``, ``message`` states why
          (no text, no abbreviations, everything explained, or an internal
          error) and ``unexplained`` is an empty list.
    """
    # Each outcome returns immediately so that a later, more generic message
    # cannot overwrite a more specific one.
    if not text:
        return False, "Не удалось получить текст", []

    try:
        abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=text, title_page=title_page)
    except Exception as e:
        # Best-effort boundary: a failure inside the analysis is reported as
        # a message instead of propagating to the caller.
        return False, f"Ошибка при проверке аббревиатур: {str(e)}", []

    if not abbr_is_finding:
        return False, "Аббревиатуры не найдены в представленном документе", []

    if not unexplained_abbr:
        return False, "Все аббревиатуры правильно расшифрованы", []

    return True, "", unexplained_abbr
+
def forming_response(unexplained_abbr_with_page, format_page_link):
    """Build the user-facing report about unexplained abbreviations.

    Args:
        unexplained_abbr_with_page: Mapping ``abbreviation -> page/slide
            number`` of its first occurrence; iteration order defines the
            order of the report lines.
        format_page_link: Callable that receives the list of page numbers and
            returns an indexable collection with one rendered link per page,
            in the same order.

    Returns:
        A newline-separated message (with a trailing newline): a header, one
        line per abbreviation and two lines of guidance.
    """
    page_links = format_page_link(list(unexplained_abbr_with_page.values()))

    lines = ["Найдены нерасшифрованные аббревиатуры при первом использовании:"]
    lines.extend(
        f"- {abbr} на {page_links[position]} странице/слайде"
        for position, abbr in enumerate(unexplained_abbr_with_page)
    )
    lines.append("Каждая аббревиатура должна быть расшифрована при первом использовании в тексте.")
    lines.append("Расшифровка должна быть по первым буквам, например, МВД - Министерство внутренних дел.")

    return "\n".join(lines) + "\n"
diff --git a/app/main/checks/presentation_checks/__init__.py b/app/main/checks/presentation_checks/__init__.py
index 8a0a64fb..288546f1 100644
--- a/app/main/checks/presentation_checks/__init__.py
+++ b/app/main/checks/presentation_checks/__init__.py
@@ -17,4 +17,5 @@
from .name_of_image_check import PresImageCaptureCheck
from .task_tracker import TaskTracker
from .overview_in_tasks import OverviewInTasks
-from .decimal_places import PresDecimalPlacesCheck
\ No newline at end of file
+from .abbreviations_presentation import PresAbbreviationsCheck
+from .decimal_places import PresDecimalPlacesCheck
diff --git a/app/main/checks/presentation_checks/abbreviations_presentation.py b/app/main/checks/presentation_checks/abbreviations_presentation.py
new file mode 100644
index 00000000..1dafefdd
--- /dev/null
+++ b/app/main/checks/presentation_checks/abbreviations_presentation.py
@@ -0,0 +1,39 @@
+import re
+from ..base_check import BasePresCriterion, answer
+from ..check_abbreviations import main_check, forming_response
+
+
class PresAbbreviationsCheck(BasePresCriterion):
    """Presentation criterion: abbreviations must be expanded on first use.

    The slide texts are joined into one string and handed to ``main_check``;
    abbreviations reported as unexplained are then mapped to the first slide
    whose text contains them.
    """

    label = "Проверка расшифровки аббревиатур в презентации"
    description = "Все аббревиатуры должны быть расшифрованы при первом использовании"
    id = 'pres_abbreviations_check'

    def __init__(self, file_info):
        super().__init__(file_info)

    def check(self):
        """Run the criterion.

        Returns:
            ``answer(True, message)`` when ``main_check`` asks not to
            continue or no unexplained abbreviation can be located on a
            slide; ``answer(False, report)`` listing the abbreviations with
            slide links otherwise; ``answer(False, error text)`` if anything
            raises.
        """
        try:
            slides_text = self.file.get_text_from_slides()
            # A deck without slides has no title slide; use an empty title so
            # the shared check deals with the empty text instead of this
            # method failing with IndexError.
            title_page = slides_text[0] if slides_text else ""
            full_text = " ".join(slides_text)

            continue_check, res_str, unexplained_abbr = main_check(text=full_text, title_page=title_page)
            if not continue_check:
                return answer(True, res_str)

            # Map each abbreviation to the first slide (1-based) containing it.
            unexplained_abbr_with_slides = {}
            pending = set(unexplained_abbr)

            for slide_num, slide_text in enumerate(slides_text, 1):
                if not pending:
                    break  # every abbreviation has already been located
                for abbr in unexplained_abbr:
                    if abbr in pending and abbr in slide_text:
                        unexplained_abbr_with_slides[abbr] = slide_num
                        pending.discard(abbr)

            if not unexplained_abbr_with_slides:
                return answer(True, "Все аббревиатуры правильно расшифрованы")

            result_str = forming_response(unexplained_abbr_with_slides, self.format_page_link)
            return answer(False, result_str)

        except Exception as e:
            return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}")
diff --git a/app/main/checks/report_checks/__init__.py b/app/main/checks/report_checks/__init__.py
index 7b1b974b..96b3fab0 100644
--- a/app/main/checks/report_checks/__init__.py
+++ b/app/main/checks/report_checks/__init__.py
@@ -13,12 +13,10 @@
from .page_counter import ReportPageCounter
from .right_words_check import ReportRightWordsCheck
from .section_component import ReportSectionComponent
-from .sections_check import LRReportSectionCheck
from .short_sections_check import ReportShortSectionsCheck
from .simple_check import ReportSimpleCheck
from .style_check_settings import StyleCheckSettings
from .find_theme_in_report import FindThemeInReport
-from .headers_at_page_top_check import ReportHeadersAtPageTopCheck
from .sections_check import LRReportSectionCheck
from .style_check import ReportStyleCheck
from .spelling_check import SpellingCheck
@@ -33,5 +31,5 @@
from .sw_keywords_check import SWKeywordsCheck
from .task_tracker import ReportTaskTracker
from .paragraphs_count_check import ReportParagraphsCountCheck
-from .template_name import ReportTemplateNameCheck
-from .decimal_places import ReportDecimalPlacesCheck
\ No newline at end of file
+from .abbreviations_check import ReportAbbreviationsCheck
+from .decimal_places import ReportDecimalPlacesCheck
diff --git a/app/main/checks/report_checks/abbreviations_check.py b/app/main/checks/report_checks/abbreviations_check.py
new file mode 100644
index 00000000..906378e0
--- /dev/null
+++ b/app/main/checks/report_checks/abbreviations_check.py
@@ -0,0 +1,56 @@
+from ..base_check import BaseReportCriterion, answer
+from ..check_abbreviations import main_check, forming_response
+
+
class ReportAbbreviationsCheck(BaseReportCriterion):
    """Report criterion: abbreviations must be expanded on first use.

    The document text is handed to ``main_check``; abbreviations reported as
    unexplained are then mapped to the first PDF page whose text contains
    them.
    """

    label = "Проверка расшифровки аббревиатур"
    description = "Все аббревиатуры должны быть расшифрованы при первом использовании"
    id = 'report_abbreviations_check'

    def __init__(self, file_info):
        super().__init__(file_info)

    def check(self):
        """Run the criterion.

        Returns:
            ``answer(True, message)`` when ``main_check`` asks not to
            continue or no unexplained abbreviation can be located on a page;
            ``answer(False, report)`` listing the abbreviations with page
            links otherwise; ``answer(False, error text)`` if anything
            raises.
        """
        try:
            text = self._get_document_text()
            # Page 1 is used as the title page.
            # NOTE(review): this reads self.file.pdf_file unconditionally,
            # while _get_document_text treats pdf_file as optional — confirm
            # whether a report without pdf_file can reach this check.
            title_page = self.file.pdf_file.text_on_page[1]

            continue_check, res_str, unexplained_abbr = main_check(text=text, title_page=title_page)
            if not continue_check:
                return answer(True, res_str)

            # Map each abbreviation to the first page (1-based) containing it.
            unexplained_abbr_with_page = {}
            pending = set(unexplained_abbr)

            for page_num in range(1, self.file.page_counter() + 1):
                if not pending:
                    break  # every abbreviation has already been located
                text_on_page = self.file.pdf_file.text_on_page[page_num]

                for abbr in unexplained_abbr:
                    if abbr in pending and abbr in text_on_page:
                        unexplained_abbr_with_page[abbr] = page_num
                        pending.discard(abbr)

            if not unexplained_abbr_with_page:
                return answer(True, "Все аббревиатуры правильно расшифрованы")

            result_str = forming_response(unexplained_abbr_with_page, self.format_page_link)
            return answer(False, result_str)

        except Exception as e:
            return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}")

    def _get_document_text(self):
        """Return the document text as a single string.

        When the file has a ``pdf_file`` attribute, the per-page texts from
        ``pdf_file.get_text_on_page()`` are joined with spaces. Otherwise,
        when it has ``paragraphs``, their string forms are joined with
        newlines. Returns ``None`` if neither attribute is present.
        """
        if hasattr(self.file, 'pdf_file'):
            page_texts = self.file.pdf_file.get_text_on_page()
            return " ".join(page_texts.values())

        if hasattr(self.file, 'paragraphs'):
            text_parts = []
            for paragraph in self.file.paragraphs:
                paragraph_text = paragraph.to_string()
                # For a multi-line string only the second line is kept.
                # NOTE(review): presumably the first line is a style/header
                # line produced by to_string() — confirm; any further lines
                # are discarded as well.
                if '\n' in paragraph_text:
                    paragraph_text = paragraph_text.split('\n')[1]
                text_parts.append(paragraph_text)
            return "\n".join(text_parts)

        return None
+
\ No newline at end of file