Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
7316257
Checks abbreviations press and reports
LapshinAE0 Nov 21, 2025
c89d506
Fixed if present check
LapshinAE0 Nov 21, 2025
8394b5a
abbr taken out
LapshinAE0 Dec 1, 2025
975b4d2
correct check abbr
LapshinAE0 Dec 1, 2025
280d9cd
fixed checks 1
LapshinAE0 Dec 4, 2025
7fe11be
fixed checks 2
LapshinAE0 Dec 4, 2025
edd07ce
fixed checks 3
LapshinAE0 Dec 4, 2025
d696932
Merge branch 'master' into 555_check_abbreviations
LapshinAE0 Dec 4, 2025
ab8f2f7
Update banned_words_in_literature.py
HadronCollider Dec 5, 2025
331a121
fix svg size and background color
HadronCollider Nov 20, 2025
ac65e06
fix dev docker compose
HadronCollider Nov 20, 2025
c69548c
print traceback to logs (and check result)
HadronCollider Nov 20, 2025
bbbf47d
little change for svg size
HadronCollider Nov 20, 2025
31371af
add more feedback for UNEXPECTED_CHECK_FAIL_MSG
HadronCollider Nov 20, 2025
df61abd
little kostil'
HadronCollider Nov 20, 2025
c90d82d
update template results.html
HadronCollider Nov 28, 2025
3269e53
569: fix big files in webpack
necit-dev Jul 7, 2024
bdfefdf
update some html-templates and styles
HadronCollider Nov 30, 2025
eece7ea
update Dockerfiles (base and main), requirements and some python-libs
HadronCollider Nov 30, 2025
8280103
update main_character_check
HadronCollider Dec 8, 2025
c88f47e
add recheck test
HadronCollider Dec 8, 2025
495b68d
Update style_check_settings.py
HadronCollider Dec 10, 2025
dcad677
add warned_words for banned_words_check
HadronCollider Dec 17, 2025
8b5865e
add login_required and author check for result page
HadronCollider Dec 26, 2025
02b70ba
little update for 404 page
HadronCollider Dec 26, 2025
9a54360
all requier applyied-2
LapshinAE0 Jan 26, 2026
96cb746
all requier applyied-3
LapshinAE0 Jan 26, 2026
d2d09a6
Merge branch 'dev' into 555_check_abbreviations
LapshinAE0 Jan 29, 2026
d26fcd3
add case for title page
LapshinAE0 Feb 7, 2026
fd19063
dont check abbr title page
LapshinAE0 Feb 10, 2026
467bef5
Merge branch 'dev' into 555_check_abbreviations
HadronCollider Feb 15, 2026
f1a8f16
Merge branch 'dev' into 555_check_abbreviations
HadronCollider Feb 15, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
['pres_image_capture'],
['task_tracker'],
['overview_in_tasks'],
['pres_abbreviations_check'],
]
BASE_REPORT_CRITERION = [
["simple_check"],
Expand Down Expand Up @@ -50,6 +51,7 @@
["empty_task_page_check"],
["water_in_the_text_check"],
["report_task_tracker"],
["report_abbreviations_check"],
]

DEFAULT_TYPE = 'pres'
Expand Down
105 changes: 105 additions & 0 deletions app/main/checks/check_abbreviations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import re
from pymorphy3 import MorphAnalyzer
morph = MorphAnalyzer()


def get_unexplained_abbrev(text, title_page):
    """Collect the abbreviations in ``text`` that lack a matching expansion.

    Candidates come from ``find_abbreviations(text, title_page)``; each one is
    then tested with ``is_abbreviation_explained`` against the same text.

    Returns:
        A ``(found, unexplained)`` pair. ``found`` is False (with an empty
        list) when no candidates were detected at all; otherwise it is True
        and ``unexplained`` lists the candidates without an expansion.
    """
    candidates = find_abbreviations(text, title_page)
    if candidates:
        missing = [item for item in candidates if not is_abbreviation_explained(item, text)]
        return True, missing
    return False, []

# Abbreviations treated as common knowledge: they never require an expansion.
# Every entry is a separate, comma-terminated literal on purpose: adjacent
# string literals without a comma are silently concatenated by Python
# (the former 'DVD' 'SSD' pair became the single, useless entry 'DVDSSD').
COMMON_ABBREVIATIONS = frozenset({
    'СССР', 'РФ', 'США', 'ВКР', 'ИТ', 'ПО', 'ООО', 'ЗАО', 'ОАО', 'HTML', 'CSS',
    'JS', 'ЛЭТИ', 'МОЕВМ', 'МОЭВМ', 'ЭВМ', 'ГОСТ', 'DVD',
    'SSD', 'PC', 'HDD',
    'AX', 'BX', 'CX', 'DX', 'SI', 'DI', 'BP', 'SP',
    'AH', 'AL', 'BH', 'BL', 'CH', 'CL', 'DH', 'DL',
    'CS', 'DS', 'ES', 'SS', 'FS', 'GS',
    'IP', 'EIP', 'RIP', 'URL',
    'CF', 'PF', 'AF', 'ZF', 'SF', 'TF', 'IF', 'DF', 'OF',
    'EAX', 'EBX', 'ECX', 'EDX', 'ESI', 'EDI', 'EBP', 'ESP',
    'RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP',
    'DOS', 'OS', 'BIOS', 'UEFI', 'MBR', 'GPT',
    'ASCII', 'UTF', 'UNICODE', 'ANSI',
    'CPU', 'GPU', 'APU', 'RAM', 'ROM', 'PROM', 'EPROM', 'EEPROM',
    'USB', 'SATA', 'PCI', 'PCIe', 'AGP', 'ISA', 'VGA', 'HDMI', 'DP',
    'LAN', 'WAN', 'WLAN', 'VPN', 'ISP', 'DNS', 'DHCP', 'TCP', 'UDP',
    'HTTP', 'HTTPS', 'FTP', 'SSH', 'SSL', 'TLS',
    'API', 'GUI', 'CLI', 'IDE', 'SDK', 'SQL', 'NoSQL', 'XML', 'JSON', 'YAML',
    'MAC', 'IBM', 'ООП', 'ЛР', 'КР', 'ОТЧЕТ',
})


def find_abbreviations(text: str, title_page: str):
    """Return the distinct abbreviation candidates found in ``text``.

    A candidate is a standalone run of 2-5 uppercase Cyrillic (А-Я) or Latin
    (A-Z) letters. A candidate is dropped when it

    * is listed in ``COMMON_ABBREVIATIONS``,
    * occurs anywhere in ``title_page`` (plain substring test), or
    * gets a zero score from the first pymorphy3 parse of its lowercase form.

    Args:
        text: Full document/presentation text to scan.
        title_page: Text of the title page/slide; anything appearing there is
            exempt from the check.

    Returns:
        A list of unique candidates; the order is not defined.
    """
    # NOTE: the range А-Я does not contain the letter Ё.
    # Deduplicate before the (comparatively expensive) morphology lookup.
    candidates = set(re.findall(r'\b[А-ЯA-Z]{2,5}\b', text))

    result = []
    for abbr in candidates:
        if abbr in COMMON_ABBREVIATIONS or abbr in title_page:
            continue
        # NOTE(review): presumably filters out tokens the analyzer gives no
        # weight to - confirm the intended meaning of ``score != 0``.
        if morph.parse(abbr.lower())[0].score != 0:
            result.append(abbr)
    return result

def is_abbreviation_explained(abbr: str, text: str) -> bool:
    """Tell whether ``text`` contains a valid expansion of ``abbr``.

    Recognised layouts (matched case-insensitively)::

        ABBR (expansion)        (expansion) ABBR
        ABBR - expansion        expansion - ABBR

    where the dash may be a hyphen, an en dash or an em dash. An expansion is
    valid when ``correctly_explained`` accepts it.

    Every occurrence of every layout is examined, so an unrelated parenthesis
    or dash next to an early mention of the abbreviation does not hide a
    proper expansion given later in the text.

    Args:
        abbr: The abbreviation to look for.
        text: The text to search.

    Returns:
        True if at least one occurrence is followed/preceded by a valid
        expansion, otherwise False.
    """
    # Escape the abbreviation and require word boundaries so that it is not
    # matched inside a longer word (e.g. "API" inside "RAPI").
    token = rf'\b{re.escape(abbr)}\b'
    dash = r'[—–\-]'
    patterns = (
        rf'{token}\s*\(([^)]+)\)',            # ABBR (expansion)
        rf'\(([^)]+)\)\s*{token}',            # (expansion) ABBR
        rf'{token}\s*{dash}\s*([^.,;!?]+)',   # ABBR - expansion
        rf'([^.,;!?]+)\s*{dash}\s*{token}',   # expansion - ABBR
    )

    for pattern in patterns:
        for match in re.finditer(pattern, text, re.IGNORECASE):
            if correctly_explained(abbr, match.group(1)):
                return True
    return False


def correctly_explained(abbr, explan):
    """Check that the initials of the words in ``explan`` spell ``abbr``.

    The comparison ignores case. Words are split on whitespace only, so a
    hyphenated word contributes a single initial.
    """
    first_letters = "".join(word[0].upper() for word in explan.split())
    return first_letters == abbr.upper()

def main_check(text: str, title_page: str):
    """Run the abbreviation check and describe its outcome.

    Args:
        text: Full text of the document/presentation.
        title_page: Text of the title page/slide (abbreviations found there
            are not checked).

    Returns:
        A ``(continue_check, message, unexplained)`` triple.

        * ``(True, "", [abbr, ...])`` - unexplained abbreviations were found
          and the caller should go on to locate them.
        * ``(False, message, [])`` - nothing left to do; ``message`` states
          why: no text, no abbreviations, everything explained, or an
          internal error.
    """
    # Return as soon as the outcome is known: each condition must produce its
    # own message instead of being overwritten by a later, less specific one.
    if not text:
        return False, "Не удалось получить текст", []

    try:
        abbr_is_finding, unexplained_abbr = get_unexplained_abbrev(text=text, title_page=title_page)
    except Exception as e:
        # Best-effort boundary: a failure inside the check is reported to the
        # caller as a message rather than propagated.
        return False, f"Ошибка при проверке аббревиатур: {str(e)}", []

    if not abbr_is_finding:
        return False, "Аббревиатуры не найдены в представленном документе", []

    if not unexplained_abbr:
        return False, "Все аббревиатуры правильно расшифрованы", []

    return True, "", unexplained_abbr

def forming_response(unexplained_abbr_with_page, format_page_link):
    """Build the HTML feedback listing unexplained abbreviations.

    Args:
        unexplained_abbr_with_page: Mapping of abbreviation to the page/slide
            number where it was found.
        format_page_link: Callable that takes the list of page numbers and
            returns their display links, one per page and in the same order.

    Returns:
        The message as a single string in which every line ends with ``<br>``.
    """
    page_links = format_page_link(list(unexplained_abbr_with_page.values()))

    lines = ["Найдены нерасшифрованные аббревиатуры при первом использовании:"]
    # Iterating the mapping yields keys in the same order as .values() above,
    # so each abbreviation is paired with its own link.
    lines.extend(
        f"- {abbr} на {link} странице/слайде"
        for abbr, link in zip(unexplained_abbr_with_page, page_links)
    )
    lines.append("Каждая аббревиатура должна быть расшифрована при первом использовании в тексте.")
    lines.append("Расшифровка должна быть по первым буквам, например, МВД - Министерство внутренних дел.")

    return "".join(f"{line}<br>" for line in lines)
3 changes: 2 additions & 1 deletion app/main/checks/presentation_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,5 @@
from .name_of_image_check import PresImageCaptureCheck
from .task_tracker import TaskTracker
from .overview_in_tasks import OverviewInTasks
from .decimal_places import PresDecimalPlacesCheck
from .abbreviations_presentation import PresAbbreviationsCheck
from .decimal_places import PresDecimalPlacesCheck
39 changes: 39 additions & 0 deletions app/main/checks/presentation_checks/abbreviations_presentation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import re
from ..base_check import BasePresCriterion, answer
from ..check_abbreviations import main_check, forming_response


class PresAbbreviationsCheck(BasePresCriterion):
    """Presentation criterion: abbreviations on the slides must be expanded.

    The first slide is treated as the title page; abbreviations that occur on
    it are exempt (see ``main_check``).
    """

    label = "Проверка расшифровки аббревиатур в презентации"
    description = "Все аббревиатуры должны быть расшифрованы при первом использовании"
    id = 'pres_abbreviations_check'

    def __init__(self, file_info):
        super().__init__(file_info)

    def check(self):
        """Run the check and return the ``answer(...)`` result.

        The criterion passes when ``main_check`` reports nothing to do or when
        none of the unexplained abbreviations can be located on a slide; it
        fails with a list of abbreviations and the first slide each appears on.
        """
        try:
            slides_text = self.file.get_text_from_slides()
            # A presentation without slides has no title slide; fall back to
            # an empty title so main_check can report the missing text
            # instead of this method failing with IndexError.
            title_page = slides_text[0] if slides_text else ""
            full_text = " ".join(slides_text)

            continue_check, res_str, unexplained_abbr = main_check(text=full_text, title_page=title_page)
            if not continue_check:
                return answer(True, res_str)

            # Remember only the first slide (1-based) each abbreviation is on.
            unexplained_abbr_with_slides = {}
            for slide_num, slide_text in enumerate(slides_text, 1):
                for abbr in unexplained_abbr:
                    if abbr in slide_text:
                        unexplained_abbr_with_slides.setdefault(abbr, slide_num)

            if not unexplained_abbr_with_slides:
                return answer(True, "Все аббревиатуры правильно расшифрованы")

            result_str = forming_response(unexplained_abbr_with_slides, self.format_page_link)
            return answer(False, result_str)

        except Exception as e:
            return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}")
6 changes: 2 additions & 4 deletions app/main/checks/report_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@
from .page_counter import ReportPageCounter
from .right_words_check import ReportRightWordsCheck
from .section_component import ReportSectionComponent
from .sections_check import LRReportSectionCheck
from .short_sections_check import ReportShortSectionsCheck
from .simple_check import ReportSimpleCheck
from .style_check_settings import StyleCheckSettings
from .find_theme_in_report import FindThemeInReport
from .headers_at_page_top_check import ReportHeadersAtPageTopCheck
from .sections_check import LRReportSectionCheck
from .style_check import ReportStyleCheck
from .spelling_check import SpellingCheck
Expand All @@ -33,5 +31,5 @@
from .sw_keywords_check import SWKeywordsCheck
from .task_tracker import ReportTaskTracker
from .paragraphs_count_check import ReportParagraphsCountCheck
from .template_name import ReportTemplateNameCheck
from .decimal_places import ReportDecimalPlacesCheck
from .abbreviations_check import ReportAbbreviationsCheck
from .decimal_places import ReportDecimalPlacesCheck
56 changes: 56 additions & 0 deletions app/main/checks/report_checks/abbreviations_check.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from ..base_check import BaseReportCriterion, answer
from ..check_abbreviations import main_check, forming_response


class ReportAbbreviationsCheck(BaseReportCriterion):
    """Report criterion: abbreviations in the document must be expanded.

    Page 1 of the PDF rendition is treated as the title page; abbreviations
    that occur on it are exempt (see ``main_check``).
    """

    label = "Проверка расшифровки аббревиатур"
    description = "Все аббревиатуры должны быть расшифрованы при первом использовании"
    id = 'report_abbreviations_check'

    def __init__(self, file_info):
        super().__init__(file_info)

    def check(self):
        """Run the check and return the ``answer(...)`` result.

        The criterion passes when ``main_check`` reports nothing to do or when
        none of the unexplained abbreviations can be located on a page; it
        fails with a list of abbreviations and the first page each appears on.
        """
        # NOTE(review): a reviewer pointed out that this logic is duplicated
        # one-to-one in both abbreviation criteria (apart from the
        # document/presentation wording and the way the text is obtained) and
        # should be extracted into a separate function/module.
        try:
            document_text = self._get_document_text()
            # assumes text_on_page maps 1-based page numbers to page text -
            # TODO confirm against the PDF wrapper
            pages = self.file.pdf_file.text_on_page
            first_page = pages[1]

            proceed, message, pending = main_check(text=document_text, title_page=first_page)
            if not proceed:
                return answer(True, message)

            # Remember only the first page each abbreviation shows up on.
            first_page_of = {}
            last_page = self.file.page_counter()
            for number in range(1, last_page + 1):
                page_text = pages[number]
                for abbr in pending:
                    if abbr in page_text:
                        first_page_of.setdefault(abbr, number)

            if first_page_of:
                return answer(False, forming_response(first_page_of, self.format_page_link))
            return answer(True, "Все аббревиатуры правильно расшифрованы")

        except Exception as e:
            return answer(False, f"Ошибка при проверке аббревиатур: {str(e)}")

    def _get_document_text(self):
        """Return the document text as one string, or None if unavailable.

        Prefers the PDF rendition (page texts joined with spaces). Otherwise
        falls back to ``self.file.paragraphs`` joined with newlines; for a
        paragraph whose string form contains a newline, only the second line
        is kept.
        """
        if hasattr(self.file, 'pdf_file'):
            return " ".join(self.file.pdf_file.get_text_on_page().values())

        if not hasattr(self.file, 'paragraphs'):
            return None

        collected = []
        for paragraph in self.file.paragraphs:
            rendered = paragraph.to_string()
            collected.append(rendered.split('\n')[1] if '\n' in rendered else rendered)
        return "\n".join(collected)