Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
d62b05c
751_decimal_places_limit
baydakov-georgiy Nov 15, 2025
5a9f252
fixed doc-string
baydakov-georgiy Nov 15, 2025
ae1f290
Merge branch 'dev' into 751_decimal_places_limit
HadronCollider Dec 6, 2025
ecee0d7
update main_character_check
HadronCollider Dec 8, 2025
266f919
add recheck test
HadronCollider Dec 8, 2025
5d82440
Update style_check_settings.py
HadronCollider Dec 10, 2025
fe9f649
add warned_words for banned_words_check
HadronCollider Dec 17, 2025
d7121dd
add login_required and author check for result page
HadronCollider Dec 26, 2025
125ea65
little update for 404 page
HadronCollider Dec 26, 2025
452a797
Update results.py
HadronCollider Jan 28, 2026
77aeae2
check 3 label is done
LapshinAE0 Jan 28, 2026
4dc44cb
rm captcha logic
HadronCollider Feb 9, 2026
4c49018
little fix in check description / feedback
HadronCollider Feb 10, 2026
70360ed
Update checks for LR compability
HadronCollider Feb 10, 2026
7dc0aa9
update raw_criterions using
HadronCollider Feb 10, 2026
446bf54
Merge branch 'dev' into 751_decimal_places_limit
baydakov-georgiy Feb 12, 2026
ab70f51
bring out the general logic of pres and report criteria
baydakov-georgiy Feb 12, 2026
020b975
fixed page num for presentation violations
baydakov-georgiy Feb 13, 2026
35c5865
update .gitignore
HadronCollider Feb 15, 2026
2de95e1
update docker image tag in build.yml
HadronCollider Feb 15, 2026
ff99bf8
rm unused workflow
HadronCollider Feb 15, 2026
a1e9e7b
update pandas version (requirements.txt)
HadronCollider Feb 15, 2026
9eca4c2
Merge branch 'dev' into 751_decimal_places_limit
HadronCollider Feb 15, 2026
34125f0
fix ReportDecimalPlacesCheck
HadronCollider Feb 15, 2026
de41c28
Merge pull request #758 from moevm/751_decimal_places_limit
HadronCollider Feb 15, 2026
eed5f1d
Merge branch 'dev' into 768_content_objects_3level
HadronCollider Feb 15, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions .env_example
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
RECAPTCHA_SITE_KEY=123
RECAPTCHA_SECRET_KEY=123
SECRET_KEY=123
ADMIN_PASSWORD=admin
SIGNUP_PAGE_ENABLED=False
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ jobs:
- name: Build system images (non-pulling)
run: |
# build base image
docker build -f Dockerfile_base -t dvivanov/dis-base:v0.3 .
docker build -f Dockerfile_base -t dvivanov/dis-base:v0.5 .
- name: Build docker-compose
run: |
cp .env_example .env
Expand Down
16 changes: 0 additions & 16 deletions .github/workflows/collect_commits.yml

This file was deleted.

6 changes: 3 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ __pycache__/

.idea
venv
.venv
.vscode
*.pyc

files/*
Expand All @@ -14,7 +16,5 @@ node_modules
src/
.env

/VERSION.json
VERSION.json

app/main/mse22/converted_files/
/app/main/mse22/for_testing/test/.pytest_cache/
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
## Environment
- To `.env` in root:
```
RECAPTCHA_SITE_KEY=...
RECAPTCHA_SECRET_KEY=...
SECRET_KEY=...
ADMIN_PASSWORD=...
SIGNUP_PAGE_ENABLED=...
Expand Down
3 changes: 2 additions & 1 deletion app/main/check_packs/base_criterion_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def __init__(self, raw_criterions, file_type, min_score=1.0, name=None, **kwargs

def init(self, file_info):
# create criterion objects, ignore errors - validation was performed earlier
self.criterions, errors = init_criterions(self.raw_criterions, file_type=self.file_type, file_info=file_info)
file_info['file_type'] = self.file_type
self.criterions, errors = init_criterions(self.raw_criterions, file_info=file_info)

def check(self):
result = []
Expand Down
1 change: 1 addition & 0 deletions app/main/check_packs/pack_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
["empty_task_page_check"],
["water_in_the_text_check"],
["report_task_tracker"],
["report_3_level_in_content_check"],
]

DEFAULT_TYPE = 'pres'
Expand Down
3 changes: 2 additions & 1 deletion app/main/check_packs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,12 @@
logger = getLogger('root_logger')


def init_criterions(criterions, file_type, file_info={}):
def init_criterions(criterions, file_info):
"""
criterions = [[criterion_id, criterion_params], ...] # criterion_params is dict
"""
try:
file_type = file_info['file_type']
existing_criterions = AVAILABLE_CHECKS.get(file_type['type'], {})
errors = []
initialized_checks = []
Expand Down
6 changes: 3 additions & 3 deletions app/main/checks/base_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@ def answer(mod, *args):
class BaseCriterion:
description = None
label = None
file_type = None
id = None
priority = False # if priority criterion is failed -> check is failed

def __init__(self, file_info):
self.file = file_info.get('file')
self.filename = file_info.get('filename', '')
self.pdf_id = file_info.get('pdf_id')
self.file_type = file_info.get('file_type')

def check(self):
raise NotImplementedError()
Expand All @@ -36,8 +36,8 @@ def name(self):


class BasePresCriterion(BaseCriterion):
file_type = 'pres'
pass


class BaseReportCriterion(BaseCriterion):
file_type = {'type': 'report', 'report_type': 'VKR'}
pass
1 change: 1 addition & 0 deletions app/main/checks/presentation_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,4 @@
from .name_of_image_check import PresImageCaptureCheck
from .task_tracker import TaskTracker
from .overview_in_tasks import OverviewInTasks
from .decimal_places import PresDecimalPlacesCheck
16 changes: 16 additions & 0 deletions app/main/checks/presentation_checks/decimal_places.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from app.utils.decimal_places_check import DecimalPlacesCheck
from ..base_check import BasePresCriterion, answer

class PresDecimalPlacesCheck(BasePresCriterion):
    """Presentation criterion that reports numbers with excessive decimal places.

    The detection and the result message/score are delegated to the shared
    ``DecimalPlacesCheck`` helper; this class only supplies the slide texts
    and the page-link formatter.
    """

    label = 'Проверка на избыточное количество десятичных знаков'
    description = 'Проверка на избыточное количество десятичных знаков в числах'
    id = 'decimal_places_check'

    def __init__(self, file_info, max_decimal_places=2, max_violations=3):
        """Store the file info and build the helper.

        ``max_decimal_places`` and ``max_violations`` are forwarded unchanged
        to ``DecimalPlacesCheck``.
        """
        super().__init__(file_info)
        self.checker = DecimalPlacesCheck(
            file_info,
            max_decimal_places,
            max_violations,
        )

    def check(self):
        """Run the helper over every slide text and wrap its verdict in ``answer``."""
        # Slide texts are paired with 1-based numbers before being scanned.
        numbered_slides = enumerate(self.file.get_text_from_slides(), start=1)
        total_violations, detected_pages = self.checker.find_violations_in_texts(
            numbered_slides
        )
        message, score = self.checker.get_result_msg_and_score(
            total_violations,
            detected_pages,
            self.format_page_link,
        )
        return answer(score, message)
1 change: 0 additions & 1 deletion app/main/checks/presentation_checks/image_share.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,3 @@ def check(self):
ограничение - {round(self.limit, 2)}')
else:
return answer(True, f'Пройдена!')
return answer(False, 'Во время обработки произошла критическая ошибка')
2 changes: 2 additions & 0 deletions app/main/checks/report_checks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,5 @@
from .task_tracker import ReportTaskTracker
from .paragraphs_count_check import ReportParagraphsCountCheck
from .template_name import ReportTemplateNameCheck
from .check_chapters_3_level import ReportСhaptersLevel3ContentCheck
from .decimal_places import ReportDecimalPlacesCheck
45 changes: 27 additions & 18 deletions app/main/checks/report_checks/banned_words_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@

class ReportBannedWordsCheck(BaseReportCriterion):
label = "Проверка наличия запретных слов в тексте отчёта"
description = 'Запрещено упоминание слова "мы"'
description = 'Запрещено упоминание определенных "опасных" слов'
id = 'banned_words_check'

def __init__(self, file_info, headers_map=None):
super().__init__(file_info)
self.words = []
self.warned_words = []
self.min_count = 0
self.max_count = 0
if headers_map:
Expand All @@ -21,42 +22,50 @@ def __init__(self, file_info, headers_map=None):
def late_init(self):
self.headers_main = self.file.get_main_headers(self.file_type['report_type'])
if self.headers_main in StyleCheckSettings.CONFIGS.get(self.config):
self.words = [morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['banned_words']]
self.words = {morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['banned_words']}
self.warned_words = {morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['warned_words']}
self.min_count = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['min_count_for_banned_words_check']
self.max_count = StyleCheckSettings.CONFIGS.get(self.config)[self.headers_main]['max_count_for_banned_words_check']
else:
if 'any_header' in StyleCheckSettings.CONFIGS.get(self.config):
self.words = [morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)['any_header']['banned_words']]
self.words = {morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)['any_header']['banned_words']}
self.warned_words = {morph.normal_forms(word)[0] for word in StyleCheckSettings.CONFIGS.get(self.config)['any_header']['warned_words']}
self.min_count = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['min_count_for_banned_words_check']
self.max_count = StyleCheckSettings.CONFIGS.get(self.config)['any_header']['max_count_for_banned_words_check']

def check(self):
if self.file.page_counter() < 4:
return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
self.late_init()
detected_lines = {}
result_str = f'<b>Запрещенные слова: {"; ".join(self.words)}</b><br>'
count = 0
banned_counter = {'words': self.words, 'detected_lines': {}, 'count': 0}
warned_counter = {'words': self.warned_words,'detected_lines': {}, 'count': 0}
for k, v in self.file.pdf_file.get_text_on_page().items():
lines_on_page = re.split(r'\n', v)
for index, line in enumerate(lines_on_page):
words_on_line = re.split(r'[^\w-]+', line)
words_on_line = [morph.normal_forms(word)[0] for word in words_on_line]
count_banned_words = set(words_on_line).intersection(self.words)
if count_banned_words:
count += len(count_banned_words)
if k not in detected_lines.keys():
detected_lines[k] = []
detected_lines[k].append(f'Строка {index + 1}: {line} <b>[{"; ".join(count_banned_words)}]</b>')
if len(detected_lines):
words_on_line = {morph.normal_forms(word)[0] for word in re.split(r'[^\w-]+', line)}
for counter in (banned_counter, warned_counter):
count_banned_words = words_on_line.intersection(counter['words'])
if count_banned_words:
counter['count'] += len(count_banned_words)
if k not in counter['detected_lines'].keys():
counter['detected_lines'][k] = []
counter['detected_lines'][k].append(f'Строка {index + 1}: {line} <b>[{"; ".join(count_banned_words)}]</b>')
if len(banned_counter['detected_lines']):
result_str += 'Обнаружены запретные слова! <br><br>'
for k, v in detected_lines.items():
result_str += f'Страница №{k}:<br>{"<br>".join(detected_lines[k])}<br><br>'
for k, v in banned_counter['detected_lines'].items():
result_str += f'Страница №{k}:<br>{"<br>".join(banned_counter['detected_lines'][k])}<br><br>'
else:
result_str = 'Пройдена!'

if len(warned_counter['detected_lines']):
result_str += f'<br><br>Обнаружены потенциально опасные слова (не влияют на результат проверки)!<br>Обратите внимание, что их использование возможно только в подтвержденных случаях: {"; ".join(self.warned_words)}<br><br>'
for k, v in warned_counter['detected_lines'].items():
result_str += f'Страница №{k}:<br>{"<br>".join(warned_counter['detected_lines'][k])}<br><br>'

result_score = 1
if count > self.min_count:
if count <= self.max_count:
if banned_counter['count'] > self.min_count:
if banned_counter['count'] <= self.max_count:
result_score = 0.5
else:
result_score = 0
Expand Down
3 changes: 2 additions & 1 deletion app/main/checks/report_checks/banned_words_in_literature.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ def check(self):
if self.file.page_counter() < 4:
return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
detected_words_dict = {}
# TODO: проверить совместимость / дублируемость LR и VKR
if self.file_type['report_type'] == 'LR':
list_of_literature = self.find_literature()
if len(list_of_literature) == 0:
Expand All @@ -51,7 +52,7 @@ def check(self):
else:
detected_words_dict[child_number] = banned_word
else:
return answer(False, 'Во время обработки произошла критическая ошибка')
return answer(False, 'Во время обработки произошла критическая ошибка - указан неверный тип работы в наборе критериев')
if detected_words_dict:
result_str = ""
for i in sorted(detected_words_dict.keys()):
Expand Down
65 changes: 31 additions & 34 deletions app/main/checks/report_checks/chapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class ReportChapters(BaseReportCriterion):
def __init__(self, file_info):
super().__init__(file_info)
self.headers = []
self.target_styles = StyleCheckSettings.VKR_CONFIG
self.target_styles = StyleCheckSettings.VKR_CONFIG if (self.file_type['report_type'] == 'VKR') else StyleCheckSettings.LR_CONFIG
self.target_styles = list(map(lambda elem: {
"style": self.construct_style_from_description(elem["style"])
}, self.target_styles.values()))
Expand All @@ -29,7 +29,7 @@ def __init__(self, file_info):
level += 1

def late_init(self):
self.headers = self.file.make_chapters(self.file_type['report_type'])
self.headers = self.file.make_chapters()#self.file_type['report_type'])

@staticmethod
def construct_style_from_description(style_dict):
Expand Down Expand Up @@ -57,38 +57,35 @@ def check(self):
return answer(False, "В отчете недостаточно страниц. Нечего проверять.")
self.late_init()
result_str = ''
if self.file_type['report_type'] == 'VKR':
if not len(self.headers):
return answer(False, "Не найдено ни одного заголовка.<br><br>Проверьте корректность использования стилей.")
for header in self.headers:
marked_style = 0
for key in self.docx_styles.keys():
if not marked_style:
for style_name in self.docx_styles[key]:
if header["style"].find(style_name) >= 0:
if self.style_regex[key].match(header["text"]):
marked_style = 1
err = self.style_diff(header["styled_text"], self.target_styles[key]["style"])
err = list(map(lambda msg: f'Стиль "{header["style"]}": ' + msg, err))
result_str += ("<br>".join(err) + "<br>" if len(err) else "")
break
if not len(self.headers):
return answer(False, "Не найдено ни одного заголовка.<br><br>Проверьте корректность использования стилей.")
for header in self.headers:
marked_style = 0
for key in self.docx_styles.keys():
if not marked_style:
err = f"Заголовок \"{header['text']}\": "
err += f'Стиль "{header["style"]}" не соответстует ни одному из стилей заголовков.'
result_str += (str(err) + "<br>")
for style_name in self.docx_styles[key]:
if header["style"].find(style_name) >= 0:
if self.style_regex[key].match(header["text"]):
marked_style = 1
err = self.style_diff(header["styled_text"], self.target_styles[key]["style"])
err = list(map(lambda msg: f'Стиль "{header["style"]}": ' + msg, err))
result_str += ("<br>".join(err) + "<br>" if len(err) else "")
break
if not marked_style:
err = f"Заголовок \"{header['text']}\": "
err += f'Стиль "{header["style"]}" не соответствует ни одному из стилей заголовков.'
result_str += (str(err) + "<br>")

if not result_str:
return answer(True, "Форматирование заголовков соответствует требованиям.")
else:
result_string = f'Найдены ошибки в оформлении заголовков:<br>{result_str}<br>'
result_string += '''
Попробуйте сделать следующее:
<ul>
<li>Убедитесь в соответствии стиля заголовка требованиям к отчету по ВКР;</li>
<li>Убедитесь, что названия разделов и нумированные разделы оформлены по ГОСТу;</li>
<li>Убедитесь, что красная строка не сделана с помощью пробелов или табуляции.</li>
</ul>
'''
return answer(False, result_string)
if not result_str:
return answer(True, "Форматирование заголовков соответствует требованиям.")
else:
return answer(False, 'Во время обработки произошла критическая ошибка')
result_string = f'Найдены ошибки в оформлении заголовков:<br>{result_str}<br>'
result_string += '''
Попробуйте сделать следующее:
<ul>
<li>Убедитесь в соответствии стиля заголовка требованиям к отчету по ВКР;</li>
<li>Убедитесь, что названия разделов и нумированные разделы оформлены по ГОСТу;</li>
<li>Убедитесь, что красная строка не сделана с помощью пробелов или табуляции.</li>
</ul>
'''
return answer(False, result_string)
51 changes: 51 additions & 0 deletions app/main/checks/report_checks/check_chapters_3_level.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from ..base_check import BaseReportCriterion, answer

class ReportСhaptersLevel3ContentCheck(BaseReportCriterion):
    """Report criterion: the table of contents must not contain level-3 entries.

    The check takes the chapter tree returned by ``self.file.make_chapters``,
    finds the top-level header whose text is "СОДЕРЖАНИЕ" (case-insensitive,
    surrounding whitespace ignored) and counts every nested entry whose
    ``level`` is 3 or greater.
    """

    # NOTE(review): the label below contains typos ("наличия объктов"); it is
    # left untouched here because it is a runtime string that other code or
    # stored results may rely on.
    label = "Проверка содержания на наличия объктов 3 уровня"
    description = "В содержании не должно быть объектов третьего уровня"
    id = 'report_3_level_in_content_check'

    # Upper-cased title of the header that holds the table of contents.
    CONTENT_HEADER = "СОДЕРЖАНИЕ"

    def check(self):
        """Return ``answer(True, ...)`` when the contents hold no level-3+ entries.

        Returns a failed ``answer`` when there are no headers at all, when the
        contents header is missing, or when level-3+ entries are found. Any
        exception raised while inspecting the document is reported as a failed
        check instead of propagating.
        """
        try:
            headers = self.file.make_chapters(self.file_type['report_type'])

            if not headers:
                return answer(False, "Не найдено ни одного заголовка.")

            # Only the first matching header is inspected.
            content_header = next(
                (
                    header for header in headers
                    if header["text"].strip().upper() == self.CONTENT_HEADER
                ),
                None,
            )
            if content_header is None:
                return answer(False, "Не найдено заголовка 'Содержание'")

            level_3_count = self._count_level_3_headers(
                content_header.get("child") or []
            )
            if level_3_count > 0:
                result_str = f"Найдено {level_3_count} заголовков 3 уровня и выше. "
                result_str += "Содержание должно содержать только заголовки 1 и 2 уровня.<br>"
                return answer(False, result_str)

            return answer(True, "Все заголовки соответствуют требованиям (1-2 уровень)")

        except Exception as e:
            return answer(False, f"Ошибка при проверке: {e}")

    def _count_level_3_headers(self, content):
        """Recursively count entries in ``content`` whose level is 3 or greater."""
        count = 0
        for header in content:
            if self._is_level_3_or_higher(header):
                count += 1
            # Leaf entries may carry no "child" list; treat that as "no children"
            # instead of failing the whole check with a KeyError.
            count += self._count_level_3_headers(header.get("child") or [])
        return count

    def _is_level_3_or_higher(self, header):
        """Tell whether ``header`` is nested at level 3 or deeper."""
        return header["level"] >= 3
Loading