Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .dvc/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
/config.local
/tmp
/cache
Empty file added .dvc/config
Empty file.
3 changes: 3 additions & 0 deletions .dvcignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Add patterns of files dvc should ignore, which could improve
# the performance. Learn more at
# https://dvc.org/doc/user-guide/dvcignore
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,9 @@ MANIFEST
mlruns/

# Data
data
data/*

# Cookiecutter
hack_digital_transformation/
uv.lock
.coverage
.coverage
28 changes: 16 additions & 12 deletions job_config.yaml
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
name: digital_hack_ml_job # Уникальное имя задания
desc: "Джоба для проведения экспериментов по созданию CV модели для определения местоположения по фотографии" # Описание
name: digital_hack_ml_job
desc: "Джоба для проведения экспериментов по созданию CV модели для определения местоположения по фотографии"

# Команда для запуска. ${VARIABLE} - шаблоны, заменяемые на значения из `inputs`
cmd: python src/engine/main.py #--input ${INPUT_FILE}
# Команда для запуска. ${ALIAS} — шаблоны, заменяемые на пути из `inputs`
cmd: python src/engine/main.py --csv-path ${csv_file_mob} --images-dir ${images_dir_dzk}/

# передать в DataSphere код и зависимости pip
# Используйте python: auto для автоматического определения версии Python
env:
python: auto

# Входные данные (файлы или каталоги)
#inputs:
# - input_data.txt: INPUT_FILE
# Входные данные (обновите алиасы)
inputs:
- data/processed_data/merged_data.csv: csv_file_mob
- data/raw_data/data/metadata/INC/united_image: images_dir_dzk
- src/models: models
- src/utils: utils

# Выходные данные (файлы или каталоги, которые вернутся с облака)
#outputs:
# - output_results.zip
# - logs.txt
# Выходные данные
outputs:
- optuna_study.pkl: optuna_study
- ocr_model_params.json: ocr_model_params
- test_results.json: test_results

cloud-instance-type: gt4.1
50 changes: 50 additions & 0 deletions notebooks/1_data_exploration/1_1_download_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,56 @@
" file_s3_dst='processed_data/merge_data.csv',\n",
" bucket_name='s3-dvc',)"
]
},
{
"cell_type": "markdown",
"id": "64d2d0d0",
"metadata": {},
"source": [
    "# Перемещаем все фотки в единую папку "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1f56653",
"metadata": {},
"outputs": [],
"source": [
"os.mkdir(ROOT_DIR / 'data/raw_data/data/metadata/INC/united_image')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35417e5a",
"metadata": {},
"outputs": [],
"source": [
"move_and_remove_files(source_dir=ROOT_DIR / 'data/raw_data/data/metadata/INC/18-001_gin_building_echd_19.08.25', \n",
" destination_dir=ROOT_DIR / 'data/raw_data/data/metadata/INC/united_image',\n",
" remove_after_move=True,)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b123a07e",
"metadata": {},
"outputs": [],
"source": [
"move_and_remove_files(source_dir=ROOT_DIR / 'data/raw_data/data/metadata/INC/19-001_gin_garbage_echd_19.08.25', \n",
" destination_dir=ROOT_DIR / 'data/raw_data/data/metadata/INC/united_image',\n",
" remove_after_move=True,)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b193a00",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
320 changes: 276 additions & 44 deletions notebooks/1_data_exploration/1_2_prepare_data.ipynb

Large diffs are not rendered by default.

155 changes: 46 additions & 109 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,110 +1,47 @@
arrow==1.2.3
attrs==23.2.0
Automat==22.10.0
Babel==2.10.3
bcrypt==3.2.2
binaryornot==0.4.4
blinker==1.7.0
build==1.0.3
CacheControl==0.14.0
certifi==2023.11.17
chardet==5.2.0
cleo==2.1.0
click==8.1.6
cloud-init==25.1.4
Mako==1.3.10
MarkupSafe==3.0.2
PyYAML==6.0.2
Pygments==2.19.2
SQLAlchemy==2.0.43
alembic==1.16.5
charset-normalizer==3.4.3
cmaes==0.12.0
colorama==0.4.6
command-not-found==0.3
configobj==5.0.8
constantly==23.10.4
cookiecutter==2.6.0
crashtest==0.4.1
cryptography==41.0.7
dbus-python==1.3.2
distlib==0.3.8
distro==1.9.0
distro-info==1.7+build1
dulwich==0.21.6
fastimport==0.9.14
fastjsonschema==2.19.0
filelock==3.13.1
h11==0.14.0
httplib2==0.20.4
hyperlink==21.0.0
idna==3.6
importlib-metadata==4.12.0
incremental==22.10.0
installer==0.7.0
jaraco.classes==3.2.1
jeepney==0.8.0
Jinja2==3.1.2
jsonpatch==1.32
jsonpointer==2.0
jsonschema==4.10.3
keyring==24.3.1
launchpadlib==1.11.0
lazr.restfulclient==0.14.6
lazr.uri==1.0.6
lockfile==0.12.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
mdurl==0.1.2
more-itertools==10.2.0
msgpack==1.0.3
netifaces==0.11.0
oauthlib==3.2.2
packaging==24.0
pexpect==4.9.0
pkginfo==1.9.6
platformdirs==4.2.0
poetry==1.8.2
poetry-core==1.9.0
poetry-plugin-export==1.6.0
ptyprocess==0.7.0
pyasn1==0.4.8
pyasn1-modules==0.2.8
pycurl==7.45.3
Pygments==2.17.2
PyGObject==3.48.2
PyHamcrest==2.1.0
PyJWT==2.7.0
pylev==1.4.0
pyOpenSSL==23.2.0
pyparsing==3.1.1
pyproject_hooks==1.0.0
pyrsistent==0.20.0
pyserial==3.5
python-apt==2.7.7+ubuntu5
python-dateutil==2.8.2
python-magic==0.4.27
python-slugify==8.0.4
pytz==2024.1
PyYAML==6.0.1
requests==2.31.0
requests-toolbelt==1.0.0
rich==13.7.1
ruamel.yaml==0.17.21
ruamel.yaml.clib==0.2.8
s3cmd==2.4.0
SecretStorage==3.3.3
service-identity==24.1.0
setuptools==68.1.2
shellingham==1.5.4
six==1.16.0
systemd-python==235
toml==0.10.2
tomlkit==0.12.4
trove-classifiers==2024.1.31
Twisted==24.3.0
typing_extensions==4.10.0
ubuntu-pro-client==8001
unattended-upgrades==0.1
Unidecode==1.3.8
urllib3==2.0.7
uvicorn==0.27.1
uvloop==0.19.0
virtualenv==20.25.0+ds
wadllib==1.3.6
wheel==0.42.0
wsproto==1.2.0
zipp==1.0.0
zope.interface==6.1
colorlog==6.9.0
defusedxml==0.7.1
easyocr==1.7.2
googleapis-common-protos==1.70.0
greenlet==3.2.4
grpcio==1.75.0
imageio==2.37.0
importlib-metadata==6.11.0
joblib==1.5.2
mpmath==1.3.0
networkx==3.5
numpy==2.2.6
opencv-python==4.12.0.88
optuna==4.5.0
packaging==23.2
pandas==2.3.2
pillow==11.3.0
platformdirs==4.4.0
protobuf==6.32.1
psutil==7.1.0
pyarrow==21.0.0
pydot==4.0.1
python-bidi==0.6.6
python-dateutil==2.9.0.post0
pytz==2025.2
scikit-image==0.25.2
scikit-learn==1.7.2
scipy==1.16.2
six==1.17.0
sympy==1.14.0
threadpoolctl==3.6.0
tifffile==2025.9.20
torch==2.8.0
torchvision==0.23.0
tqdm==4.67.1
triton==3.4.0
typing-extensions==4.15.0
zipp==3.23.0
1 change: 0 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@
"scikit-learn>=1.0.0",
"matplotlib>=3.5.0",
"seaborn>=0.11.0",
"dvc>=2.0.0",
],
extras_require={
"dev": [
Expand Down
3 changes: 3 additions & 0 deletions src/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from . import utils

__all__ = ['utils']
Loading
Loading