diff --git a/.dockerignore b/.dockerignore
index 60db947..a42c0f2 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,7 +1,66 @@
+# ==================== Python artifacts
 **/__pycache__
 **/*.pyc
 **/*.pyo
 **/*.pyd
+.python-version
+.mypy_cache
+_autosummary
+
+# ==================== Build artifacts
+build
+dist
+target
+*.egg-info
+*.DS_Store
+*.scala-build
+.bsp
+.bloop
+.metals
+.cache
+.lib
+tmp
+.tmp
+large_scale_gnn.egg-info/
+gigantic_graph_learning.egg-info/
+gigl.egg-info/
+gigl_dataflow_setup.egg-info/
+miniconda/
+
+# ==================== IDE / Editor folders
+.idea
+.vscode
+.history
+
+# ==================== Project tools & devcontainers
+.devcontainer
+tools/
+project/boot/
+project/plugins/project/
+project/target/
+**/project/**/metals.sbt
+src_managed/
+lib_managed/
+src/test/assets/output/*
+containers/
+
+# ==================== Test / temp / logs
+.test_assets/
+logs/
+**/src/test/assets/**/output/*
+
+# ==================== Git & VCS
 **/.git
-**/.mypy_cache
-examples/MAG240M/downloads
+
+# ==================== Fossa files
+fossa
+fossa.bundle
+fossa*.zip
+
+# ==================== Miscellaneous folders
+proto/
+examples/MAG240M/downloads/
+scripts/
+graphlearn_torch/
+graphlearn_torch.egg-info/
+do_not_open_source
diff --git a/.github/cloud_builder/run_command_on_active_checkout.yaml b/.github/cloud_builder/run_command_on_active_checkout.yaml
index b94d0f2..d977222 100644
--- a/.github/cloud_builder/run_command_on_active_checkout.yaml
+++ b/.github/cloud_builder/run_command_on_active_checkout.yaml
@@ -3,7 +3,7 @@ substitutions:
 options:
   logging: CLOUD_LOGGING_ONLY
 steps:
-  - name: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/gigl-builder:b47dfe98080036a22a50bf9bce8c75443b60a482.36.1
+  - name: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/gigl-builder:e7390e5bfdb35b8d9492a77b20dac7eba025019d.37.1
    entrypoint: /bin/bash
    args:
    - -c
    - |
      source ~/.profile
      docker buildx create --driver=docker-container --use
      docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
      gcloud auth configure-docker us-central1-docker.pkg.dev --quiet
      # TODO: (svij) Enable install_scala_deps.sh to inside Docker image build
      bash ./requirements/install_scala_deps.sh
diff --git a/.github/cloud_builder/run_command_on_pr_cloud_build.yaml b/.github/cloud_builder/run_command_on_pr_cloud_build.yaml
deleted file mode 100644
index 8a5c6c4..0000000
--- a/.github/cloud_builder/run_command_on_pr_cloud_build.yaml
+++ /dev/null
@@ -1,34 +0,0 @@
-serviceAccount: projects/external-snap-ci-github-gigl/serviceAccounts/untrusted-external-github-gigl@external-snap-ci-github-gigl.iam.gserviceaccount.com
-substitutions:
-  _CMD: ""
-
-options:
-  logging: CLOUD_LOGGING_ONLY
-steps:
-  - name: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/builder:svij-test-5232
-    entrypoint: /bin/bash
-    args:
-      - -c
-      - |
-        set -e
-        set -x
-
-        if [[ -z "${_CMD}" ]]; then
-          echo "Error: _CMD is not set."
-          exit 1
-        fi
-
-        echo "Setting up environment..."
- # gcloud runner will run as a non-root user, but all paths/profiles, etc are set up for root - echo "source /root/.bashrc" >> ~/.bashrc - echo "source /root/.profile" >> ~/.profile - - source ~/.bashrc - sudo bash .github/scripts/setup_cloud_build_vm.sh - - bash ./requirements/install_py_deps.sh --dev - bash ./requirements/install_scala_deps.sh - pip install -e ./python/ - - echo "Finished setting up environment." - $_CMD diff --git a/.gitignore b/.gitignore index ad74848..57dca2e 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,9 @@ python/gigl/deps/ _autosummary/ graphlearn_torch/ graphlearn_torch.egg-info/ +.git/ + +do_not_open_source # Ignore downloaded fossa files. fossa diff --git a/.mdformat.toml b/.mdformat.toml new file mode 100644 index 0000000..717eb51 --- /dev/null +++ b/.mdformat.toml @@ -0,0 +1 @@ +wrap = 120 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ead68ff..cafd363 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,3 +9,9 @@ repos: language: system pass_filenames: false stages: [manual] + + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.0.1 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f37dcc..6e0f2ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,7 +17,7 @@ Latest Release: 0.0.6 ### Changed - Config Validation now also checks GiglResourceConfig and PreprocessedMetadata (#123)(#125) -- Simplified Dockerfiles to be 40-50\% smaller (#198) +- Simplified Dockerfiles to be 40-50% smaller (#198) ### Deprecated @@ -25,7 +25,7 @@ Latest Release: 0.0.6 ### Fixed -- Corrected invalid transductive node classification test (#128) +- Corrected invalid transductive node classification test (#128) - Corrected some inconsistencies in component cleanup logic (#196) ## [0.0.6] - 2024-05-16 @@ -62,6 +62,7 @@ Latest Release: 0.0.6 - Deprecate cora_assets.py and toy_graph.py in favour of `PassthroughPreprocessorConfigForMockedAssets` (#25) ### Fixed + - Make feature order determinisitc in FeatureEmbeddingLayer (#23) ## [0.0.5] - 2024 @@ -73,7 +74,6 @@ Latest Release: 0.0.6 - Cost Tooling Script - Torch, TFT, TF, PyG upgrades + mac arm64 support (#807) - ### Fixes -- None \ No newline at end of file +- None diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..87e3c44 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,95 @@ +# Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders of the GiGL community pledge to make participation in our community a +harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex +characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, +personal appearance, race, caste, color, religion, or sexual identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
+
+## Our Standards
+
+Examples of behavior that contributes to a positive environment for our community include:
+
+- Demonstrating empathy and kindness toward other people
+- Being respectful of differing opinions, viewpoints, and experiences
+- Giving and gracefully accepting constructive feedback
+- Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience
+- Focusing on what is best not just for us as individuals, but for the overall community
+
+Examples of unacceptable behavior include:
+
+- The use of sexualized language or imagery, and sexual attention or advances of any kind
+- Trolling, insulting or derogatory comments, and personal or political attacks
+- Public or private harassment
+- Publishing others’ private information, such as a physical or email address, without their explicit permission
+- Other conduct which could reasonably be considered inappropriate in a professional setting
+
+## Our Enforcement Responsibilities
+
+GiGL community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take
+appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive,
+or harmful.
+
+GiGL community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits,
+issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for
+moderation decisions when appropriate.
+
+## Scope
+
+This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing
+the community in public spaces. Examples of representing our community include using an official email address, posting
+via an official social media account, or acting as an appointed representative at an online or offline event.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible
+for enforcement at `opensource [at] snap [dot] com`. All complaints will be reviewed and investigated promptly and
+fairly.
+
+All community leaders are obligated to respect the privacy and security of the reporter of any incident.
+
+## Enforcement Guidelines
+
+Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem
+in violation of this Code of Conduct:
+
+### 1. Correction
+
+Community Impact: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community.
+
+Consequence: A private, written warning from community leaders, providing clarity around the nature of the violation and
+an explanation of why the behavior was inappropriate. A public apology may be requested.
+
+### 2. Warning
+
+Community Impact: A violation through a single incident or series of actions.
+
+Consequence: A warning with consequences for continued behavior. No interaction with the people involved, including
+unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding
+interactions in community spaces as well as external channels like social media. Violating these terms may lead to a
+temporary or permanent ban.
+
+### 3. Temporary Ban
+
+Community Impact: A serious violation of community standards, including sustained inappropriate behavior.
+
+Consequence: A temporary ban from any sort of interaction or public communication with the community for a specified
+period of time. No public or private interaction with the people involved, including unsolicited interaction with those
+enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban.
+
+### 4. Permanent Ban
+
+Community Impact: Demonstrating a pattern of violation of community standards, including sustained inappropriate
+behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals.
+
+Consequence: A permanent ban from any sort of public interaction within the community.
+
+## Attribution
+
+This Code of Conduct is adapted from the Contributor Covenant, version 2.1, available at
+https://www.contributor-covenant.org/version/2/1/code_of_conduct.html.
+
+For answers to common questions about this code of conduct, see the FAQ at https://www.contributor-covenant.org/faq.
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 4a2844d..0143520 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,7 +1,126 @@
-# What is a good pull request?
+# Contributing to GiGL
 
-Your PRs will be rejected by reviewers if they dont follow the best practices.
+Thank you for your interest in GiGL! We welcome community contributions and appreciate your time and effort in helping
+improve the project. Before getting started, please take a moment to review these guidelines.
 
+## Code of Conduct
 
-Please read before drafting code changes / pull requests for this repo:
-- https://google.github.io/eng-practices/review/
+We want this project to be a welcoming space for everyone. By contributing, you agree to follow our
+[Code of Conduct](CODE_OF_CONDUCT.md) and help keep the community respectful and inclusive.
+
+## Reporting Issues
+
+**If security related, please see [SECURITY.md](SECURITY.md) for guidance.**
+
+If you find a bug or have a feature request, please open an issue and provide as much detail as possible:
+
+- Search existing issues to avoid duplicates.
+- Clearly describe the issue with steps to reproduce (if applicable).
+  - Include the relevant specs used (both task and resource).
+  - Provide relevant logs or screenshots if applicable.
+- Describe expected and actual behavior.
+- List suggested solutions (if any).
+
+## Legal Terms
+
+By submitting a contribution, you represent and warrant that:
+
+- It is your original work, or you have sufficient rights to submit it.
+- You grant the GiGL maintainers and users the right to use, modify, and distribute it under the MIT license (see
+  [LICENSE](LICENSE) file); and
+- To the extent your contribution is covered by patents, you grant a perpetual, worldwide, non-exclusive, royalty-free,
+  irrevocable license to the GiGL maintainers and users to make, use, sell, offer for sale, import, and otherwise
+  transfer your contribution as part of the project.
+
+We do not require a Contributor License Agreement (CLA). However, by contributing, you agree to license your submission
+under terms compatible with the MIT License and to grant the patent rights described above. If your contribution
+includes third-party code, you are responsible for ensuring it is MIT-compatible and properly attributed.
+
+Moral Rights Disclaimer: Where permitted by law, you waive any moral rights (e.g., the right to object to modifications)
+in your contribution. If such rights cannot be waived, you agree not to assert them in a way that interferes with the
+project’s use of your contribution.
+
+## Open Development
+
+We follow an open development process where both core team members and the community contribute through the same review
+process. All pull requests, regardless of the author, go through the same review and approval workflow to ensure
+consistency and quality.
+
+## How to Contribute
+
+### Proposing a Non-Trivial Change
+
+- Before starting major work, open an issue to discuss your proposal with the maintainers.
+- Clearly outline the problem and your proposed solution.
+- Gather feedback and refine your approach before implementation.
+- This ensures alignment with project goals and avoids unnecessary work.
+
+### Submitting Code
+
+1. Fork the repository and create a feature branch.
+1. Ensure all unit tests pass before submitting.
+1. Add relevant unit/integration/performance tests.
+1. Submit a pull request (PR) with a clear description of your changes.
+1. Address review feedback promptly.
+1. All changes should be submitted to the `main` branch via a pull request.
+
+### Semantic Versioning & Changelog
+
+We adhere to [Semantic Versioning](https://semver.org/) (MAJOR.MINOR.PATCH) to ensure clear version tracking:
+
+- **MAJOR** versions introduce breaking changes.
+- **MINOR** versions add functionality in a backward-compatible manner.
+- **PATCH** versions fix bugs and small issues.
+
+All significant changes are recorded in the [CHANGELOG](CHANGELOG.md), where contributors should document major updates,
+new features, and fixes.
+
+TODO: (svij) More instructions to come on how the release process will be managed.
+
+### Commit Guidelines
+
+- We squash-commit PRs. Ensure your PRs follow the [pull_request_template](pull_request_template.md).
+
+### PR Checklist:
+
+#### Code Correctness
+
+- Is the code logically correct?
+- Are there any edge cases that we have not covered?
+- Has the author executed on a reasonable testing plan and/or has the reviewer tested the changes themselves?
+- Does the PR meet its objective of satisfying the task requirements, i.e., will it scale to the necessary requirements?
+
+#### Code Comprehension/Consistency
+
+- Will the change be easily understandable to a broader audience?
+- Will the solution make sense as the code-base evolves?
+- Does the PR follow agreed-upon/industry best practices, and follow patterns already established in the codebase?
+
+### Author’s Responsibility:
+
+#### Most important: Create *[Small PRs](https://google.github.io/eng-practices/review/developer/small-cls.html)*
+
+Explicitly tag two people on your PR (see [OWNERS](OWNERS) for the list of reviewers). Generally, the first review of
+your PR should be done within 1-2 business days of your request, depending on scope; if this isn't done, it is your
+responsibility to follow up for a response or to find a different reviewer if needed using our
+[Communication Channels](#questions). In cases when the PR is not “small”
+([see what it means for a PR to be small](https://google.github.io/eng-practices/review/developer/small-cls.html#what_is_small)),
+the 1-2 business day guidance is not reasonable, and reviewers may push back and ask you to break the PR down into
+“small PRs.”
+
+More generally,
+
+- In rare cases, you may need to add more reviewers in cases of driving consensus or leveraging certain “domain
+  expertise”.
+- In some “contextually” very small PRs, you may only require one reviewer, for example:
+  - Adding a new small unit test.
+ - Formatting, variable or directory name change; this could span many files and lines; contextually, it is still a + “very small” change. + - Editing the OWNERS file + - Fixing a spelling mistake or adding a few lines to a README + +Happy coding! + +## Questions/Comments/Ideas? + +If you need help, or need to get in touch with the primary maintainers of the project, please open a discussion/issue. diff --git a/LICENSE b/LICENSE index 466d4ee..285a8af 100644 --- a/LICENSE +++ b/LICENSE @@ -1,5 +1,7 @@ Copyright © 2025 Snap Inc. +GiGL is made available under the MIT License. + MIT License Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: @@ -8,34 +10,32 @@ The above copyright notice and this permission notice shall be included in all c THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +============================================== Third-Party Software -This software includes or interacts with third-party software. A detailed report of the third-party software used, including their respective licenses, can be found at the following link. +GiGL includes or interacts with third-party software. A detailed report of the third-party software used, including their respective licenses, can be found at the following link. https://portal.fossa.com/p/snap/release/2724/5782 For third-party software licensed under terms requiring source code distribution, copies of the source code are available upon request by emailing sourcecoderequest@snap.com. +============================================== Third-Party Datasets -This software may utilize third-party datasets that are licensed under different terms. Users are responsible for complying with the terms of those dataset licenses. Below is a list of the datasets used: - +GiGL utilizes third-party datasets that are licensed under different terms. Users are responsible for complying with the terms of those dataset licenses. Below is a list of the datasets used: Cora Dataset Copyright © Andrew McCallum License: Creative Commons Attribution 4.0 International (CC BY 4.0) License. Project Link: https://people.cs.umass.edu/~mccallum/data.html - DBLP Dataset Collected in the paper "MAGNN: Metapath Aggregated Graph Neural Network for Heterogeneous Graph Embedding" License: CC0 1.0 Public Domain Dedication License Project Link: https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.datasets.DBLP.html - MAG240M Dataset Wang, K., Shen, Z., Huang, C., Wu, C. H., Dong, Y., & Kanakia, A. (2020). Microsoft Academic Graph: When experts are not enough. Quantitative Science Studies, 1(1), 396-413. 
License: Open Data Commons Attribution License (ODC-BY)
Project Link: https://ogb.stanford.edu/docs/lsc/mag240m/
-
-
diff --git a/Makefile b/Makefile
index 2e8c208..968a2df 100644
--- a/Makefile
+++ b/Makefile
@@ -8,18 +8,29 @@ DATE:=$(shell /bin/date "+%Y%m%d-%H%M")
 # GIT HASH, or empty string if not in a git repo.
 GIT_HASH?=$(shell git rev-parse HEAD 2>/dev/null || "")
 
-PROJECT:=external-snap-ci-github-gigl
-DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME:=us-central1-docker.pkg.dev/${PROJECT}/gigl/src_cpu_dataflow
-DOCKER_IMAGE_MAIN_CUDA_NAME:=us-central1-docker.pkg.dev/${PROJECT}/gigl/src_cuda
-DOCKER_IMAGE_MAIN_CPU_NAME:=us-central1-docker.pkg.dev/${PROJECT}/gigl/src_cpu
+# You can override GIGL_PROJECT by setting it in your environment, i.e. by
+# adding `export GIGL_PROJECT=your_project` to your shell config (~/.bashrc, ~/.zshrc, etc.)
+GIGL_PROJECT?=external-snap-ci-github-gigl
+DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME:=us-central1-docker.pkg.dev/${GIGL_PROJECT}/gigl-base-images/src-cpu-dataflow
+DOCKER_IMAGE_MAIN_CUDA_NAME:=us-central1-docker.pkg.dev/${GIGL_PROJECT}/gigl-base-images/src-cuda
+DOCKER_IMAGE_MAIN_CPU_NAME:=us-central1-docker.pkg.dev/${GIGL_PROJECT}/gigl-base-images/src-cpu
 DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG:=${DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME}:${DATE}
 DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG:=${DOCKER_IMAGE_MAIN_CUDA_NAME}:${DATE}
 DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG:=${DOCKER_IMAGE_MAIN_CPU_NAME}:${DATE}
 
-PYTHON_DIRS:=examples python shared scripts
+PYTHON_DIRS:=examples python shared scripts
 PY_TEST_FILES?="*_test.py"
+GIT_BRANCH:=$(shell git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
+
+# If we're in a git repo, then find only the ".md" files in our repo to format, else we format everything ".".
+# We do this because some of our dependencies (Spark) include md files,
+# but since we don't push those dependencies (or their documentation) to git,
+# then when we *check* the format of those files, we will fail.
+# Thus, we only want to format the Markdown files that we explicitly include in our repo.
+MD_FILES:=$(shell if [ ! ${GIT_BRANCH} ]; then echo "."; else git ls-tree --name-only -r ${GIT_BRANCH} . | grep ".md"; fi;)
+
 get_ver_hash: # Fetches the git commit hash and stores it in `$GIT_COMMIT`
	git diff --quiet || { echo Branch is dirty, please commit changes and ensure branch is clean; exit 1; }
@@ -46,7 +57,7 @@ rebuild_dev_environment:
	make install_dev_deps
 
 check_if_valid_env:
	@command -v docker >/dev/null 2>&1 || { echo >&2 "docker is required but it's not installed. Aborting."; exit 1; }
	@command -v gsutil >/dev/null 2>&1 || { echo >&2 "gsutil is required but it's not installed. Aborting."; exit 1; }
	@python --version | grep -q "Python ${PYTHON_VERSION}" || (echo "Python version is not 3.9" && exit 1)
@@ -113,6 +124,7 @@ generate_dev_linux_cuda_hashed_requirements:
 
 precondition_tests:
	python shared/tests/dep_vars_check.py
+
 assert_yaml_configs_parse:
	python scripts/assert_yaml_configs_parse.py -d .
@@ -140,8 +152,7 @@ unit_test_scala: clean_build_files_scala
 # Eventually, we should look into splitting these up.
 # We run `make check_format` separately instead of as a dependent make rule so that it always runs after the actual testing.
 # We don't want to fail the tests due to non-conformant formatting during development.
-unit_test: precondition_tests unit_test_py unit_test_scala assert_yaml_configs_parse
+unit_test: precondition_tests unit_test_py unit_test_scala
	make check_format
 
 check_format_py:
	autoflake --check --config python/pyproject.toml ${PYTHON_DIRS}
@@ -152,9 +163,14 @@ check_format_scala:
	( cd scala; sbt "scalafmtCheckAll; scalafixAll --check"; )
	( cd scala_spark35; sbt "scalafmtCheckAll; scalafixAll --check"; )
 
-check_format: check_format_py check_format_scala
+check_format_md:
+	@echo "Checking markdown files..."
+	mdformat --check ${MD_FILES}
+
+check_format: check_format_py check_format_scala check_format_md
 
 # Set PY_TEST_FILES= to test a specifc file.
 # Ex. `make integration_test PY_TEST_FILES="dataflow_test.py"`
 # By default, runs all tests under python/testing/integration.
@@ -176,13 +192,18 @@ format_py:
	isort --settings-path=python/pyproject.toml ${PYTHON_DIRS}
	black --config=python/pyproject.toml ${PYTHON_DIRS}
 
-format_scala:
+format_scala:
 # We run "clean" before the formatting because otherwise some "scalafix.sbt.ScalafixFailed: NoFilesError" may get thrown after switching branches...
 # TODO(kmonte): Once open sourced, follow up with scalafix people on this.
	( cd scala; sbt clean scalafixAll scalafmtAll )
	( cd scala_spark35; sbt clean scalafixAll scalafmtAll )
 
-format: format_py format_scala
+format_md:
+	@echo "Formatting markdown files..."
+	mdformat ${MD_FILES}
+
+format: format_py format_scala format_md
+
 type_check:
	mypy ${PYTHON_DIRS} --check-untyped-defs
@@ -222,8 +243,7 @@ push_new_docker_images: push_cuda_docker_image push_cpu_docker_image push_datafl
 # See usage w/ run_cora_nalp_e2e_kfp_test, run_cora_snc_e2e_kfp_test, run_cora_udl_e2e_kfp_test
 # and run_all_e2e_tests
 _run_e2e_kfp_test: compile_jars push_new_docker_images
-	$(eval BRANCH:=$(shell git rev-parse --abbrev-ref HEAD))
-	$(eval TRIMMED_BRANCH:=$(shell echo "${BRANCH}" | tr '/' '_' | cut -c 1-20 | tr '[:upper:]' '[:lower:]'))
+	$(eval TRIMMED_BRANCH:=$(shell echo "${GIT_BRANCH}" | tr '/' '_' | tr '-' '_' | cut -c 1-20 | tr '[:upper:]' '[:lower:]'))
	$(eval TRIMMED_TIME:=$(shell date +%s | tail -c 6))
	@should_wait_for_job_to_finish=false
	@( \
@@ -310,8 +330,8 @@ run_cora_snc_e2e_kfp_test: resource_config_uris_str:="deployment/configs/e2e_cic
 run_cora_snc_e2e_kfp_test: should_compile_then_run_str:="false"
 run_cora_snc_e2e_kfp_test: _run_e2e_kfp_test
 
-# Note UDL dataset produces a transient issue due to UDL Split Strategy
-# where in some cases the root node doesn't properly get added back to
+# Note UDL dataset produces a transient issue due to UDL Split Strategy
+# where in some cases the root node doesn't properly get added back to
 # the returned subgraph. Meaning, trainer will fail.
 run_cora_udl_e2e_kfp_test: job_name_prefixes_str:="cora_udl_test_on"
 run_cora_udl_e2e_kfp_test: task_config_uris_str:="gigl/src/mocking/configs/e2e_udl_node_anchor_based_link_prediction_template_gbml_config.yaml"
@@ -418,7 +438,7 @@ clean_build_files: clean_build_files_py clean_build_files_scala
 # Call to generate new proto definitions if any of the .proto files have been changed.
 # We intentionally rebuild *all* protos with one commmand as they should all be in sync.
 # Run `make install_dev_deps` to setup the correct protoc versions.
-compile_protos: +compile_protos: tools/python_protoc/bin/protoc \ --proto_path=proto \ --python_out=./python \ @@ -450,3 +470,9 @@ stop_toaster: # Deletes everything associated with all stopped containers including dangling resources docker system prune -a --volumes docker buildx prune + +release_gigl: + @echo "This needs to be implemented" + +publish_docs: + @echo "This needs to be implemented" diff --git a/README.md b/README.md index 8031473..0d0607d 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,5 @@ +
GiGL Logo

@@ -6,36 +7,39 @@

-# This REPO is a bit dusty while we prepare for the OSS launch. We will have it in working order coming soon.
-
 GiGL is an open-source library for training and inference of Graph Neural Networks at very large (billion) scale.
-
 See 📖 [Documentation](docs/) for more details
 
 ## Key Features 🌟
 
-- 🧠 **Versatile GNN Applications**: Supports easy customization in using GNNs in supervised and unsupervised ML applications like node classification and link prediction.
-
-- 🚀 **Designed for Scalability**: The architecture is built with horizontal scaling in mind, ensuring cost-effective performance throughout the process of data preprocessing and transformation, model training, and inference.
+- 🧠 **Versatile GNN Applications**: Supports easy customization in using GNNs in supervised and unsupervised ML
+  applications like node classification and link prediction.
 
-- 🎛️ **Easy Orchestration**: Simplified end-to-end orchestration, making it easy for developers to implement, scale, and manage their GNN projects.
+- 🚀 **Designed for Scalability**: The architecture is built with horizontal scaling in mind, ensuring cost-effective
+  performance throughout the process of data preprocessing and transformation, model training, and inference.
 
-------------------------------------------------------------------------
+- 🎛️ **Easy Orchestration**: Simplified end-to-end orchestration, making it easy for developers to implement, scale, and
+  manage their GNN projects.
 
+______________________________________________________________________
 
 ## GiGL Components ⚡️
 
-GiGL contains six components, each designed to facilitate the platforms end-to-end graph machine learning (ML) tasks. The components are as follows:
+GiGL contains six components, each designed to facilitate the platform's end-to-end graph machine learning (ML) tasks.
+The components are as follows:
 
-| Component         | Source Code                                                     | Documentation                                              |
-|-------------------|-----------------------------------------------------------------|------------------------------------------------------------|
-| Config Populator  | [here](python/gigl/src/config_populator/config_populator.py)   | [here](docs/sphinx/source/components/config_populator.md)  |
-| Data Preprocessor | [here](python/gigl/src/data_preprocessor/data_preprocessor.py) | [here](docs/sphinx/source/components/data_preprocessor.md) |
-| Subgraph Sampler  | [here](scala/subgraph_sampler/src/main/scala/Main.scala)       | [here](docs/sphinx/source/components/subgraph_sampler.md)  |
-| Split Generator   | [here](scala/split_generator/src/main/scala/Main.scala)        | [here](docs/sphinx/source/components/split_generator.md)   |
-| Trainer           | [here](python/gigl/src/training/trainer.py)                    | [here](docs/sphinx/source/components/trainer.md)           |
-| Inferencer        | [here](python/gigl/src/inference/gnn_inferencer.py)            | [here](docs/sphinx/source/components/inferencer.md)        |
+| Component         | Source Code                                                     | Documentation                                              |
+| ----------------- | --------------------------------------------------------------- | ----------------------------------------------------------- |
+| Config Populator  | [here](python/gigl/src/config_populator/config_populator.py)   | [here](docs/sphinx/source/components/config_populator.md)  |
+| Data Preprocessor | [here](python/gigl/src/data_preprocessor/data_preprocessor.py) | [here](docs/sphinx/source/components/data_preprocessor.md) |
+| Subgraph Sampler  | [here](scala/subgraph_sampler/src/main/scala/Main.scala)       | [here](docs/sphinx/source/components/subgraph_sampler.md)  |
+| Split Generator   | [here](scala/split_generator/src/main/scala/Main.scala)        | [here](docs/sphinx/source/components/split_generator.md)   |
+| Trainer           | [here](python/gigl/src/training/trainer.py)                    | [here](docs/sphinx/source/components/trainer.md)           |
+| Inferencer        | [here](python/gigl/src/inference/gnn_inferencer.py)            | [here](docs/sphinx/source/components/inferencer.md)        |
 
 The figure below illustrates at a high level how all the components work together for and end-to-end GiGL pipeline.
 
@@ -43,7 +47,8 @@ The figure below illustrates at a high level how all the components work togethe
 
 ## Installation ⚙️
 
-There are various ways to use GiGL. The recommended solution is to set up a conda environment and use some handy commands:
+There are various ways to use GiGL. The recommended solution is to set up a conda environment and use some handy
+commands:
 
 From the root directory:
 
 ```bash
 make initialize_environment
 conda activate gnn
 ```
 
-This creates a Python 3.9 environment with some basic utilities. Next, to install all user dependencies:
+This creates a Python 3.9 environment with some basic utilities. Next, to install all user dependencies:
 
 ```bash
 make install_deps
 ```
@@ -80,24 +85,28 @@ todo
 
 ## Configuration 📄
 
-Before getting started with running components in GiGL, it’s important to set up your config files. These are necessary files required for each component to operate. The two required files are:
+Before getting started with running components in GiGL, it’s important to set up your config files. These are necessary
+files required for each component to operate. The two required files are:
 
-- **Resource Config**: Details the resource allocation and environmental settings across all GiGL components. This encompasses shared resources for all components, as well as component-specific settings.
+- **Resource Config**: Details the resource allocation and environmental settings across all GiGL components. This
+  encompasses shared resources for all components, as well as component-specific settings.
 
-- **Task Config**: Specifies task-related configurations, guiding the behavior of components according to the needs of your machine learning task.
+- **Task Config**: Specifies task-related configurations, guiding the behavior of components according to the needs of
+  your machine learning task.
 
 To configure these files and customize your GiGL setup, follow our step-by-step guides:
 
-- [Resource Config Guide]("todo/")
-- [Task Config Guide]("todo/")
+- [Resource Config Guide](%22todo/%22)
+- [Task Config Guide](%22todo/%22)
 
 ## Usage 🚀
 
 GiGL offers 3 primary methods of usage to run the components for your graph machine learning tasks.
 
-### 1. Importable `gigl`
+### 1. Importable GiGL
 
-To easily get started or incorporate gigl into your existing workflows, you can simply import `gigl` and call the `.run()` method on its components.
+To easily get started or incorporate GiGL into your existing workflows, you can simply `import gigl` and call the
+`.run()` method on its components.
Example @@ -108,12 +117,13 @@ from gigl.src.training.trainer import Trainer trainer = Trainer() trainer.run(task_config_uri, resource_config_uri, job_name) ``` -
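+Components can also be chained: freeze your template task config first, then feed the frozen config to downstream
+components. Below is a minimal sketch of that flow; note the `ConfigPopulator` class name and its keyword arguments are
+assumptions inferred from the Config Populator docs, so verify them against your installed version.
+
+```python
+from gigl.src.config_populator.config_populator import ConfigPopulator  # class name assumed
+from gigl.src.training.trainer import Trainer
+
+# Freeze the template task config, then train against the frozen config.
+config_populator = ConfigPopulator()
+frozen_task_config_uri = config_populator.run(
+    applied_task_identifier=job_name,  # argument names assumed from the component docs
+    task_config_uri=template_task_config_uri,
+    resource_config_uri=resource_config_uri,
+)
+trainer = Trainer()
+trainer.run(frozen_task_config_uri, resource_config_uri, job_name)
+```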
+ ### 2. Command-Line Execution -Each GiGL component can be executed as a standalone module from the command line. This method is useful for batch processing or when integrating into shell scripts. +Each GiGL component can be executed as a standalone module from the command line. This method is useful for batch +processing or when integrating into shell scripts.
Example @@ -130,18 +140,22 @@ python -m \ ### 3. Kubeflow Pipeline Orchestration -GiGL also supports pipeline orchestration using Kubeflow. This allows you to easily kick off an end-to-end run with little to no code. See [Kubeflow Orchestration]("todo") for more information +GiGL also supports pipeline orchestration using Kubeflow. This allows you to easily kick off an end-to-end run with +little to no code. See [Kubeflow Orchestration](%22todo%22) for more information \ -The best way to get more familiar with GiGL is to go through the various [examples]("todo") or for specific details see our [user guide]("todo"). - +The best way to get more familiar with GiGL is to go through the various [examples](%22todo%22) or for specific details +see our [user guide](%22todo%22). ## Tests 🔧 -Testing in GiGL is designed to ensure reliability and robustness across different components of the library. We support three types of tests: unit tests, local integration tests, and cloud integration end-to-end tests. +Testing in GiGL is designed to ensure reliability and robustness across different components of the library. We support +three types of tests: unit tests, local integration tests, and cloud integration end-to-end tests. ### Unit Tests -GiGL's unit tests focus on validating the functionality of individual components and high-level utilities. They also check for proper formatting, typing, and linting standards. + +GiGL's unit tests focus on validating the functionality of individual components and high-level utilities. They also +check for proper formatting, typing, and linting standards.
More Details @@ -164,14 +178,15 @@ make unit_test_scala
- ### Local Integration Test -GiGL's local integration tests simulate the pipeline behavior of GiGL components. These tests are crucial for verifying that components function correctly in sequence and that outputs from one component are correctly handled by the next. + +GiGL's local integration tests simulate the pipeline behavior of GiGL components. These tests are crucial for verifying +that components function correctly in sequence and that outputs from one component are correctly handled by the next.
More Details -- Utilizes mocked/synthetic data publicly hosted in GCS (see: [Public Assets]("todo")) +- Utilizes mocked/synthetic data publicly hosted in GCS (see: [Public Assets](%22todo%22)) - Require access and run on cloud services such as BigQuery, Dataflow etc. - Required to pass before merging PR (Pre-merge check) @@ -185,43 +200,51 @@ make integration_test resource_config_uri="gs://your-project-bucket/resource_con ### Cloud Integration Test (End-to-End) -Cloud integration tests run a full end-to-end GiGL pipeline within GCP, also leveraging cloud services such as Dataflow, Dataproc, and Vertex AI. +Cloud integration tests run a full end-to-end GiGL pipeline within GCP, also leveraging cloud services such as Dataflow, +Dataproc, and Vertex AI.
More Details
 
-- Utilizes mocked/synthetic data publicly hosted in GCS (see: [Public Assets]("todo"))
+- Utilizes mocked/synthetic data publicly hosted in GCS (see: [Public Assets](%22todo%22))
 - Require access and run on cloud services such as BigQuery, Dataflow etc.
-- Required to pass before merging PR (Pre-merge check). Access to the orchestration, logs, etc., is restricted to authorized internal engineers to maintain security. Failures will be reported back to contributor as needed.
+- Required to pass before merging PR (Pre-merge check). Access to the orchestration, logs, etc., is restricted to
+  authorized internal engineers to maintain security. Failures will be reported back to the contributor as needed.
 
-To test cloud integration test functionality, you can replicate by running and end-to-end pipeline by following along one of our Cora examples (See: [Examples]("todo"))
+To replicate the cloud integration test functionality, you can run an end-to-end pipeline yourself by following along
+with one of our Cora examples (See: [Examples](%22todo%22)), as sketched below.
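+
+As a minimal sketch of kicking one off via this repo's Makefile targets (this assumes your gcloud credentials and the
+resource configs referenced by the target are already set up; see the Makefile for the full list of e2e targets):
+
+```bash
+# Compiles jars, pushes fresh docker images, and launches the Cora
+# end-to-end KFP test pipeline via the _run_e2e_kfp_test scaffolding.
+make run_cora_snc_e2e_kfp_test
+```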

## Contribution 🔥 -Your contributions are always welcome and appreciated. The following are the things you can do to contribute to this project. - 1. **Report a bug**
- If you think you have encountered a bug please feel free to report it [here]("todo_point_to_issues") and someone from the team will take a look.
+Your contributions are always welcome and appreciated. Here are some of the ways you can contribute to this
+project.
 
- 2. **Request a feature**<br>
-Feature requests are always welcome! You can request a feature by adding it [here]("todo_point_to_feature_req") +1. **Report a bug**
If you think you have encountered a bug, please feel free to report it
+   [here](%22todo_point_to_issues%22) and someone from the team will take a look.
 
- 3. **Create a pull request**<br>
-Pull request are always greatly appreciated. You can get started by picking up any open issues from [here]("todo_point_to_issues") and making a pull request. +1. **Request a feature**
Feature requests are always welcome! You can request a feature by adding it + [here](%22todo_point_to_feature_req%22) - > If you are new to open-source, make sure to check read more about it [here](https://www.digitalocean.com/community/tutorial_series/an-introduction-to-open-source) and learn more about creating a pull request [here](https://www.digitalocean.com/community/tutorials/how-to-create-a-pull-request-on-github). +1. **Create a pull request**
Pull requests are always greatly appreciated. You can get started by picking up any
+   open issues from [here](%22todo_point_to_issues%22) and making a pull request.
 
-	> If you are new to open-source, make sure to check read more about it [here](https://www.digitalocean.com/community/tutorial_series/an-introduction-to-open-source) and learn more about creating a pull request [here](https://www.digitalocean.com/community/tutorials/how-to-create-a-pull-request-on-github).
+> If you are new to open-source, make sure to read more about it
+> [here](https://www.digitalocean.com/community/tutorial_series/an-introduction-to-open-source) and learn more about
+> creating a pull request
+> [here](https://www.digitalocean.com/community/tutorials/how-to-create-a-pull-request-on-github).
 
-For more information, see our [Contributing Guide]("todo")
-
+For more information, see our [Contributing Guide](%22todo%22)
 
 ## Additional Resources ❗
 
-You may still have unanswered questions or may be facing issues. If so please see our [FAQ]("todo") or our [User Guide]("todo") for further guidence.
+You may still have unanswered questions or may be facing issues. If so please see our [FAQ](%22todo%22) or our
+[User Guide](%22todo%22) for further guidance.
 
 ## Citation
+
 If you use GiGL in publications, we would appreciate citations to [our paper](https://arxiv.org/pdf/2502.15054):
 
 ```bibtex
@@ -234,4 +257,5 @@ If you use GiGL in publications, we would appreciate citations to [our paper](ht
 ```
 
 ## License 🔒
+
 [MIT License](LICENSE)
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..c8b1489
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,4 @@
+# Security Protocol
+
+If you discover a security vulnerability, please do not report it publicly. Instead, email us at:
+`opensource [at] snap [dot] com`. We’ll do our best to respond quickly and fix the issue.
diff --git a/containers/Dockerfile.builder b/containers/Dockerfile.builder
new file mode 100644
index 0000000..ecf8870
--- /dev/null
+++ b/containers/Dockerfile.builder
@@ -0,0 +1,58 @@
+# syntax=docker/dockerfile:1
+
+# This Dockerfile contains all dev dependencies, and is used by gcloud
+# builders for running tests, et al.
+
+FROM continuumio/miniconda3:4.12.0
+
+SHELL ["/bin/bash", "-c"]
+
+# Non-interactive install
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install base dependencies
+RUN apt-get update && apt-get install -y \
+    curl \
+    tar \
+    unzip \
+    bash \
+    openjdk-11-jdk \
+    git \
+    cmake \
+    sudo \
+    build-essential \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN curl -fsSL https://get.docker.com -o get-docker.sh && \
+    sh get-docker.sh && \
+    rm get-docker.sh
+
+# Install Google Cloud CLI
+RUN mkdir -p /tools && \
+    curl -o /tools/google-cloud-cli-linux-x86_64.tar.gz https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz && \
+    tar -xzf /tools/google-cloud-cli-linux-x86_64.tar.gz -C /tools/ && \
+    bash /tools/google-cloud-sdk/install.sh --quiet --path-update=true --usage-reporting=false && \
+    rm -rf /tools/google-cloud-cli-linux-x86_64.tar.gz
+
+RUN echo 'export PATH="/tools/google-cloud-sdk/bin:/usr/lib/jvm/java-1.11.0-openjdk-amd64/bin:$PATH"' >> /root/.bashrc
+RUN echo 'export JAVA_HOME="/usr/lib/jvm/java-1.11.0-openjdk-amd64"' >> /root/.bashrc
+
+# Create the environment:
+RUN conda create -y --name gigl python=3.9 pip
+
+# Update path so any call for python executables in the built image defaults to using the gigl conda environment
+ENV PATH=/opt/conda/envs/gigl/bin:$PATH
+
+RUN conda init bash
+RUN echo "conda activate gigl" >> ~/.bashrc
+
+COPY requirements tools/gigl/requirements
+RUN cat ~/.bashrc
+RUN source ~/.bashrc && pip install --upgrade pip
+RUN source ~/.bashrc && cd tools/gigl && bash ./requirements/install_py_deps.sh --no-pip-cache --dev
+# TODO: (svij) Enable install_scala_deps.sh to run inside Docker image build
+# RUN source ~/.bashrc && cd tools/gigl && bash ./requirements/install_scala_deps.sh
+
+CMD [ "/bin/bash" ]
diff --git a/containers/Dockerfile.cpu.base b/containers/Dockerfile.cpu.base
index f36f5f1..c9863a7 100644
--- a/containers/Dockerfile.cpu.base
+++ b/containers/Dockerfile.cpu.base
@@ -6,9 +6,12 @@ SHELL ["/bin/bash", "-c"]
 
 # TODO(mkolodner-sc): iputils-ping temporarily needed to setup inter-job VAI communication for GLT Inference.
 # Once VAI natively supports this communication, we can remove this requirement.
-RUN apt-get update \
-    && apt-get upgrade -y \
-    && apt-get install -y build-essential git wget cmake iputils-ping \
+RUN apt-get update && apt-get install -y \
+    build-essential \
+    git \
+    wget \
+    cmake \
+    iputils-ping \
     && apt-get clean \
     && rm -rf /var/lib/apt/lists/*
 
@@ -16,7 +19,7 @@ RUN apt-get update \
 RUN conda create -y --name gnn python=3.9 pip
 
 # Update path so any call for python executables in the built image defaults to using the gnn conda environment
-ENV PATH /opt/conda/envs/gnn/bin:$PATH
+ENV PATH=/opt/conda/envs/gnn/bin:$PATH
 
 RUN conda init bash
 RUN echo "conda activate gnn" >> ~/.bashrc
@@ -25,5 +28,4 @@ COPY requirements tmp/requirements
 
 RUN source ~/.bashrc && pip install --upgrade pip
 RUN source ~/.bashrc && cd tmp && bash ./requirements/install_py_deps.sh --no-pip-cache
-
 CMD [ "/bin/bash" ]
diff --git a/containers/Dockerfile.cuda.base b/containers/Dockerfile.cuda.base
index faee82d..6918699 100644
--- a/containers/Dockerfile.cuda.base
+++ b/containers/Dockerfile.cuda.base
@@ -6,7 +6,7 @@ FROM nvidia/cuda:12.1.0-cudnn8-devel-ubuntu22.04
 SHELL ["/bin/bash", "-c"]
 
 # Install basic dependencies
-# TODO(mkolodner-sc): iputils-ping temporarily needed to setup inter-job VAI communication for GLT Inference.
+# TODO(mkolodner-sc): iputils-ping temporarily needed to setup inter-job VAI communication for GLT Inference. # Once VAI natively supports this communication, we can remove this requirement. RUN apt-get update \ && apt-get upgrade -y \ diff --git a/containers/Dockerfile.src b/containers/Dockerfile.src index b2244e9..6ca3e05 100644 --- a/containers/Dockerfile.src +++ b/containers/Dockerfile.src @@ -17,7 +17,6 @@ COPY python/snapchat snapchat COPY examples examples COPY python/gigl gigl - # enables usage of tcm as the memory allocator instead of default C memory allocators. Mainly, advantageous for CPU training jobs # Either boosts performance or does not make any improvement compared to default settings. # PyTorch recommendation: https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html#switch-memory-allocator diff --git a/dep_vars.env b/dep_vars.env index 1c5100e..ff75635 100644 --- a/dep_vars.env +++ b/dep_vars.env @@ -1,7 +1,7 @@ # Note this file only supports static key value pairs so it can be loaded by make, bash, python, and sbt without any additional parsing. -DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/gigl-cuda-base:af4fc3cddd3ea41a4dce5d34b043bfbff021d07b.28.1 -DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/gigl-cpu-base:af4fc3cddd3ea41a4dce5d34b043bfbff021d07b.28.1 -DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/gigl-dataflow-base:af4fc3cddd3ea41a4dce5d34b043bfbff021d07b.28.1 +DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-cuda-base:e7390e5bfdb35b8d9492a77b20dac7eba025019d.37.1 +DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-cpu-base:e7390e5bfdb35b8d9492a77b20dac7eba025019d.37.1 +DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG=us-central1-docker.pkg.dev/external-snap-ci-github-gigl/public-gigl/gigl-dataflow-base:e7390e5bfdb35b8d9492a77b20dac7eba025019d.37.1 SPARK_31_TFRECORD_JAR_GCS_PATH=gs://public-gigl/tools/scala/spark_packages/spark-custom-tfrecord_2.12-0.5.0.jar SPARK_35_TFRECORD_JAR_GCS_PATH=gs://public-gigl/tools/scala/spark_packages/spark_3.5.0-custom-tfrecord_2.12-0.6.1.jar diff --git a/deployment/configs/e2e_cicd_resource_config.yaml b/deployment/configs/e2e_cicd_resource_config.yaml index 27a6aea..ec6fa95 100644 --- a/deployment/configs/e2e_cicd_resource_config.yaml +++ b/deployment/configs/e2e_cicd_resource_config.yaml @@ -1,10 +1,10 @@ # GiglResourceConfig for our e2e intergration tests. -shared_resource_config: +shared_resource_config: # Resource labels are just compute labels that should be attached to all compute resources spun up by GiGL. # So a practitioner can have a more fine grained understanding of resource utilization and cost of the resources in their GCP billing. # Read more here: https://cloud.google.com/compute/docs/labeling-resources#what-are-labels resource_labels: - # We have a 63 character limit for cost_resource_group_tag. + # We have a 63 character limit for cost_resource_group_tag. 
# COMPONENT is one of {pre|sgs|spl|tra|inf|pos} standing for: # {Preprocessor | Subgraph Sampler | Split Generator | Trainer | Inference @@ -50,5 +50,5 @@ trainer_config: inferencer_config: num_workers: 1 max_num_workers: 256 - machine_type: "c2d-highmem-32" #"c3-standard-22" + machine_type: "c2d-highmem-32" disk_size_gb: 100 diff --git a/deployment/configs/e2e_glt_resource_config.yaml b/deployment/configs/e2e_glt_resource_config.yaml new file mode 100644 index 0000000..e6daab5 --- /dev/null +++ b/deployment/configs/e2e_glt_resource_config.yaml @@ -0,0 +1,47 @@ +shared_resource_config: + resource_labels: + cost_resource_group_tag: dev_experiments_COMPONENT + cost_resource_group: gigl_platform + common_compute_config: + project: "external-snap-ci-github-gigl" + region: "us-central1" + temp_assets_bucket: "gs://gigl-cicd-temp" + temp_regional_assets_bucket: "gs://gigl-cicd-temp" + perm_assets_bucket: "gs://gigl-cicd-perm" + temp_assets_bq_dataset_name: "gigl_temp_assets" + embedding_bq_dataset_name: "gigl_embeddings" + gcp_service_account_email: "untrusted-external-github-gigl@external-snap-ci-github-gigl.iam.gserviceaccount.com" + dataflow_runner: "DataflowRunner" +preprocessor_config: + edge_preprocessor_config: + num_workers: 1 + max_num_workers: 128 + machine_type: "n2d-highmem-32" + disk_size_gb: 300 + node_preprocessor_config: + num_workers: 1 + max_num_workers: 128 + machine_type: "n2d-highmem-64" + disk_size_gb: 300 +trainer_resource_config: + vertex_ai_trainer_config: + machine_type: n1-highmem-32 + gpu_type: NVIDIA_TESLA_T4 + gpu_limit: 2 + num_replicas: 2 +inferencer_resource_config: + vertex_ai_inferencer_config: + machine_type: n1-highmem-32 + gpu_type: NVIDIA_TESLA_T4 + gpu_limit: 2 + num_replicas: 2 +# ======== Below are not used for GLT Inference +subgraph_sampler_config: + machine_type: "n2d-highmem-32" + num_local_ssds: 4 + num_replicas: 200 +split_generator_config: + machine_type: n2d-standard-16 + num_local_ssds: 2 + num_replicas: 256 +# ========== Above are not used for GLT Inference diff --git a/deployment/configs/unittest_resource_config.yaml b/deployment/configs/unittest_resource_config.yaml index 7577e96..adf1776 100644 --- a/deployment/configs/unittest_resource_config.yaml +++ b/deployment/configs/unittest_resource_config.yaml @@ -4,7 +4,7 @@ shared_resource_config: # So a practitioner can have a more fine grained understanding of resource utilization and cost of the resources in their GCP billing. # Read more here: https://cloud.google.com/compute/docs/labeling-resources#what-are-labels resource_labels: - # We have a 63 character limit for cost_resource_group_tag. + # We have a 63 character limit for cost_resource_group_tag. 
 # COMPONENT is one of {pre|sgs|spl|tra|inf|pos} standing for:
 # {Preprocessor | Subgraph Sampler | Split Generator | Trainer | Inference
@@ -53,4 +53,4 @@ inferencer_config:
   num_workers: 1
   max_num_workers: 256
   machine_type: "c3-standard-22"
-  disk_size_gb: 100
\ No newline at end of file
+  disk_size_gb: 100
diff --git a/docs/assets/images/gigl.png b/docs/assets/images/gigl.png
index 630872d..9ed978b 100644
Binary files a/docs/assets/images/gigl.png and b/docs/assets/images/gigl.png differ
diff --git a/docs/sphinx/source/components/config_populator.md b/docs/sphinx/source/components/config_populator.md
index dd611a8..2911bcb 100644
--- a/docs/sphinx/source/components/config_populator.md
+++ b/docs/sphinx/source/components/config_populator.md
@@ -2,13 +2,19 @@
 
 The Config Populator takes a "template" config and generates a "frozen" config to be used by all subsequent components.
 
-## Input
-- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task.
-- **task_config_uri** (Uri): Path which points to a "template" `GbmlConfig` proto yaml file.
+## Input
+
+- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task.
+- **task_config_uri** (Uri): Path which points to a "template" `GbmlConfig` proto yaml file.
 - **resource_config_uri** (Uri): Path which points to a `GiGLResourceConfig` yaml
 
-## What does it do?
-Takes in a template `GbmlConfig` and outputs a frozen `GbmlConfig` by populating all job related metadata paths in `sharedConfig`. These are mostly GCS paths which the following components read and write from, and use as an intermediary data communication medium. For example, the field `sharedConfig.trainedModelMetadata` is populated with a GCS URI, which indicates to the Trainer to write the trained model to this path, and to the Inferencer to read the model from this path
+## What does it do?
+
+Takes in a template `GbmlConfig` and outputs a frozen `GbmlConfig` by populating all job-related metadata paths in
+`sharedConfig`. These are mostly GCS paths which the following components read and write from, and use as an
+intermediary data communication medium. For example, the field `sharedConfig.trainedModelMetadata` is populated with a
+GCS URI, which indicates to the Trainer to write the trained model to this path, and to the Inferencer to read the model
+from this path.
 
 ## How do I run it?
 
@@ -29,6 +35,7 @@ task_config_uri = config_populator.run(
 ```
 
 **Command Line**
+
 ```bash
 python -m \
   gigl.src.config_populator.config_populator \
@@ -39,15 +46,18 @@ python -m \
 
 **Notes:**
 
-- `output_file_path_frozen_gbml_config_uri` is the output of the run method as seen in the import gigl usage.
-- Be sure to note the discrepency of `template_uri` for command line usage vs `task_config_uri` for import gigl usage.
+- `output_file_path_frozen_gbml_config_uri` is the output of the run method as seen in the import gigl usage.
+- Be sure to note the discrepancy of `template_uri` for command line usage vs `task_config_uri` for import gigl usage.
 
 ## Output
 
-A frozen `GbmlConfig` URI.
+A frozen `GbmlConfig` URI.
 
 ## Other
 
-- If `trainedModelMetadata.trainedModelUri` exists and/or `skipTraining = true`, this indicates that we will be running the pipeline with a pre-trained model, and Config Populator will not overwrite these fields in the `sharedConfig` of the frozen `GbmlConfig`.
+- If `trainedModelMetadata.trainedModelUri` exists and/or `skipTraining = true`, this indicates that we will be running + the pipeline with a pre-trained model, and Config Populator will not overwrite these fields in the `sharedConfig` of + the frozen `GbmlConfig`. -- Although `sharedConfig` is added to the frozen config by the config populator, you may add the field to your template config to enable any feature/optional flags. \ No newline at end of file +- Although `sharedConfig` is added to the frozen config by the config populator, you may add the field to your template + config to enable any feature/optional flags. diff --git a/docs/sphinx/source/components/data_preprocessor.md b/docs/sphinx/source/components/data_preprocessor.md index a97db0a..ae3db36 100644 --- a/docs/sphinx/source/components/data_preprocessor.md +++ b/docs/sphinx/source/components/data_preprocessor.md @@ -1,31 +1,45 @@ # Data Preprocessor -The Data Preprocessor reads node, edge and respective feature data from a data source, and produces preprocessed / transformed versions of all this data, for subsequent components to use. It uses Tensorflow Transform to achieve data transformation in a distributed fashion, and allows for transformations like categorical encoding, scaling, normalization, casting and more. +The Data Preprocessor reads node, edge and respective feature data from a data source, and produces preprocessed / +transformed versions of all this data, for subsequent components to use. It uses Tensorflow Transform to achieve data +transformation in a distributed fashion, and allows for transformations like categorical encoding, scaling, +normalization, casting and more. -## Input +## Input -- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task. -- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file - Can be either manually created, or `config_populator` component (recommended approach) can be used which can generate this frozen config from a template config. +- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task. +- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file - Can be either manually + created, or `config_populator` component (recommended approach) can be used which can generate this frozen config from + a template config. - **resource_config_uri** (Uri): Path which points to a `GiGLResourceConfig` yaml - **Optional: custom_worker_image_uri**: Path to docker file to be used for dataflow worker harness image -## What does it do? +## What does it do? The Data Preprocessor undertakes the following actions -- Reads frozen `GbmlConfig` proto yaml, which contains a pointer to a user-defined instance of the `DataPreprocessorConfig` class (see `dataPreprocessorConfigClsPath` field of `datasetConfig.dataPreprocessorConfig`). This class houses logic for - - Preparing datasets for ingestion and transformation (see `prepare_for_pipeline`) - - Defining transformation imperatives for different node types (`get_nodes_preprocessing_spec`) - - Defining transformation imperatives for different edge types (`get_edges_preprocessing_spec`) - - Custom arguments can also be passed into the `DataPreprocessorConfig` class by including them in the `dataPreprocessorArgs` field inside `datasetConfig.dataPreprocessorConfig` section of `GbmlConfig`. 
+- Reads frozen `GbmlConfig` proto yaml, which contains a pointer to a user-defined instance of the
+  `DataPreprocessorConfig` class (see `dataPreprocessorConfigClsPath` field of `datasetConfig.dataPreprocessorConfig`).
+  This class houses logic for
+  - Preparing datasets for ingestion and transformation (see `prepare_for_pipeline`)
+  - Defining transformation imperatives for different node types (`get_nodes_preprocessing_spec`)
+  - Defining transformation imperatives for different edge types (`get_edges_preprocessing_spec`)

-- Builds a `GraphMetadata` proto instance, which contains information about the node types (e.g. “user”) and edge types in the graph (e.g. “user-friends-user”), and assigns them corresponding “condensed” integer node and edge types.
+  Custom arguments can also be passed into the `DataPreprocessorConfig` class by including them in the
+  `dataPreprocessorArgs` field inside `datasetConfig.dataPreprocessorConfig` section of `GbmlConfig`.

-- Runs an “enumeration” step to internally map all the node ids to integers to mitigate space overhead. Other components operate on these enumerated identifiers to reduce storage footprint, memory overhead, and network traffic.
+- Builds a `GraphMetadata` proto instance, which contains information about the node types (e.g. “user”) and edge types
+  in the graph (e.g. “user-friends-user”), and assigns them corresponding “condensed” integer node and edge types.

-- For each node and edge type, spins up a Dataflow job which manifests a Tensorflow Transform pipeline to operationalize the user-defined transformations specified in the `get_nodes_preprocessing_spec` and `get_edges_preprocessing_spec` functions inside the user-specified `DataPreprocessorConfig` instance. The pipelines write out transformed features as TFRecords, and a schema to help parse them, the inferred Tensorflow transform function for each feature-set, and other metadata to GCS.
+- Runs an “enumeration” step to internally map all the node ids to integers to mitigate space overhead. Other components
+  operate on these enumerated identifiers to reduce storage footprint, memory overhead, and network traffic.
+
+- For each node and edge type, spins up a Dataflow job which manifests a Tensorflow Transform pipeline to operationalize
+  the user-defined transformations specified in the `get_nodes_preprocessing_spec` and `get_edges_preprocessing_spec`
+  functions inside the user-specified `DataPreprocessorConfig` instance. The pipelines write out transformed features as
+  TFRecords, and a schema to help parse them, the inferred Tensorflow transform function for each feature-set, and other
+  metadata to GCS.

 ## How do I run it?

@@ -58,26 +72,44 @@ python -m \

 ## Output

-Upon completing the Dataflow jobs referenced in the last bullet point of [What](#what-does-it-do) above, the component writes out a `PreprocessedMetadata` proto to URI specified by the `preprocessedMetadataUri` field in the `sharedConfig` section of the frozen `GbmlConfig` i.e. the frozen task spec specified by `task_config_uri`.
+Upon completing the Dataflow jobs referenced in the last bullet point of [What](#what-does-it-do) above, the component
+writes out a `PreprocessedMetadata` proto to the URI specified by the `preprocessedMetadataUri` field in the
+`sharedConfig` section of the frozen `GbmlConfig`, i.e. the frozen task spec specified by `task_config_uri`.
This proto houses information about
+
 - The inferred `GraphMetadata`
 - A map of all condensed node types to `NodeMetadataOutput` protos
 - A map of all condensed edge types to `EdgeMetadataOutput` protos

-`NodeMetadataOutput` and `EdgeMetadataOutput` protos store information about the paths mentioned in the above bullet point, and relevant metadata including the fields in each TFExample which store node/edge identifiers, feature keys, labels, etc. `PreprocessedMetadata` will be read from this URI by other components.
+`NodeMetadataOutput` and `EdgeMetadataOutput` protos store information about the paths mentioned in the above bullet
+point, and relevant metadata including the fields in each TFExample which store node/edge identifiers, feature keys,
+labels, etc. `PreprocessedMetadata` will be read from this URI by other components.

 ## Custom Usage

-- The actions this component undertakes are largely determined by the imperative transformation logic specified in the user-provided `DataPreprocessorConfig` class instance. This leaves much to user control. Please take a look at the instance provided at the `dataPreprocessorConfigClsPath` field of `datasetConfig`.`dataPreprocessorConfig` in order to learn more. For an example `dataPreprocessorConfig`, see [here](../../../../python/gigl/src/mocking/mocking_assets/passthrough_preprocessor_config_for_mocked_assets.py)
-
-- In order to customize transformation logic for existing node features, take a look at preprocessing functions in [Tensorflow Transform ](https://www.tensorflow.org/tfx/transform/get_started) documentation. In order to add or remove node and edge features, you can modify the logic in `feature_spec_fn` and `preprocessing_fn` housed by `NodeDataPreprocessingSpec` and `EdgeDataPreprocessingSpec`. You can use the `build_ingestion_feature_spec_fn` function to conveniently generate feature specs which allow you to ingest and then transform these fields
+- The actions this component undertakes are largely determined by the imperative transformation logic specified in the
+  user-provided `DataPreprocessorConfig` class instance. This leaves much to user control. Please take a look at the
+  instance provided at the `dataPreprocessorConfigClsPath` field of `datasetConfig`.`dataPreprocessorConfig` in order to
+  learn more. For an example `dataPreprocessorConfig`, see
+  [here](../../../../python/gigl/src/mocking/mocking_assets/passthrough_preprocessor_config_for_mocked_assets.py)

-- Note that the identifier fields (indicating node id, edge src node id, or edge dst node id) are always designated as integer types due to the enumeration steps which precedes the Tensorflow Transform jobs.
+- To customize transformation logic for existing node features, take a look at the preprocessing functions in the
+  [Tensorflow Transform](https://www.tensorflow.org/tfx/transform/get_started) documentation. To add or remove node and
+  edge features, you can modify the logic in `feature_spec_fn` and `preprocessing_fn` housed by
+  `NodeDataPreprocessingSpec` and `EdgeDataPreprocessingSpec`. You can use the `build_ingestion_feature_spec_fn`
+  function to conveniently generate feature specs which allow you to ingest and then transform these fields (a minimal
+  sketch follows at the end of this section).
+- Note that the identifier fields (indicating node id, edge src node id, or edge dst node id) are always designated as
+  integer types due to the enumeration steps which precede the Tensorflow Transform jobs.
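+
+As an illustration, below is a minimal `preprocessing_fn` sketch. It uses only standard Tensorflow Transform functions,
+but the feature names (`age`, `country`) are hypothetical, and the exact wiring into a `NodeDataPreprocessingSpec` will
+differ per dataset:
+
+```python
+import tensorflow_transform as tft
+
+
+def preprocessing_fn(inputs: dict) -> dict:
+    # `inputs` maps feature names to tensors parsed via the ingestion feature spec.
+    outputs = {}
+    # Scale a numeric feature to zero mean / unit variance; statistics are computed over the full dataset.
+    outputs["age_scaled"] = tft.scale_to_z_score(inputs["age"])
+    # Categorically encode a string feature by mapping it to an index in a learned vocabulary.
+    outputs["country_encoded"] = tft.compute_and_apply_vocabulary(inputs["country"])
+    return outputs
+```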
## Other

-- **Design**: The design of this component is intended to leave maximal flexibility to the user in defining how they want to preprocess and transform their data. These steps are unlikely to be the same across many different custom pipelines (e.g. which fields to categorically encode, which to normalize, etc.) and thus we opted for a user-defined class to house as much code as possible which could be natively written by someone familiar with Tensorflow Transform.
+- **Design**: The design of this component is intended to leave maximal flexibility to the user in defining how they
+  want to preprocess and transform their data. These steps are unlikely to be the same across many different custom
+  pipelines (e.g. which fields to categorically encode, which to normalize, etc.) and thus we opted for a user-defined
+  class to house as much code as possible which could be natively written by someone familiar with Tensorflow Transform.

-- **Debugging**: The core logic of this component executes in Dataflow. A link to the Dataflow job will be printed in the logs of the component, which can be used to navigate to the Dataflow console and see fine-grained logging of the Dataflow pipeline.
+- **Debugging**: The core logic of this component executes in Dataflow. A link to the Dataflow job will be printed in
+  the logs of the component, which can be used to navigate to the Dataflow console and see fine-grained logging of the
+  Dataflow pipeline.
diff --git a/docs/sphinx/source/components/inferencer.md b/docs/sphinx/source/components/inferencer.md
index fd27071..0ba8959 100644
--- a/docs/sphinx/source/components/inferencer.md
+++ b/docs/sphinx/source/components/inferencer.md
@@ -1,11 +1,13 @@
 # Model Inference

-The Inferencer component is responsible for running inference of a trained model on samples generated by the Subgraph Sampler component. At a high level, it works by applying a trained model in an embarrassingly parallel and distributed fashion across these samples, and persisting the output embeddings and/or predictions.
+The Inferencer component is responsible for running inference of a trained model on samples generated by the Subgraph
+Sampler component. At a high level, it works by applying a trained model in an embarrassingly parallel and distributed
+fashion across these samples, and persisting the output embeddings and/or predictions.

 ## Input

-- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task. 
-- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file - Can be either manually created, or `config_populator` component (recommended approach) can be used which can generate this frozen config from a template config.
+- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task.
+- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file. It can either be created
+  manually, or generated from a template config by the `config_populator` component, which is the recommended
+  approach.
 - **resource_config_uri** (Uri): Path which points to a `GiGLResourceConfig` yaml
 - **Optional: custom_worker_image_uri**: Path to docker file to be used for dataflow worker harness image

@@ -13,17 +15,33 @@

 The Inferencer undertakes the following actions:

-- Reads frozen `GbmlConfig` proto yaml. This proto contains a pointer to a class instance which implements the `BaseInferencer` protocol (see `inferencerClsPath` field of `inferencerConfig` in `GbmlConfig`).
This class houses logic which dictates how to run inference for a batch of samples (see `infer_batch` in [modeling task spec](../../../../python/gigl/src/common/modeling_task_specs/node_anchor_based_link_prediction_modeling_task_spec.py)) -- the types of these samples are determined by the `taskMetadata` in the frozen `GbmlConfig`. +- Reads frozen `GbmlConfig` proto yaml. This proto contains a pointer to a class instance which implements the + `BaseInferencer` protocol (see `inferencerClsPath` field of `inferencerConfig` in `GbmlConfig`). This class houses + logic which dictates how to run inference for a batch of samples (see `infer_batch` in + [modeling task spec](../../../../python/gigl/src/common/modeling_task_specs/node_anchor_based_link_prediction_modeling_task_spec.py)) + -- the types of these samples are determined by the `taskMetadata` in the frozen `GbmlConfig`. - Custom arguments can also be passed into the class instance by including them in the `inferencerArgs` field inside `inferencerConfig` section of `GbmlConfig`. Several standard configurations of this instance are implemented already at a GiGL platform-level; for example, the `NodeAnchorBasedLinkPredictionModelingTaskSpec` instance referenced in the sample frozen `GbmlConfig` can be used with no/minimal changes for other node-anchor based link prediction tasks. + Custom arguments can also be passed into the class instance by including them in the `inferencerArgs` field inside + `inferencerConfig` section of `GbmlConfig`. Several standard configurations of this instance are implemented already + at a GiGL platform-level; for example, the `NodeAnchorBasedLinkPredictionModelingTaskSpec` instance referenced in the + sample frozen `GbmlConfig` can be used with no/minimal changes for other node-anchor based link prediction tasks. -- Reads the trained model asset from the `trainedModelUri` field in the `sharedConfig.trainedModelMetadata` section of the frozen `GbmlConfig`, and uses it to initialize the `BaseInferencer` class instance above. +- Reads the trained model asset from the `trainedModelUri` field in the `sharedConfig.trainedModelMetadata` section of + the frozen `GbmlConfig`, and uses it to initialize the `BaseInferencer` class instance above. -- Instantiates a Dataflow job to read samples produced by the Subgraph Sampler component, which are stored at URIs referenced inside the `sharedConfig.flattenedGraphMetadata` section of the frozen `GbmlConfig`. Note that depending on the `taskMetadata` in the `GbmlConfig`, the URIs will be housed under different keys in this section. Upon reading the outputs from Subgraph Sampler, the pipeline follows logic housed in a `BaseInferenceBlueprint` class (platform-level), which decodes and collates individual samples into batches, and then runs the inference logic specified in infer_batch of the `BaseInferencer` class instance referenced above. Subsequently, the pipeline writes out embeddings and/or predictions (in classification scenarios) to BigQuery. +- Instantiates a Dataflow job to read samples produced by the Subgraph Sampler component, which are stored at URIs + referenced inside the `sharedConfig.flattenedGraphMetadata` section of the frozen `GbmlConfig`. Note that depending on + the `taskMetadata` in the `GbmlConfig`, the URIs will be housed under different keys in this section. 
Upon reading the
+  outputs from Subgraph Sampler, the pipeline follows logic housed in a `BaseInferenceBlueprint` class (platform-level),
+  which decodes and collates individual samples into batches, and then runs the inference logic specified in
+  `infer_batch` of the `BaseInferencer` class instance referenced above. Subsequently, the pipeline writes out
+  embeddings and/or predictions (in classification scenarios) to BigQuery.

-- Finally, the component "un-enumerates" all the assets in BigQuery (to revert the "enumeration" conducted by the Data Preprocessor component).
+- Finally, the component "un-enumerates" all the assets in BigQuery (to revert the "enumeration" conducted by the Data
+  Preprocessor component).

 ## How do I run it?
+
 **Import GiGL**

 ```python
@@ -42,6 +60,7 @@ inferencer.run(
 ```

 **Command Line**
+
 ```
 python -m gigl.src.inference.v1.gnn_inferencer \
 --job_name="sample_job_name" \
@@ -51,14 +70,22 @@ python -m gigl.src.inference.v1.gnn_inferencer \

 ## Output

-The Inferencer outputs embedding and / or prediction assets, based on the `taskMetadata` in the frozen `GbmlConfig`. Specifically, for Node-anchor Based Link Prediction tasks as we have in the sample MAU config, the embeddings are written to the BQ table specified at the `embeddingsBqPath` field in the `sharedConfig.inferenceMetadata` section.
+The Inferencer outputs embedding and / or prediction assets, based on the `taskMetadata` in the frozen `GbmlConfig`.
+Specifically, for Node-anchor Based Link Prediction tasks as we have in the sample MAU config, the embeddings are
+written to the BQ table specified at the `embeddingsBqPath` field in the `sharedConfig.inferenceMetadata` section.

 ## Custom Usage

-None of the logic in this component should require changing for currently supported tasks, such as the inference logic specified in the provided `NodeAnchorBasedLinkPredictionModelingTaskSpec` which is fairly standard. However, you may override `infer_batch` in a custom class `BaseInferencer` class instance for custom tasks that are not supported.
+None of the logic in this component should require changing for currently supported tasks, such as the inference logic
+specified in the provided `NodeAnchorBasedLinkPredictionModelingTaskSpec`, which is fairly standard. However, you may
+override `infer_batch` in a custom `BaseInferencer` class instance for custom tasks that are not supported.

 ## Other

-- **Design:** Currently, all inference happens on CPU. This is because we can easily scale this component by adding more worker machines in Dataflow, and compute is cheap. Dataflow does support GPU instances, but seems it requires more care/attention to monitor utilization due to cost implications for limited benefits.
+- **Design:** Currently, all inference happens on CPU. This is because we can easily scale this component by adding more
+  worker machines in Dataflow, and compute is cheap. Dataflow does support GPU instances, but it seems to require more
+  care/attention to monitor utilization, given the cost implications for limited benefits.

-- **Debugging:** The core logic of this component executes in Dataflow. A link to the Dataflow job will be printed in the logs of the component, which can be used to navigate to the Dataflow console and see fine-grained logging of the Dataflow pipeline.
+- **Debugging:** The core logic of this component executes in Dataflow. A link to the Dataflow job will be printed in
+  the logs of the component, which can be used to navigate to the Dataflow console and see fine-grained logging of the
+  Dataflow pipeline.
diff --git a/docs/sphinx/source/components/split_generator.md b/docs/sphinx/source/components/split_generator.md
index 4e685b2..3d11e81 100644
--- a/docs/sphinx/source/components/split_generator.md
+++ b/docs/sphinx/source/components/split_generator.md
@@ -1,14 +1,19 @@
 ## Split Generator

-The Split Generator reads localized subgraph samples produced by Subgraph Sampler, and executes logic to split the data into training, validation and test sets. The semantics of which nodes and edges end up in which data split depends on the particular semantics of the splitting strategy.
+The Split Generator reads localized subgraph samples produced by Subgraph Sampler, and executes logic to split the data
+into training, validation and test sets. The semantics of which nodes and edges end up in which data split depends on
+the particular semantics of the splitting strategy.

 ## Input

-- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task. 
-- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file - Can be either manually created, or `config_populator` component (recommended approach) can be used which can generate this frozen config from a template config.
+- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task.
+- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file. It can either be created
+  manually, or generated from a template config by the `config_populator` component, which is the recommended
+  approach.
 - **resource_config_uri** (Uri): Path which points to a `GiGLResourceConfig` yaml

 Optional Development Args:
+
 - **cluster_name** (str): Optional param if you want to re-use a cluster for development
 - **skip_cluster_delete** (bool): Provide flag to skip automatic cleanup of dataproc cluster
 - **debug_cluster_own_alias** (str): Add alias to cluster
@@ -17,11 +22,27 @@ Optional Development Args:

 The Split Generator undertakes the following actions:

-- Reads frozen `GbmlConfig` proto yaml, which contains a pointer to an instance of a `SplitStrategy` class (see `splitStrategyClsPath` field of `datasetConfig.splitGeneratorConfig`), and an instance of an `Assigner` class (see `assignerClsPath` field of `datasetConfig.splitGeneratorConfig`). These classes house logic for constructs which dictate how to assign nodes and/or edges to different buckets, which are then utilized to assign these objects to training, validation and test sets accordingly. See the currently supported strategies in: `scala/splitgenerator/src/main/scala/lib/split_strategies/*`
-  - Custom arguments can also be passed into the `SplitStrategy` class (`Assigner` class) by including them in the `splitStrategyArgs` (`assignerArgs`) field(s) inside `datasetConfig.splitGeneratorConfig` section of `GbmlConfig`. Several standard configurations of `SplitStrategy` and corresponding `Assigner` classes are implemented already at a GiGL platform-level: transductive node classification, inductive node classification, and transductive link prediction split routines, as detailed here.
-
-- The component kicks off a Spark job which read samples produced by the Subgraph Sampler component, which are stored at URIs referenced inside the `sharedConfig.flattenedGraphMetadata` section of the frozen `GbmlConfig`.
Note that depending on the `taskMetadata` in the `GbmlConfig`, the URIs will be housed under different keys in this section; for example, given the Node-anchor Based Link Prediction setting used in the sample frozen `GbmlConfig` MAU yaml, we can find the Subgraph Sampler outputs under the `nodeAnchorBasedLinkPredictionOutput` field. Upon reading the outputs from Subgraph Sampler, the Split Generator component executes methods defined in the provided SplitStrategy instance on each of the input samples. The pipeline writes out TFRecord samples with appropriate data meant to be visible in training, validation and test sets to GCS.
+- Reads frozen `GbmlConfig` proto yaml, which contains a pointer to an instance of a `SplitStrategy` class (see
+  `splitStrategyClsPath` field of `datasetConfig.splitGeneratorConfig`), and an instance of an `Assigner` class (see
+  `assignerClsPath` field of `datasetConfig.splitGeneratorConfig`). These classes house logic for constructs which
+  dictate how to assign nodes and/or edges to different buckets, which are then utilized to assign these objects to
+  training, validation and test sets accordingly. See the currently supported strategies in:
+  `scala/splitgenerator/src/main/scala/lib/split_strategies/*`
+
+  Custom arguments can also be passed into the `SplitStrategy` class (`Assigner` class) by including them in the
+  `splitStrategyArgs` (`assignerArgs`) field(s) inside `datasetConfig.splitGeneratorConfig` section of `GbmlConfig`.
+  Several standard configurations of `SplitStrategy` and corresponding `Assigner` classes are implemented already at a
+  GiGL platform-level: transductive node classification, inductive node classification, and transductive link prediction
+  split routines, as detailed here.
+
+- The component kicks off a Spark job which reads samples produced by the Subgraph Sampler component, which are stored
+  at URIs referenced inside the `sharedConfig.flattenedGraphMetadata` section of the frozen `GbmlConfig`. Note that
+  depending on the `taskMetadata` in the `GbmlConfig`, the URIs will be housed under different keys in this section; for
+  example, given the Node-anchor Based Link Prediction setting used in the sample frozen `GbmlConfig` MAU yaml, we can
+  find the Subgraph Sampler outputs under the `nodeAnchorBasedLinkPredictionOutput` field. Upon reading the outputs from
+  Subgraph Sampler, the Split Generator component executes methods defined in the provided SplitStrategy instance on
+  each of the input samples. The pipeline writes out TFRecord samples with appropriate data meant to be visible in
+  training, validation and test sets to GCS.

 ## How do I run it?

@@ -55,6 +76,7 @@ split_generator.run(
 ```

 **Command Line**
+
 ```
 python -m gigl.src.split_generator.split_generator \
 --job_name"sample_job_name" \
@@ -69,23 +91,28 @@ The python entry point `split_generator.py` performs the following:

 - Run the Split Generator Spark job,
 - Delete the Dataproc cluster after the job is finished.

-**Optional Arguments**: 
-Provide a custom cluster name so you can re-use it instead of having to create a new one every time.
+**Optional Arguments**: Provide a custom cluster name so you can re-use it instead of having to create a new one every
+time.
+
 ```
 --cluster_name="unique_name_for_the_cluster"
 ```
-Ensure to skip deleting the cluster so it can be re-used. 
-But, be sure to clean up manually after to prevent $ waste.
+
+Skip deleting the cluster so it can be re-used, but be sure to clean up manually afterwards to prevent $ waste.
+
 ```
 --skip_cluster_delete
 ```
-Marks cluster is to be used for debugging/development by the alias provided. 
-i.e. for username some_user, provide debug_cluster_owner_alias="some_user"
+
+Marks the cluster as being used for debugging/development by the provided alias, i.e. for username some_user, provide
+debug_cluster_owner_alias="some_user"
+
 ```
 --debug_cluster_owner_alias="your_alias"
 ```

 *Example for when you would want to use cluster for development:*
+
 ```
 python -m gigl.src.split_generator.split_generator \
   --job_name sample_job_name \
@@ -98,20 +125,35 @@ python -m gigl.src.split_generator.split_generator \

 ## Output

-Upon completing the Dataflow job referenced in the last bullet point of the [What Does it Do](#what-does-it-do) section, the Split Generator writes out TFRecord samples belonging to each of the training, validation and test sets to URIs which are referenced in `sharedConfig.datasetMetadata` section of the `GbmlConfig`. Based on the `taskMetadata` in the `GbmlConfig`, the outputs will be written to different keys within this section. Given the sample configs for the MAU task referenced here, they are written to URIs referenced at the `NodeAnchorBasedLinkPredictionDataset` field.
+Upon completing the Dataflow job referenced in the last bullet point of the [What Does it Do](#what-does-it-do) section,
+the Split Generator writes out TFRecord samples belonging to each of the training, validation and test sets to URIs
+which are referenced in the `sharedConfig.datasetMetadata` section of the `GbmlConfig`. Based on the `taskMetadata` in
+the `GbmlConfig`, the outputs will be written to different keys within this section. Given the sample configs for the
+MAU task referenced here, they are written to URIs referenced at the `NodeAnchorBasedLinkPredictionDataset` field.

 ## Custom Usage

-- To customize the semantics of the splitting method desired, users can manipulate arguments passed to existing `Assigner` and `SplitStrategy` class instances, or even write their own. The instances provided reflect "standard" splitting techniques in graph ML literature, which can be tricky to implement, so caution is advised in trying to customize or write modified variants, in order to avoid leaking data between training, validation and test sets.
+- To customize the semantics of the splitting method desired, users can manipulate arguments passed to existing
+  `Assigner` and `SplitStrategy` class instances, or even write their own. The instances provided reflect "standard"
+  splitting techniques in graph ML literature, which can be tricky to implement, so caution is advised in trying to
+  customize or write modified variants, in order to avoid leaking data between training, validation and test sets.

-- Currently, all `SplitStrategy` instances leverage `HashingAssigner` (a specialized `Assigner` in which nodes / edges are assigned to different buckets randomly, reflecting random splits). In the future, we can consider introducing new `Assigner` policies to reflect temporal splitting.
+- Currently, all `SplitStrategy` instances leverage `HashingAssigner` (a specialized `Assigner` in which nodes / edges
+  are assigned to different buckets randomly, reflecting random splits). In the future, we can consider introducing new
+  `Assigner` policies to reflect temporal splitting.

 ## Other

-- **Design**: Graph ML data splitting is tricky. Please see [here](http://snap.stanford.edu/class/cs224w-2020/slides/09-theory.pdf) for a good academic reference into how splitting is standardly conducted to avoid leakage. We chose to create abstractions around splitting which reflect flexible policies around assignment of nodes and/or edges to different buckets, from which defining the visible data during training, validation and testing becomes deterministic.
+- **Design**: Graph ML data splitting is tricky. Please see
+  [here](http://snap.stanford.edu/class/cs224w-2020/slides/09-theory.pdf) for a good academic reference on how
+  splitting is typically conducted to avoid leakage. We chose to create abstractions around splitting which reflect
+  flexible policies around assignment of nodes and/or edges to different buckets, from which defining the visible data
+  during training, validation and testing becomes deterministic.

-This component runs on Spark. Some info on monitoring this job: 
+This component runs on Spark. Some info on monitoring this job:

-- The list of all jobs/clusters is available on [Dataproc UI](https://cloud.google.com/dataproc?hl=en), and we can monitor the overall Spark job statuses and configurations.
+- The list of all jobs/clusters is available on [Dataproc UI](https://cloud.google.com/dataproc?hl=en), and we can
+  monitor the overall Spark job statuses and configurations.

-- While the cluster is running, we can access Spark UI's WEB INTERFACES tab to monitor each stage of the job in more detail.
\ No newline at end of file
+- While the cluster is running, we can access Spark UI's WEB INTERFACES tab to monitor each stage of the job in more
+  detail.
diff --git a/docs/sphinx/source/components/subgraph_sampler.md b/docs/sphinx/source/components/subgraph_sampler.md
index 23b04a5..46db18c 100644
--- a/docs/sphinx/source/components/subgraph_sampler.md
+++ b/docs/sphinx/source/components/subgraph_sampler.md
@@ -1,33 +1,49 @@
 # Subgraph Sampler

-The Subgraph Sampler receives node and edge data from Data Preprocessor and mainly generates k-hop localized subgraphs for each node in the graph. Basically, the Subgraph Sampler enables us to store the computation graph of each node independently without worrying about maintaining a huge graph in memory for down-stream components. It uses Spark/Scala and runs on a Dataproc cluster. Based on the predefined sample schema for each task, the output samples are serialized/saved in TFRecord format.
+The Subgraph Sampler receives node and edge data from Data Preprocessor and mainly generates k-hop localized subgraphs
+for each node in the graph. Basically, the Subgraph Sampler enables us to store the computation graph of each node
+independently without worrying about maintaining a huge graph in memory for down-stream components. It uses Spark/Scala
+and runs on a Dataproc cluster. Based on the predefined sample schema for each task, the output samples are
+serialized/saved in TFRecord format.

 ## Input

-- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task. 
-- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file - Can be either manually created, or `config_populator` component (recommended approach) can be used which can generate this frozen config from a template config.
+- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task.
+- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file. It can either be created
+  manually, or generated from a template config by the `config_populator` component, which is the recommended
+  approach.
 - **resource_config_uri** (Uri): Path which points to a `GiGLResourceConfig` yaml

 Optional Development Args:
+
 - **cluster_name** (str): Optional param if you want to re-use a cluster for development
 - **skip_cluster_delete** (bool): Provide flag to skip automatic cleanup of dataproc cluster
 - **debug_cluster_own_alias** (str): Add alias to cluster

 ## What does it do?

-The Subgraph Sampler, supports localized neighborhood sampling for homogeneous and heterogeneous graphs, where subgraph edges can be sampled with the following strategies: random uniform, top-k, weighted random, or customized sampling strategies.
+The Subgraph Sampler supports localized neighborhood sampling for homogeneous and heterogeneous graphs, where subgraph
+edges can be sampled with the following strategies: random uniform, top-k, weighted random, or customized sampling
+strategies.

 The Subgraph Sampler performs the following steps:

-- Reads frozen `GbmlConfig` proto yaml to get 
-  - `preprocessedMetadataUri` to read relevant node and edge metadata such as feature names, node id key and path to TFRecords that store node and edge data obtained from the Data Preprocessor. 
-  - `flattenedGraphMetadata` which includes the URI for storing the Subgraph Sampler outputs 
-  - `subgraphSamplerConfig`
+- Reads frozen `GbmlConfig` proto yaml to get
+
+  - `preprocessedMetadataUri` to read relevant node and edge metadata such as feature names, node id key and path to
+    TFRecords that store node and edge data obtained from the Data Preprocessor.
+  - `flattenedGraphMetadata` which includes the URI for storing the Subgraph Sampler outputs
+  - `subgraphSamplerConfig`
+
+- Converts node/edge TFRecords to DataFrames

-- Converts node/edge TFRecords to DataFrames
 - Samples k-hop neighbors for all nodes according to the `subgraphSamplingStrategy` provided in config
+
 - Hydrates the sampled neighborhoods (with node/edge features)

-- If the task is NodeAnchorBasedLinkPrediction, it will sample positive edges and positive node neighborhoods for each root node
+
+- If the task is NodeAnchorBasedLinkPrediction, it will sample positive edges and positive node neighborhoods for each
+  root node
+
 - Converts final DataFrames to TFRecord format based on the predefined schema in protos.

 ## How do I run it?

@@ -36,19 +52,25 @@ The Subgraph Sampler performs the following steps:

 Firstly, you can adjust the `subgraphSamplerConfig` parameters in the `GbmlConfig`.

-- `SubgraphSamplingStrategy` allows customization of subgraph sampling operations by the user on a config level. 
+- `SubgraphSamplingStrategy` allows customization of subgraph sampling operations by the user on a config level.
 - Users can specify each step of the `messagePassingPaths` through `samplingOp`.
 - Each `samplingOp` has `inputOpNames` where you can specify the parent of the `samplingOp`.
-- The `samplingOp` essentially forms a DAG of edge types to sample, indicating how we should construct our sampled k-hop message passing graph, one for each root node type.
+- The `samplingOp` essentially forms a DAG of edge types to sample, indicating how we should construct our sampled k-hop
+  message passing graph, one for each root node type.
+- (Note: Only node types which exist in `supervision_edge_types` need their own `MessagePassingPaths` defined; see
+  [task_config_guide](../user_guide/config_guides/task_config_guide.md) for more details)
 - We currently support the following sampling methods in `samplingOp`:
-  - `randomUniform`: Random sample 
-  - `topK`: Sample top K, based on `edgeFeatName` 
-  - `weightedRandom`: Sample nodes based on a specified weight from `edgeFeatName` 
-  - `custom`: Custom sampling strategy. Users can implement their own custom sampling method. 
-- New `SubgraphSamplingStrategy` can also be introduced in addition to `MessagePassingPathStrategy` `GlobalRandomUniformStrategy`, for example, [Pixie](https://cs.stanford.edu/people/jure/pubs/pixie-www18.pdf) random walk sampling.
+  - `randomUniform`: Random sample
+  - `topK`: Sample top K, based on `edgeFeatName`
+  - `weightedRandom`: Sample nodes based on a specified weight from `edgeFeatName`
+  - `custom`: Custom sampling strategy. Users can implement their own custom sampling method.
+- New `SubgraphSamplingStrategy` implementations can also be introduced in addition to `MessagePassingPathStrategy` and
+  `GlobalRandomUniformStrategy`, for example, [Pixie](https://cs.stanford.edu/people/jure/pubs/pixie-www18.pdf) random
+  walk sampling.
+
+Example of `SubgraphSamplingStrategy` for a heterogeneous graph with 2 edge types (user, to, story) and (story, to, user)
+that does 2-hop sampling.

-Example of `SubgraphSamplingStrategy` for heterogeneous graph with 2 edge types (user, to, story) and (story, to, user) that does 2-hop sampling.
 ```yaml
 subgraphSamplerConfig:
   subgraphSamplingStrategy:
@@ -93,6 +115,7 @@ subgraphSamplerConfig:
 ```

 Example of `SubgraphSamplingStrategy` for a user - user homogeneous graph that does 2-hop sampling.
+
 ```yaml
 subgraphSamplerConfig:
   subgraphSamplingStrategy:
@@ -118,7 +141,10 @@ subgraphSamplerConfig:
       numNodesToSample: 10
 ```

-(2024 Aug) We support two backends for Subgraph Sampling: GraphDB-based and Pure-Spark. These solutions have different implications in flexibility, cost-scaling, and relevance for different applications. As of Aug 2024, for heterogeneous subgraph sampling, a graphDB backend must be used, while for homogeneous subgraph sampling, both backends may be used. Enabling parity between these two is work-in-progress.
+(2024 Aug) We support two backends for Subgraph Sampling: GraphDB-based and Pure-Spark. These solutions have different
+implications in flexibility, cost-scaling, and relevance for different applications. As of Aug 2024, for heterogeneous
+subgraph sampling, a graphDB backend must be used, while for homogeneous subgraph sampling, both backends may be used.
+Enabling parity between these two is work-in-progress.

 An example of specifying the `subgraphSamplerConfig` to use the graphDB backend with Nebula graph-DB is

@@ -132,6 +158,7 @@ subgraphSamplerConfig:
 ```

 An example of specifying the `subgraphSamplerconfig` to use the Pure-Spark backend:
+
 ```yaml
 subgraphSamplerConfig:
   numNeighborsToSample: 10
@@ -155,6 +182,7 @@ subgraph_sampler.run(
 ```

 **Command Line**
+
 ```
 python -m gigl.src.subgraph_sampler.subgraph_sampler \
 --job_name="sample_job_name" \
@@ -169,23 +197,28 @@ The python entry point `split_generator.py` performs the following:

 - Run the Split Generator Spark job,
 - Delete the Dataproc cluster after the job is finished.

-**Optional Arguments**: 
-Provide a custom cluster name so you can re-use it instead of having to create a new one every time.
+**Optional Arguments**: Provide a custom cluster name so you can re-use it instead of having to create a new one every
+time.
+
 ```
 --cluster_name="unique_name_for_the_cluster"
 ```
-Ensure to skip deleting the cluster so it can be re-used. 
-But, be sure to clean up manually after to prevent $ waste.
+
+Skip deleting the cluster so it can be re-used, but be sure to clean up manually afterwards to prevent $ waste.
+
 ```
 --skip_cluster_delete
 ```
-Marks cluster is to be used for debugging/development by the alias provided. 
-i.e. for username some_user, provide debug_cluster_owner_alias="some_user"
+
+Marks the cluster as being used for debugging/development by the provided alias, i.e. for username some_user, provide
+debug_cluster_owner_alias="some_user"
+
 ```
 --debug_cluster_owner_alias="your_alias"
 ```

 *Example for when you would want to use cluster for development:*
+
 ```
 python -m gigl.src.split_generator.split_generator \
   --job_name="sample_job_name" \
@@ -196,27 +229,33 @@ python -m gigl.src.split_generator.split_generator \
   --debug_cluster_owner_alias="$(whoami)"
 ```

-
 ## Output

-Upon completion of the Spark job, subgraph samples are stored in the URIs defined in `flattenedGraphMetadata` field in frozen `GbmlConfig`.
+Upon completion of the Spark job, subgraph samples are stored in the URIs defined in the `flattenedGraphMetadata` field
+in the frozen `GbmlConfig`.

-For example, for the Node Anchor Based Link Prediction task, we will have two types of samples referenced in `nodeAnchorBasedLinkPredictionOutput`:
+For example, for the Node Anchor Based Link Prediction task, we will have two types of samples referenced in
+`nodeAnchorBasedLinkPredictionOutput`:

-- `tfrecordUriPrefix` which includes main samples in `NodeAnchorBasedLinkPredictionSample` protos which contain an anchor node and positive samples with respective neighborhood information.
+- `tfrecordUriPrefix` which includes main samples in `NodeAnchorBasedLinkPredictionSample` protos which contain an
+  anchor node and positive samples with respective neighborhood information.

-- `randomNegativeTfrecordUriPrefix` which includes negative samples in `RootedNodeNeighborhood` protos which contain anchor node and respective neighborhood information.
+- `randomNegativeTfrecordUriPrefix` which includes negative samples in `RootedNodeNeighborhood` protos which contain
+  anchor node and respective neighborhood information.

 ## How do I extend business logic?

-It is not intended that core Subgraph Sampler logic be extended by end users. 
+It is not intended that core Subgraph Sampler logic be extended by end users.

-For example, if you want to implement a new sampling strategy, you can add a new `SamplingOp` to the `subgraph_sampling_strategy.proto` and add implementation of the logic custom query translation class.
+For example, if you want to implement a new sampling strategy, you can add a new `SamplingOp` to the
+`subgraph_sampling_strategy.proto` and add an implementation of the logic in a custom query translation class.

 ## Other

-This component runs on Spark. Some info on monitoring this job: 
+This component runs on Spark. Some info on monitoring this job:

-- The list of all jobs/clusters is available on [Dataproc UI](https://console.cloud.google.com/dataproc/), and we can monitor the overall Spark job statuses and configurations.
+- The list of all jobs/clusters is available on [Dataproc UI](https://console.cloud.google.com/dataproc/), and we can
+  monitor the overall Spark job statuses and configurations.
-- While the cluster is running, we can access Spark UI's WEB INTERFACES tab to monitor each stage of the job in more detail.
\ No newline at end of file
+- While the cluster is running, we can access Spark UI's WEB INTERFACES tab to monitor each stage of the job in more
+  detail.
diff --git a/docs/sphinx/source/components/trainer.md b/docs/sphinx/source/components/trainer.md
index d807c48..ccec729 100644
--- a/docs/sphinx/source/components/trainer.md
+++ b/docs/sphinx/source/components/trainer.md
@@ -1,28 +1,47 @@
 ## Trainer

-The Trainer component reads the outputs of split generator (which paths are specified in the frozen config), and trains a GNN model on the training set, early stops on the performance of the validation set, and finally evaluates on the test set. The training logic is implemented with PyTorch Distributed Data Parallel (DDP) Training, which enables distributed training on multiple GPU cards across multiple worker nodes.
+The Trainer component reads the outputs of split generator (whose paths are specified in the frozen config), and trains
+a GNN model on the training set, early stops on the performance of the validation set, and finally evaluates on the test
+set. The training logic is implemented with PyTorch Distributed Data Parallel (DDP) Training, which enables distributed
+training on multiple GPU cards across multiple worker nodes.

 ## Input

-- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task. 
-- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file - Can be either manually created, or `config_populator` component (recommended approach) can be used which can generate this frozen config from a template config.
+- **job_name** (AppliedTaskIdentifier): which uniquely identifies an end-to-end task.
+- **task_config_uri** (Uri): Path which points to a "frozen" `GbmlConfig` proto yaml file. It can either be created
+  manually, or generated from a template config by the `config_populator` component, which is the recommended
+  approach.
 - **resource_config_uri** (Uri): Path which points to a `GiGLResourceConfig` yaml

 ## What does it do?

-The whole model training contains two main components: (i) the Trainer, which that sets up the environment, and (ii) a user-defined instance of `BaseTrainer` that contains the actual training loop w.r.t. the given task. For example, for node anchor-based link prediction, we have `NodeAnchorBasedLinkPredictionModelingTaskSpec`. Model training involves the following steps:
+The whole model training contains two main components: (i) the Trainer, which sets up the environment, and (ii) a
+user-defined instance of `BaseTrainer` that contains the actual training loop w.r.t. the given task. For example, for
+node anchor-based link prediction, we have `NodeAnchorBasedLinkPredictionModelingTaskSpec`. Model training involves the
+following steps:

 - The Trainer sets up the (optionally distributed) Torch training environment.
+
 - The Trainer reads `GraphMetadata` that was generated by the Data Preprocessor.

-- The Trainer initializes the `BaseTrainer` instance (instance specified at the `trainerClsPath` field in the `trainerConfig` section of the frozen `GbmlConfig`, and with arguments at `trainerArgs`) and initializes the GNN model.
-- We start model training as indicated by the `BaseTrainer` instance. This may look something like:
-  - We initialize training and validation dataloaders (See: `NodeAnchorBasedLinkPredictionDatasetDataloaders` in [dataset_metadata_utils.py](../../python/gigl/src/common/types/pb_wrappers/dataset_metadata_utils.py))
-  - Follow a standard distributed training scheme: each worker loads a batch of data and performs the normal forward and backward passes for model training in a distributed way.
-  - Every fixed number of training batches(`val_every_num_batches`), we evaluate the current model on the validation set with a fixed number of validation batches (`num_val_batches`)
-  - We follow a standard early-stopping strategy on the validation performances on offline metrics, with a configurable patience parameter (`early_stop_patience`) or see `EarlyStopper` utility class in [early_stop.py](../../python/gigl/src/common/modeling_task_specs/utils/early_stop.py)
-  - When early-stopping is triggered to end the training process, we reload the saved model at the best validation batch, and run evaluation (test) it with a fixed number of test batches (`num_test_batches`).
-  - At the end, we return the model and its test performance (offline metrics) back to the Trainer.
+- The Trainer initializes the `BaseTrainer` instance (instance specified at the `trainerClsPath` field in the
+  `trainerConfig` section of the frozen `GbmlConfig`, and with arguments at `trainerArgs`) and initializes the GNN
+  model.
+
+- We start model training as indicated by the `BaseTrainer` instance. This may look something like:
+
+  - We initialize training and validation dataloaders (See: `NodeAnchorBasedLinkPredictionDatasetDataloaders` in
+    [dataset_metadata_utils.py](../../python/gigl/src/common/types/pb_wrappers/dataset_metadata_utils.py))
+  - Follow a standard distributed training scheme: each worker loads a batch of data and performs the normal forward and
+    backward passes for model training in a distributed way.
+  - Every fixed number of training batches (`val_every_num_batches`), we evaluate the current model on the validation
+    set with a fixed number of validation batches (`num_val_batches`)
+  - We follow a standard early-stopping strategy on the validation performance on offline metrics, with a configurable
+    patience parameter (`early_stop_patience`); see the `EarlyStopper` utility class in
+    [early_stop.py](../../python/gigl/src/common/modeling_task_specs/utils/early_stop.py)
+  - When early-stopping is triggered to end the training process, we reload the saved model at the best validation
+    batch, and evaluate (test) it with a fixed number of test batches (`num_test_batches`).
+  - At the end, we return the model and its test performance (offline metrics) back to the Trainer.

 - The Trainer persists output metadata like model parameters and offline metrics (see [Output](#output)).

@@ -44,8 +63,8 @@ trainer.run(
 )
 ```

-Note: If you are training on VertexAI and using a custom class, you will have to provide a docker image (Either `cuda_docker_uri` for GPU training or `cpu_docker_uri` for CPU training.)
-
+Note: If you are training on VertexAI and using a custom class, you will have to provide a docker image (either
+`cuda_docker_uri` for GPU training or `cpu_docker_uri` for CPU training).

 **Command Line**

@@ -61,13 +80,17 @@ python -m \

 Ater the training process finishes:

-- The Trainer saves the trained model’s `state_dict` at specified location (`trainedModelUri` field of `sharedConfig.trainedModelMetadata`).
+- The Trainer saves the trained model’s `state_dict` at the specified location (`trainedModelUri` field of
+  `sharedConfig.trainedModelMetadata`).

-- The trainer logs training metrics to `trainingLogsUri` field of `sharedConfig.trainedModelMetadata`. To view the metrics on your local, you can run the command: `tensorboard --logdir gs://tensorboard_logs_uri_here`
+- The trainer logs training metrics to the `trainingLogsUri` field of `sharedConfig.trainedModelMetadata`. To view the
+  metrics locally, you can run the command: `tensorboard --logdir gs://tensorboard_logs_uri_here`

 ## Custom Usage

-The Trainer is designed to be task-agnostic, with the detailed model and training logics specified in the user-provided `BaseTrainer` instance. Modifying the `BaseTrainer` instance allows maximal flexibility in changing model architecture and training parameters.
+The Trainer is designed to be task-agnostic, with the detailed model and training logic specified in the user-provided
+`BaseTrainer` instance. Modifying the `BaseTrainer` instance allows maximal flexibility in changing model architecture
+and training parameters.

 ## Other

@@ -84,90 +107,119 @@ profilerConfig:
     with_stack: 'True'
 ```

-
 ### Monitoring and logging

-Once the trainer component starts, the training process can be monitored via the gcloud console under Vertex AI Custom Jobs (`https://console.cloud.google.com/vertex-ai/training/custom-jobs?project=`). You can also view the job name, status, jobspec, and more using `gcloud ai custom-jobs list --project `
+Once the trainer component starts, the training process can be monitored via the gcloud console under Vertex AI Custom
+Jobs (`https://console.cloud.google.com/vertex-ai/training/custom-jobs?project=`). You can also view
+the job name, status, jobspec, and more using `gcloud ai custom-jobs list --project `

-On the Vertex AI UI, you can see all the information like machine/acceleratior information, CPU Utilization, GPU utiliization, Network data etc. Here, you will also find the "View logs" tab, which will open the Stackdriver for your job which logs everything from your modeling task spec as the training progresses in real time.
+On the Vertex AI UI, you can see all the information like machine/accelerator information, CPU utilization, GPU
+utilization, network data, etc. Here, you will also find the "View logs" tab, which will open the Stackdriver for your
+job which logs everything from your modeling task spec as the training progresses in real time.

-If you would like to view the logs locally, you can also use: `gcloud ai custom-jobs stream-logs --project= --region=`.
+If you would like to view the logs locally, you can also use:
+`gcloud ai custom-jobs stream-logs --project= --region=`.

 ### Parameters

-We provide some base class implementations for training. 
-See: 
+We provide some base class implementations for training. See:
+
 - `python/gigl/src/common/modeling_task_specs/graphsage_template_modeling_spec.py`
 - `python/gigl/src/common/modeling_task_specs/node_anchor_based_link_prediction_modeling_task_spec.py`
 - `python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py`

-**** Note: many training/model params require dep on using the right model / training setup i.e. specific configurations may not be supported - see individual implementations to understand how each param is used. Training specs are fully customizable - these are only examples
+**Note**: many training/model params depend on using the right model / training setup, i.e. specific
+configurations may not be supported - see individual implementations to understand how each param is used. Training
+specs are fully customizable - these are only examples.

-They all provide runtime arguments similar to below that can help with your model training behaviour/configs. We present example of the args for `node_anchor_based_link_prediction_modeling_task_spec.py` below. Please look at the respective coasses above for more exhaustive list.
+They all provide runtime arguments similar to below that can help with your model training behaviour/configs. We present
+an example of the args for `node_anchor_based_link_prediction_modeling_task_spec.py` below. Please look at the
+respective classes above for a more exhaustive list.

 - Training environment parameters (number of workers for different dataloaders)
-  - `train_main_num_workers`
-  - `train_random_negative_num_workers`
-  - `val_main_num_workers`
-  - `val_random_negative_num_workers`
-  - `test_main_num_workers`
-  - `test_random_negative_num_workers`
-  - Note that training involves multiple dataloaders simultaneously. Take care to specify these parameters in a way which avoids overburdening your machine. It is recommended to specify `(train_main_sample_num_workers + train_random_sample_num_workers + val_main_sample_num_workers + val_random_sample_num_workers < num_cpus)`, and `(test_main_sample_num_workers + test_random_sample_num_workers < num_cpus)` to avoid training stalling due to contention.
-- Modifying the GNN model:
-  - Specified by arg `gnn_model_class_path`
-  - Some Sample GNN models are defined [here](/python/gigl/src/common/models/pyg/homogeneous.py) and initialized in the `init_model` function in ModelingTaskSpec. When trying different GNN models, it is recommended to also include the new GNN architectures under the same file and declare them as is currently done. This cannot currently be done from the default `GbmlConfig` yaml.
+  - `train_main_num_workers`
+  - `train_random_negative_num_workers`
+  - `val_main_num_workers`
+  - `val_random_negative_num_workers`
+  - `test_main_num_workers`
+  - `test_random_negative_num_workers`
+    Note that training involves multiple dataloaders simultaneously. Take care to specify these parameters in a way which
+    avoids overburdening your machine. It is recommended to specify
+    `(train_main_sample_num_workers + train_random_sample_num_workers + val_main_sample_num_workers + val_random_sample_num_workers < num_cpus)`,
+    and `(test_main_sample_num_workers + test_random_sample_num_workers < num_cpus)` to avoid training stalling due to
+    contention.
+- Modifying the GNN model:
+
+  - Specified by arg `gnn_model_class_path`
+  - Some sample GNN models are defined [here](/python/gigl/src/common/models/pyg/homogeneous.py) and initialized in
+    the `init_model` function in ModelingTaskSpec. When trying different GNN models, it is recommended to also include
+    the new GNN architectures under the same file and declare them as is currently done. This cannot currently be done
+    from the default `GbmlConfig` yaml.
- Non Exhaustive list of Model parameters: - - `hidden_dim`: dimension of the hidden layers - - `num_layers`: number of layers in the GNN (this should be the same as numHops under subgraphSamplerConfig) - - `out_channels`: dimension of the output embeddings - - `should_l2_normalize_embedding_layer_output`: whether apply L2 normalization on the output embeddings + + - `hidden_dim`: dimension of the hidden layers + - `num_layers`: number of layers in the GNN (this should be the same as numHops under subgraphSamplerConfig) + - `out_channels`: dimension of the output embeddings + - `should_l2_normalize_embedding_layer_output`: whether apply L2 normalization on the output embeddings - Non Exhaustive list of Training parameters: - - `num_heads` - - `val_every_num_batches`: validation frequence per training batches - - `num_val_batches`: number of validation batches - - `num_test_batches`: number of testing batches - - `optim_class_path`: defaults to "torch.optim.Adam" - - `optim_lr`: learning rate of the optimizer - - `optim_weight_decay`: weight decay of the optimizer - - `clip_grad_norm` - - `lr_scheduler_name`: defaults to "torch.optim.lr_scheduler.ConstantLR" - - `factor`: param for lr scheduler - - `total_iters`: param for lr scheduler - - `main_sample_batch_size`: training batch size - - `random_negative_sample_batch_size`: random negative sample batch size for training - - `random_negative_sample_batch_size_for_evaluation`: random negative sample batch size for evaluation - - `train_main_num_workers` - - `val_main_num_workers` - - `test_main_num_workers` - - `train_random_negative_num_workers` - - `val_random_negative_num_workers` - - `test_random_negative_num_workers` - - `early_stop_criterion`: defaults to "loss" - - `early_stop_patience`: patience for earlystopping - - `task_path`: python class path to supported training tasks i.e. Retrieval `gigl.src.common.models.layers.task.Retrieval`; see gigl.src.common.models.layers.task.py for more info - - `softmax_temp`: temperature parameter in the `softmax` loss - - `should_remove_accidental_hits` - - -### Background for distributed training - -Trainer currently uses PyTorch distributed training abstractions to enable multi-node and multi-GPU training. Some useful terminology and links to learn about these abstractions below. + + - `num_heads` + - `val_every_num_batches`: validation frequence per training batches + - `num_val_batches`: number of validation batches + - `num_test_batches`: number of testing batches + - `optim_class_path`: defaults to "torch.optim.Adam" + - `optim_lr`: learning rate of the optimizer + - `optim_weight_decay`: weight decay of the optimizer + - `clip_grad_norm` + - `lr_scheduler_name`: defaults to "torch.optim.lr_scheduler.ConstantLR" + - `factor`: param for lr scheduler + - `total_iters`: param for lr scheduler + - `main_sample_batch_size`: training batch size + - `random_negative_sample_batch_size`: random negative sample batch size for training + - `random_negative_sample_batch_size_for_evaluation`: random negative sample batch size for evaluation + - `train_main_num_workers` + - `val_main_num_workers` + - `test_main_num_workers` + - `train_random_negative_num_workers` + - `val_random_negative_num_workers` + - `test_random_negative_num_workers` + - `early_stop_criterion`: defaults to "loss" + - `early_stop_patience`: patience for earlystopping + - `task_path`: python class path to supported training tasks i.e. 
Retrieval + `gigl.src.common.models.layers.task.Retrieval`; see gigl.src.common.models.layers.task.py for more info + - `softmax_temp`: temperature parameter in the `softmax` loss + - `should_remove_accidental_hits` + +### Background for distributed training + +Trainer currently uses PyTorch distributed training abstractions to enable multi-node and multi-GPU training. Some +useful terminology and links to learn about these abstractions are below. - **WORLD**: Group of processes/workers that are used for distributed training. -- **WORLD_SIZE**: The number of processes/workers in the distributed training WORLD. + +- **WORLD_SIZE**: The number of processes/workers in the distributed training WORLD. + - **RANK**: The unique id (usually index) of the process/worker in the distributed training WORLD. -- **Data loader worker**: A worker used specifically for loading data; if the dataloader worker is utilizing the same thread/process as a worker in distributed training WORLD, then we may incur blocking execution of training, resulting in slowdowns. -- **[Distributed Data Parallel](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html)**: Pytorch's version of [Data parallalism](https://en.wikipedia.org/wiki/Data_parallelism) across different **processes** (could even be processes on different machines), to speed up traiing on large datasets. +- **Data loader worker**: A worker used specifically for loading data; if the dataloader worker is utilizing the same + thread/process as a worker in distributed training WORLD, then we may incur blocking execution of training, resulting + in slowdowns. +- **[Distributed Data Parallel](https://pytorch.org/tutorials/intermediate/ddp_tutorial.html)**: PyTorch's version of + [Data parallelism](https://en.wikipedia.org/wiki/Data_parallelism) across different **processes** (could even be + processes on different machines), to speed up training on large datasets. -- **[TORCH.DISTRIBUTED package](https://pytorch.org/docs/stable/distributed.html)**: A torch package containing tools for distributed communication and trainings. - - Defines [backends for distributed communication](https://pytorch.org/docs/stable/distributed.html#backends) like `gloo` and `nccl` - as a ML practitioner you should not worry about how these work, but important to know what **devices** and **collective functions** they support. - - Contains **"[Collective functions](https://pytorch.org/docs/stable/distributed.html#collective-functions)"** like `torch.distributed.broadcast`, `torch.distributed.all_gather`, et al. which allow communication of tensors across the **WORLD**. +- **[TORCH.DISTRIBUTED package](https://pytorch.org/docs/stable/distributed.html)**: A torch package containing tools + for distributed communication and training. + - Defines [backends for distributed communication](https://pytorch.org/docs/stable/distributed.html#backends) like + `gloo` and `nccl` - as an ML practitioner you should not worry about how these work, but it is important to know what + **devices** and **collective functions** they support. + - Contains **"[Collective functions](https://pytorch.org/docs/stable/distributed.html#collective-functions)"** like + `torch.distributed.broadcast`, `torch.distributed.all_gather`, et al. which allow communication of tensors across + the **WORLD**.
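To make the WORLD/RANK terminology above concrete, here is a minimal, self-contained sketch of a two-process WORLD exchanging tensors with the `all_gather` collective. It is illustrative only and independent of GiGL: it assumes nothing beyond a working `torch` install (the `gloo` backend ships with CPU builds), and the address/port values are arbitrary choices for local rendezvous.

```python
# Minimal illustration of WORLD, RANK, and a collective function (all_gather).
# Independent of GiGL; assumes only that `torch` is installed.
import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def worker(rank: int, world_size: int) -> None:
    # Each process joins the WORLD via a common rendezvous point.
    os.environ["MASTER_ADDR"] = "127.0.0.1"
    os.environ["MASTER_PORT"] = "29500"
    dist.init_process_group("gloo", rank=rank, world_size=world_size)

    # Every RANK contributes one tensor; all_gather makes all of them
    # visible to every process in the WORLD.
    local = torch.tensor([float(rank)])
    gathered = [torch.zeros(1) for _ in range(world_size)]
    dist.all_gather(gathered, local)
    print(f"RANK {rank} sees: {[t.item() for t in gathered]}")

    dist.destroy_process_group()


if __name__ == "__main__":
    world_size = 2  # WORLD_SIZE: number of workers in the WORLD
    mp.spawn(worker, args=(world_size,), nprocs=world_size)
```

Dataloader workers, by contrast, are plain subprocesses owned by a single training worker; the budget recommendations above exist precisely because both kinds of workers compete for the same CPUs.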
diff --git a/docs/sphinx/source/conf.py b/docs/sphinx/source/conf.py index 8eb8699..fd77e05 100644 --- a/docs/sphinx/source/conf.py +++ b/docs/sphinx/source/conf.py @@ -53,7 +53,7 @@ "icon_links": [ { "name": "GitHub", - "url": "https://github.com/snap-research/GiGL", + "url": "https://github.com/Snapchat/GiGL", "icon": "fa-brands fa-github", }, { @@ -82,4 +82,3 @@ # "examples/no-sidebar": [], #}, } - diff --git a/docs/sphinx/source/user_guide/assets/docker_images.md b/docs/sphinx/source/user_guide/assets/docker_images.md index 121f8c6..c67e056 100644 --- a/docs/sphinx/source/user_guide/assets/docker_images.md +++ b/docs/sphinx/source/user_guide/assets/docker_images.md @@ -1 +1 @@ -# Docker Images \ No newline at end of file +# Docker Images diff --git a/docs/sphinx/source/user_guide/assets/public_data.md b/docs/sphinx/source/user_guide/assets/public_data.md index 11e93f2..e86b010 100644 --- a/docs/sphinx/source/user_guide/assets/public_data.md +++ b/docs/sphinx/source/user_guide/assets/public_data.md @@ -1 +1 @@ -# Public Assets \ No newline at end of file +# Public Assets diff --git a/docs/sphinx/source/user_guide/config_guides/resource_config_guide.md b/docs/sphinx/source/user_guide/config_guides/resource_config_guide.md index 6e8a531..4fce6d6 100644 --- a/docs/sphinx/source/user_guide/config_guides/resource_config_guide.md +++ b/docs/sphinx/source/user_guide/config_guides/resource_config_guide.md @@ -1,12 +1,12 @@ # Resource Config Guide -GiGL Resource Config is a yaml file that is passed in at runtime and specifies the resource and environment configurations for each component in the GiGL. The proto definition for GiGL Resource Config can be seen [here](/proto/snapchat/research/gbml/gigl_resource_config.proto) - +GiGL Resource Config is a yaml file that is passed in at runtime and specifies the resource and environment +configurations for each component in GiGL. The proto definition for GiGL Resource Config can be seen +[here](https://github.com/Snapchat/GiGL/blob/main/proto/snapchat/research/gbml/gigl_resource_config.proto) ## Prerequisites -If you don't have cloud assets already setup i.e. a GCP project. See [guide](../getting_started/cloud_setup_guide.md) - +If you don't have cloud assets already set up, i.e. a GCP project, see the [guide](../getting_started/cloud_setup_guide.md) ## Resource Config Breakdown @@ -55,7 +55,7 @@ trainer_config: vertex_ai_trainer_config: machine_type: "" # e.g. n1-highmem-16 gpu_type: "" # e.g. nvidia-tesla-p100 - gpu_limit: 1 + gpu_limit: 1 num_replicas: 1 inferencer_config: num_workers: 1 @@ -66,14 +66,16 @@ inferencer_config:
- **Shared Resource Config** -The `shared_resource_config` field includes settings that apply across all GiGL components. You need to customize this section according to your GCP project specifics. +The `shared_resource_config` field includes settings that apply across all GiGL components. You need to customize this +section according to your GCP project specifics. -- **Resource Labels**: Resource labels help you manage costs and organzie resources. Modify the `resource_labels` section to fit your project's labeling scheme. +- **Resource Labels**: Resource labels help you manage costs and organize resources. Modify the `resource_labels` + section to fit your project's labeling scheme. -- **Common Compute Config**: This section includes important project specifications. Fill out the fields with your project ID, region, asset buckets, and service account email. +- **Common Compute Config**: This section includes important project specifications. Fill out the fields with your + project ID, region, asset buckets, and service account email. ```yaml common_compute_config: @@ -86,7 +88,9 @@ common_compute_config: **Preprocessor Config** -The `preprocessor_config` specifies settings for the Dataflow preprocessor component, includes number of workers, machine type, and disk size. You must specify both the `node_preprocessor_config` and `edge_preprocessor_config`. See example: +The `preprocessor_config` specifies settings for the Dataflow preprocessor component, including the number of workers, +machine type, and disk size. You must specify both the `node_preprocessor_config` and `edge_preprocessor_config`. See +example: ```yaml preprocessor_config: @@ -104,7 +108,8 @@ preprocessor_config: **Subgraph Sampler Config** -The `subgraph_sampler_config` specifies settings for the Spark subgraph sampler component, includes machine type, local SSDs, and number of replicas. See example: +The `subgraph_sampler_config` specifies settings for the Spark subgraph sampler component, including machine type, local +SSDs, and number of replicas. See example: ```yaml subgraph_sampler_config: @@ -115,7 +120,8 @@ subgraph_sampler_config: **Split Generator Config** -The `split_generator_config` specifies settings for the Spark split generator component, includes machine type, local SSDs, and number of replicas +The `split_generator_config` specifies settings for the Spark split generator component, including machine type, local +SSDs, and number of replicas ```yaml split_generator_config: @@ -126,30 +132,33 @@ split_generator_config: **Trainer Config** -The `trainer_config` specifies settings for the trainer config, currently supporting Vertex AI training or Local Training. +The `trainer_config` specifies settings for the trainer component, currently supporting Vertex AI training or Local +Training. -- **Vertex AI Trainer Config**: The `vertex_ai_trainer_config` field of the trainer config requires a machine type, GPU type, GPU limit, and number of replicas. See example: +- **Vertex AI Trainer Config**: The `vertex_ai_trainer_config` field of the trainer config requires a machine type, GPU + type, GPU limit, and number of replicas. See example: - ```yaml - trainer_config: - vertex_ai_trainer_config: - machine_type: "n1-standard-8" - gpu_type: "nvidia-tesla-t4" - gpu_limit: 1 - num_replicas: 1 - ``` - -- **Local Trainer Config**: The `local_trainer_config` field of the trainer config just requires `num_workers` which can be used for data loaders.
+ ```yaml + trainer_config: + vertex_ai_trainer_config: + machine_type: "n1-standard-8" + gpu_type: "nvidia-tesla-t4" + gpu_limit: 1 + num_replicas: 1 + ``` +- **Local Trainer Config**: The `local_trainer_config` field of the trainer config requires only `num_workers`, which can + be used for data loaders. **Inferencer Config** -The `inferencer_config` specifies settings for the Dataflow preprocessor component, includes number of workers, machine type, and disk size. See example: +The `inferencer_config` specifies settings for the Dataflow inferencer component, including the number of workers, machine +type, and disk size. See example: ```yaml inferencer_config: num_workers: 1 max_num_workers: 256 machine_type: "c2-standard-16" - disk_size_gb: 100 + disk_size_gb: 100 ``` diff --git a/docs/sphinx/source/user_guide/config_guides/task_config_guide.md b/docs/sphinx/source/user_guide/config_guides/task_config_guide.md index 9cd9377..ad7ac8c 100644 --- a/docs/sphinx/source/user_guide/config_guides/task_config_guide.md +++ b/docs/sphinx/source/user_guide/config_guides/task_config_guide.md @@ -1,12 +1,14 @@ # Task Config Guide We currently provide the following tasks metadata options: -`NodeBasedTaskMetadata`,`NodeAnchorBasedLinkPredictionTaskMetadata`,`LinkBasedTaskMetadata`. -However, only `NodeAnchorBasedLinkPredictionTaskMetadata` is currently supported. +`NodeBasedTaskMetadata`,`NodeAnchorBasedLinkPredictionTaskMetadata`,`LinkBasedTaskMetadata`. However, only +`NodeAnchorBasedLinkPredictionTaskMetadata` is currently supported. To Specify the task configuration in GiGL, you will have to specify `TaskMetadata` in your config. - -Example of a `NodeAnchorBasedLinkPredictionTaskMetadata` for a graph with two edge types `user-to-story` `story-to-user`, and where the supervision edge type is `story_to_user`: + +Example of a `NodeAnchorBasedLinkPredictionTaskMetadata` for a graph with two edge types `user-to-story` and +`story-to-user`, and where the supervision edge type is `user_to_story`: + ```yaml taskMetadata: nodeAnchorBasedLinkPredictionTaskMetadata: @@ -15,13 +17,16 @@ taskMetadata: relation: to dstNodeType: story ``` + In this example, the user_to_story edge will be used to sample supervision/positive edges for each user sample.
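For a quick sanity check of a task config like the ones above, it can also be loaded programmatically. The sketch below uses `GbmlConfigPbWrapper` as it appears in the MAG240M example notebook later in this changeset; the `task_metadata` attribute name is an assumption inferred from the proto-style `taskMetadata` key in the YAML.

```python
# A sketch of loading and inspecting a task config. `GbmlConfigPbWrapper` is
# used as in the MAG240M example notebook; the `task_metadata` field name is
# an assumption based on the `taskMetadata` YAML key above.
from gigl.common import LocalUri
from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper

task_config = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(
    gbml_config_uri=LocalUri("path/to/task_config.yaml")
)

# Print the supervision edge types declared under taskMetadata.
print(task_config.gbml_config_pb.task_metadata)
```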
-Example of a `NodeAnchorBasedLinkPredictionTaskMetadata` for a user-user graph where the supervision edge type is `user_to_user`: +Example of a `NodeAnchorBasedLinkPredictionTaskMetadata` for a user-user graph where the supervision edge type is +`user_to_user`: + ```yaml nodeAnchorBasedLinkPredictionTaskMetadata: supervisionEdgeTypes: - srcNodeType: user relation: is_friends_with dstNodeType: user -``` \ No newline at end of file +``` diff --git a/docs/sphinx/source/user_guide/contributing/contributing_guide.md b/docs/sphinx/source/user_guide/contributing/contributing_guide.md index aa3cd8d..4f1c99f 100644 --- a/docs/sphinx/source/user_guide/contributing/contributing_guide.md +++ b/docs/sphinx/source/user_guide/contributing/contributing_guide.md @@ -1 +1 @@ -# Contributing Guide \ No newline at end of file +# Contributing Guide diff --git a/docs/sphinx/source/user_guide/custom_specs/data_preprocessor_spec_guide.md b/docs/sphinx/source/user_guide/custom_specs/data_preprocessor_spec_guide.md index 1990d14..19adab1 100644 --- a/docs/sphinx/source/user_guide/custom_specs/data_preprocessor_spec_guide.md +++ b/docs/sphinx/source/user_guide/custom_specs/data_preprocessor_spec_guide.md @@ -1 +1 @@ -# Data Preprocessor Spec Guide \ No newline at end of file +# Data Preprocessor Spec Guide diff --git a/docs/sphinx/source/user_guide/custom_specs/modelling_task_spec_guide.md b/docs/sphinx/source/user_guide/custom_specs/modelling_task_spec_guide.md index a6a31d1..c5afe45 100644 --- a/docs/sphinx/source/user_guide/custom_specs/modelling_task_spec_guide.md +++ b/docs/sphinx/source/user_guide/custom_specs/modelling_task_spec_guide.md @@ -1 +1 @@ -# Modeling Task Spec Guide \ No newline at end of file +# Modeling Task Spec Guide diff --git a/docs/sphinx/source/user_guide/examples/example1.md b/docs/sphinx/source/user_guide/examples/example1.md index 23602ad..f2deac6 100644 --- a/docs/sphinx/source/user_guide/examples/example1.md +++ b/docs/sphinx/source/user_guide/examples/example1.md @@ -1 +1 @@ -# Example Walkthrough #1 \ No newline at end of file +# Example Walkthrough #1 diff --git a/docs/sphinx/source/user_guide/examples/example2.md b/docs/sphinx/source/user_guide/examples/example2.md index 7089330..930f707 100644 --- a/docs/sphinx/source/user_guide/examples/example2.md +++ b/docs/sphinx/source/user_guide/examples/example2.md @@ -1 +1 @@ -# Example Walkthrough #2 \ No newline at end of file +# Example Walkthrough #2 diff --git a/docs/sphinx/source/user_guide/getting_started/cloud_setup_guide.md b/docs/sphinx/source/user_guide/getting_started/cloud_setup_guide.md index f454c95..2e82916 100644 --- a/docs/sphinx/source/user_guide/getting_started/cloud_setup_guide.md +++ b/docs/sphinx/source/user_guide/getting_started/cloud_setup_guide.md @@ -1,4 +1,5 @@ # Cloud Setup Guide + ## GCP Project Setup Guide - A GCP account with billing enabled. @@ -21,11 +22,14 @@ - notebooks.legacyViewer - aiplatform.user - dataproc.worker -- Created a GCS bucket(s) for storing assets. You can specify two different buckets for storing temporary and permanent assets. At large scale GiGL creates alot of intermediary assets; so you may want to create a bucket for storing these temp assets and set a lifecycle rule on it to automatically delete assets. +- Created a GCS bucket(s) for storing assets. You can specify two different buckets for storing temporary and permanent + assets. 
At large scale GiGL creates a lot of intermediary assets; so you may want to create a bucket for storing these + temp assets and set a lifecycle rule on it to automatically delete assets. - Give your service account storage.objectAdmin perms for the bucket(s) you created -Refer to the [GCP documentation](https://cloud.google.com/docs) for detailed instructions on meeting these prerequisites. - +Refer to the [GCP documentation](https://cloud.google.com/docs) for detailed instructions on meeting these +prerequisites. ## AWS Project Setup Guide + - TODO (Not yet supported) diff --git a/docs/sphinx/source/user_guide/getting_started/installation.md b/docs/sphinx/source/user_guide/getting_started/installation.md index 8c10c87..2e1e9ca 100644 --- a/docs/sphinx/source/user_guide/getting_started/installation.md +++ b/docs/sphinx/source/user_guide/getting_started/installation.md @@ -1,43 +1,48 @@ # Installation -There are various ways to use GiGL, depending on your preferred environment. These are the current environments supported by GiGL +There are various ways to use GiGL, depending on your preferred environment. These are the current environments +supported by GiGL -| | Mac (Arm64) | Linux CPU | CUDA 11.8 | CUDA 12.1 | -|---------|-------------------|-------------------|-------------------|-------------------| -| Python | | | | | -| 3.9 | Supported | Supported | Supported | Not Yet Supported | -| 3.10 | Not Yet Supported | Not Yet Supported | Not Yet Supported | Not Yet Supported | +| | Mac (Arm64) | Linux CPU | CUDA 11.8 | CUDA 12.1 | +| ------ | ----------------- | ----------------- | ----------------- | ----------------- | +| Python | | | | | +| 3.9 | Supported | Supported | Supported | Not Yet Supported | +| 3.10 | Not Yet Supported | Not Yet Supported | Not Yet Supported | Not Yet Supported | -The easiest way to set up [gigl](https://pypi.org/project/gigl/) is to install it using pip. However, before installing the package, make sure you have the following prerequisites: +The easiest way to set up [gigl](https://pypi.org/project/gigl/) is to install it using pip. However, before installing +the package, make sure you have the following prerequisites: - PyTorch Version: 2.1.2 (see [PyTorch Installation Docs](https://pytorch.org/get-started/locally/)) - Torchvision Version: 0.16.2 - Torchaudio Version: 2.1.2 -To simplify this process, the steps to create a new conda enviornment and install gigl (and its dependencies) are shown below (seperated by platform/OS). +To simplify this process, the steps to create a new conda environment and install gigl (and its dependencies) are shown +below (separated by platform/OS).
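After following the platform-specific steps in the next section, a short Python session is a convenient way to sanity-check the environment. The sketch below simply asserts the pinned prerequisite versions listed above and that `gigl` imports cleanly; it is a convenience check, not part of the official install procedure.

```python
# Post-install sanity check (a sketch; run after the installation steps below).
import torch
import torchvision

# The versions pinned above; local builds may carry suffixes like "+cpu".
assert torch.__version__.startswith("2.1.2"), torch.__version__
assert torchvision.__version__.startswith("0.16.2"), torchvision.__version__

import gigl  # noqa: F401  # should import cleanly once `pip install gigl[...]` succeeds

print("GiGL environment looks good.")
```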
## Installation Steps ::::{tab-set} -:::{tab-item} ARM Mac +:::{tab-item} ARM Mac Create the conda environment (python 3.9) + ```bash conda create -y -c conda-forge --name ANY_NAME python=3.9 pip-tools ``` Activate the newly created environment: + ```bash conda activate ANY_NAME ``` Install prerequisites + ```bash conda install pytorch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 -c pytorch -y ``` Install GiGL + ```bash pip install gigl[torch21-cpu, transform] ``` @@ -47,21 +52,25 @@ pip install gigl[torch21-cpu, transform] :::{tab-item} Linux CPU Create the conda environment (python 3.9) + ```bash conda create -y -c conda-forge --name ANY_NAME python=3.9 pip-tools ``` Activate the newly created environment: + ```bash conda activate ANY_NAME ``` Install prerequisites + ```bash conda install pytorch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 cpuonly -c pytorch -y ``` Install GiGL + ```bash pip install gigl[torch21-cpu, transform] ``` @@ -71,26 +80,29 @@ pip install gigl[torch21-cpu, transform] :::{tab-item} Linux CUDA 11.8 Create the conda environment (python 3.9) + ```bash conda create -y -c conda-forge --name ANY_NAME python=3.9 pip-tools ``` Activate the newly created environment: + ```bash conda activate ANY_NAME ``` Install prerequisites + ```bash conda install pytorch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 pytorch-cuda=11.8 -c pytorch -c nvidia ``` Install GiGL + ```bash pip install gigl[torch21-cuda-118, transform] ``` ::: -:::: -::::: \ No newline at end of file +:::: +::::: diff --git a/docs/sphinx/source/user_guide/getting_started/orchestration.md b/docs/sphinx/source/user_guide/getting_started/orchestration.md index cfea3cc..b7d7083 100644 --- a/docs/sphinx/source/user_guide/getting_started/orchestration.md +++ b/docs/sphinx/source/user_guide/getting_started/orchestration.md @@ -1,11 +1,15 @@ # Orchestration -GiGL is designed to support easy end to end orchestration of your GNN tasks/workflows with minimal setup required. This page outlines three ways to orchestrate GiGL for after you have set up your configs (See: [quick start](../config_guides/task_config_guide.md) if you have not done so). +GiGL is designed to support easy end to end orchestration of your GNN tasks/workflows with minimal setup required. This +page outlines three ways to orchestrate GiGL after you have set up your configs (see the +[quick start](../config_guides/task_config_guide.md) if you have not done so). ## Local Runner -The local runner provides a simple interface to kick off an end to end GiGL pipeline. + +The local runner provides a simple interface to kick off an end to end GiGL pipeline. 1. Create a pipeline config. The pipeline config takes in: + - applied_task_identifier: your job name (string) - template_task_config_uri: the URI to your template task config (Uri) - resource_config_uri: The URI to your resource config (URI) @@ -29,7 +33,7 @@ PipelineConfig( 2. Initialize and Run -Now you can create the GiGL runner object and kick off a pipeline. +Now you can create the GiGL runner object and kick off a pipeline. Example: @@ -49,47 +53,19 @@ Runner.run_data_preprocessor(pipeline_config=pipeline_config) ## Kubeflow Orchestration -GiGL also supports orchestration of your workflows using [Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/v2/) using a `KfpOrchestrator` class. - -This guide assumes you have a KFP pipeline deployed and access to the experiment ID, pipeline ID, and KFP host.
If not, you can see [Kubeflow Docs](https://www.kubeflow.org/docs/components/pipelines/v1/sdk/connect-api/). - -The `KfpOrchestrator` requires your Kubeflow metadata, which can be passed in 2 different ways. - -- **`kfp_metadata`** (`KfpEnvMetadata`, optional): Instance containing KFP environment metadata. Loaded from environment variables if not provided. -- **`env_path`** (`str`, optional): Path to the environment file with KFP metadata. Defaults to the current directory. - - -**KFP_HOST**: This is the URL of your Kubeflow Pipelines endpoint. You can find it in the Kubeflow user interface or in your cloud provider's Kubernetes cluster configuration where Kubeflow is deployed. - -**K8_SA** (Kubernetes Service Account): The name of the Kubernetes service account used by your pipeline runs. This can typically be found in your Kubernetes cluster configuration or set up through your cluster's service account management. - -**EXPERIMENT_ID**: This is the ID of the experiment under which your pipeline runs will be grouped. You can create a new experiment in the Kubeflow Pipelines UI and use the generated ID, or you can use the ID of an existing experiment. - -**PIPELINE_ID**: The unique identifier for your pipeline. This ID can be obtained from the Kubeflow Pipelines UI after you have initially uploaded your pipeline. - -**EXPERIMENT_NAME** (optional): The name of the experiment in Kubeflow. This is usually set when you create an experiment in the Kubeflow UI and can be used for easier identification of your experiment runs. - -For more information see [Kubeflow Quickstart](https://www.kubeflow.org/docs/components/pipelines/v2/installation/quickstart/) - -The .env file would look like: - -``` -KFP_HOST=https://example-kubeflow-host.com -K8_SA=my-k8-service-account -EXPERIMENT_ID=my-experiment-id -PIPELINE_ID=my-pipeline-id -EXPERIMENT_NAME=my-experiment-name -``` +GiGL also supports orchestration of your workflows using +[Kubeflow Pipelines](https://www.kubeflow.org/docs/components/pipelines/v2/) via a `KfpOrchestrator` class. We make +use of Vertex AI to run the Kubeflow Pipelines. ### Usage Example ```python -from gigl.orchestration.kubeflow.kfp_orchestrator import KfpOrchestrator +from gigl.orchestration.kubeflow.runner import KfpOrchestrator from gigl.common import UriFactory from gigl.src.common.types import AppliedTaskIdentifier -orchestrator = KfpOrchestrator("path/to/.env") +orchestrator = KfpOrchestrator() task_config_uri = UriFactory.create_uri("gs://path/to/task_config.yaml") resource_config_uri = UriFactory.create_uri("gs://path/to/resource_config.yaml") @@ -104,10 +80,10 @@ orchestrator.run( ``` - ## Importable GiGL -You may want to integrate gigl into your existing workflows or create custom orchestration logic. This can be done by importing the components and using each of their `.run()` components. +You may want to integrate gigl into your existing workflows or create custom orchestration logic. This can be done by +importing the components and using each of their `.run()` methods.
### Trainer Component Example: diff --git a/docs/sphinx/source/user_guide/getting_started/quick_start.md b/docs/sphinx/source/user_guide/getting_started/quick_start.md index fdc05bc..67698c7 100644 --- a/docs/sphinx/source/user_guide/getting_started/quick_start.md +++ b/docs/sphinx/source/user_guide/getting_started/quick_start.md @@ -1,56 +1,70 @@ # Quick Start Guide -GiGL is a flexible framework that allows customization for many graph ML tasks in its components like data data pre-processing, training logic, inference. - -This page outlines the steps needed to get up and running an end to end pipeline in different scenarios, starting from a simple local setup to more complex cloud-based operations. +GiGL is a flexible framework that allows customization for many graph ML tasks in its components like data +pre-processing, training logic, and inference. +This page outlines the steps needed to get an end to end pipeline up and running in different scenarios, starting from a +simple local setup to more complex cloud-based operations. ## Install GiGL -Before proceeding, make sure you have correctly installed `gigl` by following the [installation guide](./installation.md). + +Before proceeding, make sure you have correctly installed `gigl` by following the +[installation guide](./installation.md). ## Quick Start: Local -:::{admonition} Note -:class: note +:::{admonition} Note +:class: note -Not yet supported -::: +Not yet supported +::: -Running an end to end GiGL pipeline on your local machine is not yet supported. This would help provide a quick way to test functionalities without any cloud dependencies. Stay tuned for future releases! +Running an end to end GiGL pipeline on your local machine is not yet supported. This would help provide a quick way to +test functionalities without any cloud dependencies. Stay tuned for future releases! ## Quick Start: Running Distributed on Cloud -This section outlines the steps needed to get up and running an end to end pipeline (via GCP) on an in-built sample task: training and inference for transductive node-anchor based link prediction task on Cora (homogeneous). Running in the cloud enables the usage of services like Dataproc, VertexAI, and Dataflow to enable large-scale distributed/optimized performance. +This section outlines the steps needed to get an end to end pipeline up and running (via GCP) on a built-in sample +task: training and inference for a transductive node-anchor based link prediction task on Cora (homogeneous). Running in +the cloud enables the usage of services like Dataproc, VertexAI, and Dataflow to enable large-scale +distributed/optimized performance. ### Config Setup -To run an end to end pipeline in GiGL, two config files are required. In this guide, some samples/templates are provided to get started but these would need to be modified as needed for custom tasks. +To run an end to end pipeline in GiGL, two config files are required. In this guide, some samples/templates are provided +to get started, but these would need to be modified as needed for custom tasks. **Resource Config**: -The resource config contains GCP project specific information (service account, buckets, etc.) as well as GiGL Component resource allocation. To setup cloud resources for `shared_resource_config.common_compute_config`, see [cloud setup guide](./cloud_setup_guide.md). +The resource config contains GCP project specific information (service account, buckets, etc.) as well as GiGL Component +resource allocation.
To set up cloud resources for `shared_resource_config.common_compute_config`, see the +[cloud setup guide](./cloud_setup_guide.md). -Once you have setup your cloud project, you can populate the `common_compute_config` section of the template resource config provided in `GiGL/docs/examples/template_resource_config.yaml`. The remainder of the resource config is populated with some pre-defined values which should be suitable for this task. +Once you have set up your cloud project, you can populate the `common_compute_config` section of the template resource +config provided in `GiGL/docs/examples/template_resource_config.yaml`. The remainder of the resource config is populated +with some pre-defined values which should be suitable for this task. For more information, see [resource config guide](../config_guides/resource_config_guide.md). - **Task Config**: -The template task config is for populating custom class paths, custom arguments, and data configuations which will be passed into config populator. For task config usage/spec creation, see the [task_config_guide](../config_guides/task_config_guide.md). - -In this guide, we will be using a built-in preprocessor config to run on one of GiGL's supported mocked datasets (in this specific example `cora_homogeneous_node_anchor_edge_features`). The other supported mocked datasets can be seen in `python/tests/test_assets/dataset_mocking/lib/mocked_dataset_artifact_metadata.json` +The template task config is for populating custom class paths, custom arguments, and data configurations which will be +passed into the config populator. For task config usage/spec creation, see the +[task_config_guide](../config_guides/task_config_guide.md). -The path for the template task config is: `python/tests/test_assets/dataset_mocking/pipeline_test_assets/configs/e2e_node_anchor_based_link_prediction_template_gbml_config.yaml` +In this guide, we will be using a built-in preprocessor config to run on one of GiGL's supported mocked datasets (in +this specific example `cora_homogeneous_node_anchor_edge_features`). The other supported mocked datasets can be seen in +`python/tests/test_assets/dataset_mocking/lib/mocked_dataset_artifact_metadata.json` +The path for the template task config is: +`python/tests/test_assets/dataset_mocking/pipeline_test_assets/configs/e2e_node_anchor_based_link_prediction_template_gbml_config.yaml` ### Running an End To End GiGL Pipeline Now that we have our two config files setup, we can now kick off an end to end GiGL run. -GiGL supports various ways to orchestrate an end to end run such as Kubeflow Orchestration, GiGL Runner, and manual component import and running as needed. For more details see [here](./orchestration.md) +GiGL supports various ways to orchestrate an end to end run, such as Kubeflow Orchestration, the GiGL Runner, and manual +component import and running as needed. For more details, see [here](./orchestration.md) -Below is an example on how we can use GiGL Runner and use the specs we created above. +Below is an example of how we can use the GiGL Runner with the specs we created above. ```python @@ -71,10 +85,16 @@ runner.run() ## Quick Start: Digging Deeper and Advanced Usage -Now that you have an idea on how GiGL works, you may want to explore advanced customization options for your specific tasks.
This section directs you to various guides that detail how to create and modify task specifications, use custom data, and general customization: +Now that you have an idea of how GiGL works, you may want to explore advanced customization options for your specific +tasks. This section directs you to various guides that detail how to create and modify task specifications, use custom +data, and general customization: -- **Task Spec Customization**: For any custom logic needed at the component level, like pulling your own data, writing custom training/inference logic, or task specific arguments, see the [task_config_guide](../config_guides/task_config_guide.md). +- **Task Spec Customization**: For any custom logic needed at the component level, like pulling your own data, writing + custom training/inference logic, or task specific arguments, see the + [task_config_guide](../config_guides/task_config_guide.md). -- **Behind the Scenes**: To better understand how each of GiGL's components interact and operate, see the [components page](../overview/components.md) +- **Behind the Scenes**: To better understand how each of GiGL's components interact and operate, see the + [components page](../overview/components.md) -- **Examples**: For easy references and make your next steps easier, various example walkthroughs are available on the examples page. See [here](../examples/) +- **Examples**: For easy reference and to make your next steps easier, various example walkthroughs are available on the + examples page. See [here](../examples/) diff --git a/docs/sphinx/source/user_guide/index.md b/docs/sphinx/source/user_guide/index.md index 0ab153e..6ea15e0 100644 --- a/docs/sphinx/source/user_guide/index.md +++ b/docs/sphinx/source/user_guide/index.md @@ -66,6 +66,3 @@ contributing/local_setup trouble_shooting/faq ``` - - - diff --git a/docs/sphinx/source/user_guide/overview/components.md b/docs/sphinx/source/user_guide/overview/components.md index 21faeba..44cfe24 100644 --- a/docs/sphinx/source/user_guide/overview/components.md +++ b/docs/sphinx/source/user_guide/overview/components.md @@ -1,79 +1,64 @@ # GiGL Components -::::{grid} 1 2 2 3 -:gutter: 1 1 1 2 +::::{grid} 1 2 2 3 +:gutter: 1 1 1 2 :::{grid-item-card} Config Populator ```{image} ../../_static/images/config_populator_icon.png -:target: +:target: :width: 100% ``` -Component for processing template config files and updating with fields that are needed for downstream components. -+++ -[More Details](../../components/config_populator) -::: +Component for processing template config files and updating with fields that are needed for downstream components. ++++ +[More Details](../../components/config_populator) +::: :::{grid-item-card} Data Preprocessor ```{image} ../../_static/images/data_preprocessor_icon.png -:target: +:target: :width: 100% ``` -Component for reading and processing node, edge, and feature data and transforming it as needed for downstream components. -+++ -[More Details](../../components/data_preprocessor) -::: +Component for reading and processing node, edge, and feature data and transforming it as needed for downstream +components. ++++ +[More Details](../../components/data_preprocessor) +::: :::{grid-item-card} Subgraph Sampler ```{image} ../../_static/images/subgraph_sampler_icon.png -:target: +:target: :width: 100% ``` -Component that generates k-hop localized subgraphs for each node in the graph.
-+++ -[More Details](../../components/subgraph_sampler) -::: +Component that generates k-hop localized subgraphs for each node in the graph. ++++ +[More Details](../../components/subgraph_sampler) +::: :::{grid-item-card} Split Generator ```{image} ../../_static/images/split_generator_icon.png -:target: +:target: :width: 100% ``` -Component to split the data into training, validation, and test sets. -+++ -[More Details](../../components/split_generator) -::: +Component to split the data into training, validation, and test sets. ++++ +[More Details](../../components/split_generator) +::: :::{grid-item-card} Trainer ```{image} ../../_static/images/trainer_icon.png -:target: +:target: :width: 100% ``` -Component to run distributed training either locally or on the cloud. -+++ -[More Details](../../components/trainer) -::: +Component to run distributed training either locally or on the cloud. ++++ +[More Details](../../components/trainer) +::: :::{grid-item-card} Inferencer ```{image} ../../_static/images/inferencer_icon.png -:target: +:target: :width: 100% ``` -Compoennt that runs inference to generate output embeddings and/or predictions -+++ -[More Details](../../components/inferencer) -::: +Component that runs inference to generate output embeddings and/or predictions ++++ +[More Details](../../components/inferencer) +::: - -:::: \ No newline at end of file +:::: diff --git a/docs/sphinx/source/user_guide/overview/what_is_gigl.md b/docs/sphinx/source/user_guide/overview/what_is_gigl.md index f008759..5079b4e 100644 --- a/docs/sphinx/source/user_guide/overview/what_is_gigl.md +++ b/docs/sphinx/source/user_guide/overview/what_is_gigl.md @@ -1,34 +1,76 @@ # What is GiGL? -GiGL (Gigantic Graph Learning) is an open-source library designed for training and inference of Graph Neural Networks (GNNs) at a very large scale, capable of handling billion-scale heterogeneous and feature-rich graph data. This library provides: -- support of both supervised and unsupervised machine learning applications on graph data - including tasks like node classification, link prediction, self-supervised node representation learning, etc. -- abstracted designs to gracefully interface with modern ML libraries such as [PyTorch](https://pytorch.org/) and [TensorFlow](https://www.tensorflow.org/) -- flexible GNNs modeling with commonly used graph ML modeling libraries such as [PyG](https://github.com/pyg-team/pytorch_geometric) and [DGL](https://github.com/dmlc/dgl), -- utilities to enable data transformation, orchestration and pipelining of GNN workflows, which are particularly useful in large-scale deployments and recurrent applications. +GiGL (Gigantic Graph Learning) is an open-source library designed for training and inference of Graph Neural Networks +(GNNs) at a very large scale, capable of handling billion-scale heterogeneous and feature-rich graph data. This library +provides: -At high level, GiGL abstracts the complicated and distributed processing of the gigantic graph data aways from the users, such that users can focus on the graph ML modeling with the open-source libraries that they might already be familiar with (PyG, DGL, etc). For more background, please check out our [research blog]() and [paper](). +- support of both supervised and unsupervised machine learning applications on graph data - including tasks like node + classification, link prediction, self-supervised node representation learning, etc.
+- abstracted designs to gracefully interface with modern ML libraries such as [PyTorch](https://pytorch.org/) and + [TensorFlow](https://www.tensorflow.org/) +- flexible GNN modeling with commonly used graph ML modeling libraries such as + [PyG](https://github.com/pyg-team/pytorch_geometric) and [DGL](https://github.com/dmlc/dgl), +- utilities to enable data transformation, orchestration and pipelining of GNN workflows, which are particularly useful + in large-scale deployments and recurrent applications. + +At a high level, GiGL abstracts the complicated and distributed processing of the gigantic graph data away from the +users, such that users can focus on the graph ML modeling with the open-source libraries that they might already be +familiar with (PyG, DGL, etc). For more background, please check out our [research blog](<>) and [paper](<>). # Why use GiGL? -GiGL was designed to address a single need: to enable ML researchers, engineers, and practitioners explore and iterate on state-of-the-art graph ML models on large-scale graph data without having to stray far from the familiarities of common open-source modeling libraries like PyG and DGL which are widely adopted in the research community. These libraries have immense community support, especially for modeling advances and allowing native state-of-the-art GNN research implementations. However, using these libraries to scale up to extremely large graphs beyond million-scale is challenging and non-trivial. +GiGL was designed to address a single need: to enable ML researchers, engineers, and practitioners to explore and iterate +on state-of-the-art graph ML models on large-scale graph data without having to stray far from the familiarities of +common open-source modeling libraries like PyG and DGL which are widely adopted in the research community. These +libraries have immense community support, especially for modeling advances and allowing native state-of-the-art GNN +research implementations. However, using these libraries to scale up to extremely large graphs beyond million-scale is +challenging and non-trivial. -GiGL is designed to interface cleanly with these libraries (for the benefit of ML researchers and engineers), while handling scalability challenges with distributed data transformation, subgraph sampling, and persistence behind-the-scenes with performant distributed libraries which the end-user needs minimal understanding of. Moreover, GiGL enables large-scale component re-use, i.e. allowing for settings where expensive transformation and subgraph sampling operations required to train GNNs can be generated once and re-used for hyperparameter tuning, experimentation, and iteration multiple times with large cost-amortization potential. This can be hugely useful in maintaining low and efficient cost-profiles in production settings at both training and inference-time, where multiple team-members are iterating on models or multiple production inference pipelines are running. +GiGL is designed to interface cleanly with these libraries (for the benefit of ML researchers and engineers), while +handling scalability challenges with distributed data transformation, subgraph sampling, and persistence +behind-the-scenes with performant distributed libraries which the end-user needs minimal understanding of. Moreover, +GiGL enables large-scale component re-use, i.e.
allowing for settings where expensive transformation and subgraph +sampling operations required to train GNNs can be generated once and re-used for hyperparameter tuning, experimentation, +and iteration multiple times with large cost-amortization potential. This can be hugely useful in maintaining low and +efficient cost-profiles in production settings at both training and inference-time, where multiple team-members are +iterating on models or multiple production inference pipelines are running. -If you are a ML practitioner, engineer, and/or enthusiast who is interested in working with and deploying GNNs on very large-scale graph data, you will find GiGL useful. +If you are an ML practitioner, engineer, and/or enthusiast who is interested in working with and deploying GNNs on very +large-scale graph data, you will find GiGL useful. # Why *not* use GiGL? -GiGL is designed with large-scale GNN settings in mind. For academic benchmarking and experimentation on smaller graphs (ones that can be easily fit in RAM), open-source modeling libraries and built-in abstractions within PyG and DGL for data transformation, subgraph sampling, data splitting, training and inference may be suitable and easier to use directly. There is overhead introduced in GiGL compared to these libraries for distributed environment setup and execution, and while this overhead is marginal in proportion to the benefits in large-scale scenarios, it may be outsized in small-scale ones. +GiGL is designed with large-scale GNN settings in mind. For academic benchmarking and experimentation on smaller graphs +(ones that can be easily fit in RAM), open-source modeling libraries and built-in abstractions within PyG and DGL for +data transformation, subgraph sampling, data splitting, training and inference may be suitable and easier to use +directly. There is overhead introduced in GiGL compared to these libraries for distributed environment setup and +execution, and while this overhead is marginal in proportion to the benefits in large-scale scenarios, it may be +outsized in small-scale ones. # How does GiGL work? -GiGL is designed with **horizontal-scaling** across many compute resources in mind, which makes it a good fit to run on custom compute clusters and cloud-offerings. While vertically scaled GNN solutions are feasible to a limit, thanks to advances in memory, core and GPU capacity in single machines, this limit is quickly saturated and needs frequent revision as we consider graphs with more nodes, edges, and feature-rich data. Horizontal scaling is a resilient ideology which in principle (and in practice, as we have found) allows for elastically scaling resources based on needs, which is particularly appealing when considering deployment settings where scale of data can change rapidly with e.g. user or platform growth. +GiGL is designed with **horizontal-scaling** across many compute resources in mind, which makes it a good fit to run on +custom compute clusters and cloud-offerings. While vertically scaled GNN solutions are feasible to a limit, thanks to +advances in memory, core and GPU capacity in single machines, this limit is quickly saturated and needs frequent +revision as we consider graphs with more nodes, edges, and feature-rich data. Horizontal scaling is a resilient ideology +which in principle (and in practice, as we have found) allows for elastically scaling resources based on needs, which is +particularly appealing when considering deployment settings where scale of data can change rapidly with e.g.
user or +platform growth. GiGL is comprised of the following 5 components ([more details on them](.components.md)): -- **Data Preprocessor** Distributed feature transformation pipeline which allows for feature-scaling, normalization, categorical-feature handling (encoding, vocabulary inference), and more. -- **Subgraph Sampler** Distributed subgraph generation pipeline which enables custom graph sampling specifications to dictate message-passing flow, and custom sample generation (e.g. handling positive/negative sampling) to facilitate tasks like node classification and link prediction. -- **Split Generator** Distributed data splitting routine to generate globally consistent train, validation and test splits according to flexible split strategies (transductive, inductive, custom, time-based, etc.) -- **Trainer** Distributed model trainer which consumes data output by Split Generator to do model training, validation and testing dictated by user code to generate model artifacts. + +- **Data Preprocessor** Distributed feature transformation pipeline which allows for feature-scaling, normalization, + categorical-feature handling (encoding, vocabulary inference), and more. +- **Subgraph Sampler** Distributed subgraph generation pipeline which enables custom graph sampling specifications to + dictate message-passing flow, and custom sample generation (e.g. handling positive/negative sampling) to facilitate + tasks like node classification and link prediction. +- **Split Generator** Distributed data splitting routine to generate globally consistent train, validation and test + splits according to flexible split strategies (transductive, inductive, custom, time-based, etc.) +- **Trainer** Distributed model trainer which consumes data output by Split Generator to do model training, validation + and testing dictated by user code to generate model artifacts. - **Inferencer** Distributed model inference which generates embeddings and/or class predictions dictated by user code. -Each of these components is run in specification with some combination of user code for flexibility, and a configuration file which is partially user-specified and partially auto-generated with a precursor component called Config Populator which houses the details on what logic will run, and where the resulting intermediate and final assets (transformed data, splits, model artifact, inferences) will be stored. +Each of these components is run in specification with some combination of user code for flexibility, and a configuration +file which is partially user-specified and partially auto-generated with a precursor component called Config Populator +which houses the details on what logic will run, and where the resulting intermediate and final assets (transformed +data, splits, model artifact, inferences) will be stored. diff --git a/docs/sphinx/source/user_guide/trouble_shooting/faq.md b/docs/sphinx/source/user_guide/trouble_shooting/faq.md index 09e49f1..eaff180 100644 --- a/docs/sphinx/source/user_guide/trouble_shooting/faq.md +++ b/docs/sphinx/source/user_guide/trouble_shooting/faq.md @@ -1 +1 @@ -# Frequently Asked Questions (FAQ) \ No newline at end of file +# Frequently Asked Questions (FAQ) diff --git a/examples/MAG240M/README.md b/examples/MAG240M/README.md index 3e1057e..ff9ccd8 100644 --- a/examples/MAG240M/README.md +++ b/examples/MAG240M/README.md @@ -1,17 +1,24 @@ ## Running the MAG240M experiments on your own GCP project + The following instructions assume you do not have a KFP cluster setup ### 1. 
(Optional) Pull MAG240M data into your own project -GiGL assumes your data is available in BQ Tables. BQ is ubiqutous to large enterprises as it provides a serverless, highly scalable, and cost-effective platform for storing and analyzing massive datasets. -We provide a script `fetch_data.ipynb` which you can utilize to load the MAG240M data into BQ tables in your own project. -Alternatively, you can skip this all together since we a copy of this dataset in BQ that can be utilized right away. +GiGL assumes your data is available in BQ Tables. BQ is ubiquitous to large enterprises as it provides a serverless, +highly scalable, and cost-effective platform for storing and analyzing massive datasets. + +We provide a script `fetch_data.ipynb` which you can utilize to load the MAG240M data into BQ tables in your own +project. Alternatively, you can skip this altogether since we provide a copy of this dataset in BQ that can be utilized right +away. ### 2. Run e2e pipeline -Prerequiste: Ensure you have access to your own GCP project, and a service account setup. You should also have gcloud cli setup locally and/or running the notebook through a GCP VM. Some basic knowledge of GCP may be necessary here. +Prerequisite: Ensure you have access to your own GCP project and a service account set up. You should also have the gcloud +CLI set up locally and/or be running the notebook through a GCP VM. Some basic knowledge of GCP may be necessary here. -Note: If you decided to follow step 1. you may need to subesequently modify paths in `examples/MAG240M/preprocessor_config.py` +Note: If you decided to follow step 1, you may need to subsequently modify paths in +`examples/MAG240M/preprocessor_config.py` -Follow along `examples/MAG240M/mag240m.ipynb` to run an e2e GiGL pipeline on the MAG240M dataset. -It will guide you through running each component: `config_populator` -> `data_preprocessor` -> `subgraph_sampler` -> `split_generator` -> `trainer` -> `inferencer` +Follow along `examples/MAG240M/mag240m.ipynb` to run an e2e GiGL pipeline on the MAG240M dataset. It will guide you +through running each component: `config_populator` -> `data_preprocessor` -> `subgraph_sampler` -> `split_generator` -> +`trainer` -> `inferencer` diff --git a/examples/MAG240M/mag240m.ipynb b/examples/MAG240M/mag240m.ipynb index fd5f219..7d874a8 100644 --- a/examples/MAG240M/mag240m.ipynb +++ b/examples/MAG240M/mag240m.ipynb @@ -1,363 +1,363 @@ { - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%cd ../..\n", - "# We need to change the working directory to the root of GiGL repo so we can import the necessary modules/scripts used below" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Setting up GCP Project and configs\n", - "Assuming you have a GCP project setup:\n", - "\n", - "1. Open up `resource_config.yaml` and fill all relevant fields under `common_compute_config`:\n", - " - project\n", - " - region\n", - " - temp_assets_bucket\n", - " - temp_regional_assets_bucket\n", - " - perm_assets_bucket\n", - " - temp_assets_bq_dataset_name\n", - " - embedding_bq_dataset_name\n", - " - gcp_service_account_email\n", - "\n", - "2.
Ensure your service account has relevant perms (A non-exaustive list):\n", - " - roles/bigquery.user\n", - " - roles/cloudprofiler.user\n", - " - roles/compute.admin\n", - " - roles/dataflow.admin\n", - " - roles/dataflow.worker\n", - " - roles/dataproc.editor\n", - " - roles/logging.logWriter\n", - " - roles/monitoring.metricWriter\n", - " - roles/notebooks.legacyViewer\n", - " - roles/aiplatform.user\n", - " - roles/dataproc.worker\n", - " - roles/storage.objectAdmin : on relevant buckets\n", - " - roles/artifactregistry.reader\n", - " - roles/artifactregistry.writer\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from gigl.common import LocalUri, GcsUri, Uri\n", - "from gigl.env.pipelines_config import get_resource_config\n", - "from gigl.src.common.types.pb_wrappers.gigl_resource_config import GiglResourceConfigWrapper\n", - "from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper\n", - "import datetime\n", - "\n", - "# Firstly, let's give your job a name and ensure that the resource and task configs exist and can be loaded\n", - "JOB_NAME = \"test_mag240m\"\n", - "TEMPLATE_TASK_CONFIG_URI = LocalUri(\"examples/MAG240M/task_config.yaml\")\n", - "RESOURCE_CONFIG_URI = LocalUri(\"examples/MAG240M/resource_config.yaml\")\n", - "\n", - "TEMPLATE_TASK_CONFIG: GbmlConfigPbWrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=TEMPLATE_TASK_CONFIG_URI)\n", - "RESOURCE_CONFIG: GiglResourceConfigWrapper = get_resource_config(resource_config_uri=RESOURCE_CONFIG_URI)\n", - "PROJECT = RESOURCE_CONFIG.project\n", - "\n", - "\n", - "\n", - "print(f\"Succesfully found task config and resource config. Script will help execute job: {JOB_NAME} on project: {PROJECT}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Lets run some basic checks to validate correctness of the task and resource config\n", - "from gigl.src.validation_check.config_validator import kfp_validation_checks\n", - "\n", - "kfp_validation_checks(\n", - " job_name=JOB_NAME,\n", - " task_config_uri=TEMPLATE_TASK_CONFIG_URI,\n", - " resource_config_uri=RESOURCE_CONFIG_URI,\n", - " # config_populator is the first step in the pipeline; where we will populat the template task config specified above and generate a frozen config\n", - " start_at=\"config_populator\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Compiling Src Docker images\n", - "\n", - "You will need to build and push docker images with your custom code so that individual GiGL components can leverage your code.\n", - "For this experiment we will consider the MAG240M specs and code to be \"custom code\", and we will guide you how to build a docker image with the code.\n", - "\n", - "We will make use of `scripts/build_and_push_docker_image.py` for this.\n", - "\n", - "Make note that this builds `containers/Dockerfile.src` and `containers/Dockerfile.dataflow.src`; which have instructions to `COPY` the `examples` folder - which contains all the source code for MAG240M, and it has all the GiGL src code." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from scripts.build_and_push_docker_image import build_and_push_cpu_image, build_and_push_cuda_image, build_and_push_dataflow_image\n", - "\n", - "curr_datetime = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n", - "DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG = f\"gcr.io/{PROJECT}/gigl_dataflow_runtime:{curr_datetime}\"\n", - "DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG = f\"gcr.io/{PROJECT}/gigl_cuda:{curr_datetime}\"\n", - "DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG = f\"gcr.io/{PROJECT}/gigl_cpu:{curr_datetime}\"\n", - "\n", - "build_and_push_dataflow_image(\n", - " image_name=DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG,\n", - ")\n", - "build_and_push_cuda_image(\n", - " image_name=DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG,\n", - ")\n", - "build_and_push_cpu_image(\n", - " image_name=DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG,\n", - ")\n", - "\n", - "print(f\"\"\"We built and pushed the following docker images:\n", - "- {DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG}\n", - "- {DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG}\n", - "- {DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG}\n", - "\"\"\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## We will instantiate local runner to help orchestrate the test pipeline" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "from gigl.orchestration.local.runner import Runner, PipelineConfig\n", - "\n", - "\n", - "runner = Runner()\n", - "pipeline_config = PipelineConfig(\n", - " applied_task_identifier=JOB_NAME,\n", - " task_config_uri=TEMPLATE_TASK_CONFIG_URI,\n", - " resource_config_uri=RESOURCE_CONFIG_URI,\n", - " custom_cuda_docker_uri=DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG,\n", - " custom_cpu_docker_uri=DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG,\n", - " dataflow_docker_uri=DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG,\n", - ")\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### First we will run config populator\n", - "The config populator takes in a template `GbmlConfig` and outputs a frozen `GbmlConfig` by populating all job related metadata paths in `sharedConfig`. These are mostly GCS paths which the following components read and write from, and use as an intermediary data communication medium. 
For example, the field `sharedConfig.trainedModelMetadata` is populated with a GCS URI, which indicates to the Trainer to write the trained model to this path, and to the Inferencer to read the model from this path\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from gigl.src.common.utils.file_loader import FileLoader\n", - "frozen_config_uri = runner.run_config_populator(pipeline_config=pipeline_config)\n", - "frozen_config = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=frozen_config_uri)\n", - "file_loader = FileLoader()\n", - "\n", - "print(f\"Config Populator has successfully generated the following frozen config from the template ({TEMPLATE_TASK_CONFIG_URI}) :\")\n", - "print(frozen_config.gbml_config_pb)\n", - "\n", - "pipeline_config.task_config_uri = frozen_config_uri # We need to update the task config uri to the new frozen config uri\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Next we run the preprocessor\n", - "The Data Preprocessor reads node, edge and respective feature data from a data source, and produces preprocessed / transformed versions of all this data, for subsequent components to use. It uses Tensorflow Transform to achieve data transformation in a distributed fashion, and allows for transformations like categorical encoding, scaling, normalization, casting and more.\n", - "\n", - "In this case we are using preprocessing spec defined in `examples/MAG240M/preprocessor_config.py` - take a look for more details.\n", - "\n", - "You will note that the preprocessor will create a few BQ jobs to prepare the node and edge tables, subsequently it will kick off TFT (dataflow) jobs to do the actual preprocessing. The preprocessor will: (1) create a preprocessing spec and dump it to path specified in frozen config `sharedConfig.preprocessedMetadataUri`. (2) Respective Dataflow jobs will dump the preprocessed assets as `.tfrecord` files to the paths specified inside the preprocessing spec `preprocessedMetadataUri`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# WARN: There is an issue when trying to run dataflow jobs from inside a jupyter kernel; thus we cannot use the line \n", - "# below to run the preprocessor as you would normally in a python script.\n", - "# runner.run_data_preprocessor(pipeline_config=pipeline_config) \n", - "\n", - "# Instead, we will run the preprocessor from the command line.\n", - "# Note: You can actually do this with every component; we just make use of the runner to make it easier to run the components.\n", - "!python -m gigl.src.data_preprocessor.data_preprocessor \\\n", - "--job_name=$JOB_NAME \\\n", - "--task_config_uri=$frozen_config_uri \\\n", - "--resource_config_uri=$RESOURCE_CONFIG_URI \\\n", - "--custom_worker_image_uri=$DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Next up is subgraph sampler\n", - "\n", - "The Subgraph Sampler receives node and edge data from Data Preprocessor and mainly generates k-hop localized subgraphs for each node in the graph. Basically, the Subgraph Sampler enables us to store the computation graph of each node independently without worrying about maintaining a huge graph in memory for down-stream components. It uses Spark/Scala and runs on a Dataproc cluster. 
Based on the predefined sample schema for each task, the output samples are serialized/saved in TFRecord format. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"Will use the following subgraph sampler config:\\n{TEMPLATE_TASK_CONFIG.gbml_config_pb.dataset_config.subgraph_sampler_config}\")\n", - "print(f\"The resources requested for the dataproc cluster (spark job) are as follows:\\n{RESOURCE_CONFIG.subgraph_sampler_config}\")\n", - "\n", - "runner.run_subgraph_sampler(\n", - " pipeline_config=pipeline_config,\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Running Split Generator\n", - "The Split Generator reads localized subgraph samples produced by Subgraph Sampler, and executes logic to split the data into training, validation and test sets. The semantics of which nodes and edges end up in which data split depends on the particular semantics of the splitting strategy.\n", - "\n", - "Since the positive labels are user defined we use the setup printed in the cell below. More assigner and split strategies can be found in splitgenerator.lib.assigners and splitgenerator.lib.split_strategies respectively.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"Will use the following split generator config:\\n{TEMPLATE_TASK_CONFIG.gbml_config_pb.dataset_config.split_generator_config}\")\n", - "print(f\"The resources requested for the dataproc cluster (spark job) are as follows:\\n{RESOURCE_CONFIG.split_generator_config}\")\n", - "\n", - "runner.run_split_generator(\n", - " pipeline_config=pipeline_config,\n", - ")\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Training the model\n", - "The Trainer component reads the outputs of split generator (which paths are specified in the frozen config), and trains a GNN model on the training set, early stops on the performance of the validation set, and finally evaluates on the test set. The training logic is implemented with PyTorch Distributed Data Parallel (DDP) Training, which enables distributed training on multiple GPU cards across multiple worker nodes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(f\"Will use the following class and respective runtime arguments that will be passed into the training class constructor: {TEMPLATE_TASK_CONFIG.trainer_config}\")\n", - "print(f\"The resources requested for the Vertex AI based DDP training job: {RESOURCE_CONFIG.trainer_config}\")\n", - "\n", - "runner.run_trainer(\n", - " pipeline_config=pipeline_config,\n", - ")\n", - "\n", - "print(\n", - " f\"Model should be sucessfully trained and stored in the following location: \" +\n", - " f\"{frozen_config.trained_model_metadata_pb_wrapper.trained_model_metadata_pb.trained_model_uri}\"\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Inference\n", - "The Inferencer component is responsible for running inference of a trained model on samples generated by the Subgraph Sampler component. At a high level, it works by applying a trained model in an embarrassingly parallel and distributed fashion across these samples, and persisting the output embeddings and/or predictions." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# WARN: There is an issue when trying to run dataflow jobs from inside a jupyter kernel; thus we cannot use the line \n", - "# below to run the inferencer as you would normally in a python script.\n", - "# runner.run_inferencer(pipeline_config=pipeline_config) \n", - "\n", - "# Instead, we will run the inferencer from the command line.\n", - "# Note: You can actually do this with every component; we just make use of the runner to make it easier to run the components.\n", - "!python -m gigl.src.inference.inferencer \\\n", - "--job_name=$JOB_NAME \\\n", - "--task_config_uri=$frozen_config_uri \\\n", - "--resource_config_uri=$RESOURCE_CONFIG_URI \\\n", - "--custom_worker_image_uri=$DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG \\\n", - "--cpu_docker_uri=$DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG \\\n", - "--cuda_docker_uri=$DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Looking at inference results\n", - "bq_emb_out_table = frozen_config.shared_config.inference_metadata.node_type_to_inferencer_output_info_map[\"paper_or_author\"].embeddings_path\n", - "print(f\"Embeddings should be sucessfully stored in the following location: {bq_emb_out_table}\")\n", - "\n", - "from gigl.src.common.utils.bq import BqUtils\n", - "bq_utils = BqUtils(project=PROJECT)\n", - "query = f\"SELECT * FROM {bq_emb_out_table} LIMIT 5\"\n", - "result = list(bq_utils.run_query(query=query, labels={}))\n", - "\n", - "print(f\"Query result: {bq_emb_out_table}\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "bagl", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.21" - } - }, - "nbformat": 4, - "nbformat_minor": 2 - } \ No newline at end of file + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%cd ../..\n", + "# We need to change the working directory to the root of GiGL repo so we can import the necessary modules/scripts used below" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Setting up GCP Project and configs\n", + "Assuming you have a GCP project setup:\n", + "\n", + "1. Open up `resource_config.yaml` and fill all relevant fields under `common_compute_config`:\n", + " - project\n", + " - region\n", + " - temp_assets_bucket\n", + " - temp_regional_assets_bucket\n", + " - perm_assets_bucket\n", + " - temp_assets_bq_dataset_name\n", + " - embedding_bq_dataset_name\n", + " - gcp_service_account_email\n", + "\n", + "2. 
Ensure your service account has relevant permissions (a non-exhaustive list):\n", + "   - roles/bigquery.user\n", + "   - roles/cloudprofiler.user\n", + "   - roles/compute.admin\n", + "   - roles/dataflow.admin\n", + "   - roles/dataflow.worker\n", + "   - roles/dataproc.editor\n", + "   - roles/logging.logWriter\n", + "   - roles/monitoring.metricWriter\n", + "   - roles/notebooks.legacyViewer\n", + "   - roles/aiplatform.user\n", + "   - roles/dataproc.worker\n", + "   - roles/storage.objectAdmin : on relevant buckets\n", + "   - roles/artifactregistry.reader\n", + "   - roles/artifactregistry.writer\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from gigl.common import LocalUri, GcsUri, Uri\n", + "from gigl.env.pipelines_config import get_resource_config\n", + "from gigl.src.common.types.pb_wrappers.gigl_resource_config import GiglResourceConfigWrapper\n", + "from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper\n", + "import datetime\n", + "\n", + "# Firstly, let's give your job a name and ensure that the resource and task configs exist and can be loaded\n", + "JOB_NAME = \"test_mag240m\"\n", + "TEMPLATE_TASK_CONFIG_URI = LocalUri(\"examples/MAG240M/task_config.yaml\")\n", + "RESOURCE_CONFIG_URI = LocalUri(\"examples/MAG240M/resource_config.yaml\")\n", + "\n", + "TEMPLATE_TASK_CONFIG: GbmlConfigPbWrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=TEMPLATE_TASK_CONFIG_URI)\n", + "RESOURCE_CONFIG: GiglResourceConfigWrapper = get_resource_config(resource_config_uri=RESOURCE_CONFIG_URI)\n", + "PROJECT = RESOURCE_CONFIG.project\n", + "\n", + "\n", + "\n", + "print(f\"Successfully found task config and resource config. Script will help execute job: {JOB_NAME} on project: {PROJECT}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Let's run some basic checks to validate correctness of the task and resource config\n", + "from gigl.src.validation_check.config_validator import kfp_validation_checks\n", + "\n", + "kfp_validation_checks(\n", + "    job_name=JOB_NAME,\n", + "    task_config_uri=TEMPLATE_TASK_CONFIG_URI,\n", + "    resource_config_uri=RESOURCE_CONFIG_URI,\n", + "    # config_populator is the first step in the pipeline, where we will populate the template task config specified above and generate a frozen config\n", + "    start_at=\"config_populator\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compiling Src Docker images\n", + "\n", + "You will need to build and push docker images with your custom code so that individual GiGL components can leverage your code.\n", + "For this experiment we will consider the MAG240M specs and code to be \"custom code\", and we will guide you through how to build a docker image with the code.\n", + "\n", + "We will make use of `scripts/build_and_push_docker_image.py` for this.\n", + "\n", + "Note that this builds `containers/Dockerfile.src` and `containers/Dockerfile.dataflow.src`, which have instructions to `COPY` the `examples` folder - which contains all the source code for MAG240M - along with all the GiGL src code."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from scripts.build_and_push_docker_image import build_and_push_cpu_image, build_and_push_cuda_image, build_and_push_dataflow_image\n", + "\n", + "curr_datetime = datetime.datetime.now().strftime(\"%Y%m%d-%H%M%S\")\n", + "DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG = f\"gcr.io/{PROJECT}/gigl_dataflow_runtime:{curr_datetime}\"\n", + "DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG = f\"gcr.io/{PROJECT}/gigl_cuda:{curr_datetime}\"\n", + "DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG = f\"gcr.io/{PROJECT}/gigl_cpu:{curr_datetime}\"\n", + "\n", + "build_and_push_dataflow_image(\n", + "    image_name=DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG,\n", + ")\n", + "build_and_push_cuda_image(\n", + "    image_name=DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG,\n", + ")\n", + "build_and_push_cpu_image(\n", + "    image_name=DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG,\n", + ")\n", + "\n", + "print(f\"\"\"We built and pushed the following docker images:\n", + "- {DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG}\n", + "- {DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG}\n", + "- {DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG}\n", + "\"\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## We will instantiate a local runner to help orchestrate the test pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "from gigl.orchestration.local.runner import Runner, PipelineConfig\n", + "\n", + "\n", + "runner = Runner()\n", + "pipeline_config = PipelineConfig(\n", + "    applied_task_identifier=JOB_NAME,\n", + "    task_config_uri=TEMPLATE_TASK_CONFIG_URI,\n", + "    resource_config_uri=RESOURCE_CONFIG_URI,\n", + "    custom_cuda_docker_uri=DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG,\n", + "    custom_cpu_docker_uri=DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG,\n", + "    dataflow_docker_uri=DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG,\n", + ")\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### First we will run the config populator\n", + "The config populator takes in a template `GbmlConfig` and outputs a frozen `GbmlConfig` by populating all job-related metadata paths in `sharedConfig`. These are mostly GCS paths which the following components read and write from, and use as an intermediary data communication medium.
For example, the field `sharedConfig.trainedModelMetadata` is populated with a GCS URI, which indicates to the Trainer to write the trained model to this path, and to the Inferencer to read the model from this path.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from gigl.src.common.utils.file_loader import FileLoader\n", + "frozen_config_uri = runner.run_config_populator(pipeline_config=pipeline_config)\n", + "frozen_config = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri(gbml_config_uri=frozen_config_uri)\n", + "file_loader = FileLoader()\n", + "\n", + "print(f\"Config Populator has successfully generated the following frozen config from the template ({TEMPLATE_TASK_CONFIG_URI}):\")\n", + "print(frozen_config.gbml_config_pb)\n", + "\n", + "pipeline_config.task_config_uri = frozen_config_uri # We need to update the task config uri to the new frozen config uri\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Next we run the preprocessor\n", + "The Data Preprocessor reads node, edge and respective feature data from a data source, and produces preprocessed / transformed versions of all this data, for subsequent components to use. It uses TensorFlow Transform to achieve data transformation in a distributed fashion, and allows for transformations like categorical encoding, scaling, normalization, casting and more.\n", + "\n", + "In this case we are using the preprocessing spec defined in `examples/MAG240M/preprocessor_config.py` - take a look for more details.\n", + "\n", + "You will note that the preprocessor will create a few BQ jobs to prepare the node and edge tables; subsequently, it will kick off TFT (dataflow) jobs to do the actual preprocessing. The preprocessor will: (1) create a preprocessing spec and dump it to the path specified in the frozen config `sharedConfig.preprocessedMetadataUri`. (2) Respective Dataflow jobs will dump the preprocessed assets as `.tfrecord` files to the paths specified inside the preprocessing spec `preprocessedMetadataUri`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# WARN: There is an issue when trying to run dataflow jobs from inside a jupyter kernel; thus we cannot use the line \n", + "# below to run the preprocessor as you would normally in a Python script.\n", + "# runner.run_data_preprocessor(pipeline_config=pipeline_config) \n", + "\n", + "# Instead, we will run the preprocessor from the command line.\n", + "# Note: You can actually do this with every component; we just make use of the runner to make it easier to run the components.\n", + "!python -m gigl.src.data_preprocessor.data_preprocessor \\\n", + "--job_name=$JOB_NAME \\\n", + "--task_config_uri=$frozen_config_uri \\\n", + "--resource_config_uri=$RESOURCE_CONFIG_URI \\\n", + "--custom_worker_image_uri=$DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Next up is subgraph sampler\n", + "\n", + "The Subgraph Sampler receives node and edge data from the Data Preprocessor and mainly generates k-hop localized subgraphs for each node in the graph. Basically, the Subgraph Sampler enables us to store the computation graph of each node independently without worrying about maintaining a huge graph in memory for downstream components. It uses Spark/Scala and runs on a Dataproc cluster.
Based on the predefined sample schema for each task, the output samples are serialized/saved in TFRecord format. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Will use the following subgraph sampler config:\\n{TEMPLATE_TASK_CONFIG.gbml_config_pb.dataset_config.subgraph_sampler_config}\")\n", + "print(f\"The resources requested for the dataproc cluster (spark job) are as follows:\\n{RESOURCE_CONFIG.subgraph_sampler_config}\")\n", + "\n", + "runner.run_subgraph_sampler(\n", + "    pipeline_config=pipeline_config,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Running Split Generator\n", + "The Split Generator reads localized subgraph samples produced by the Subgraph Sampler, and executes logic to split the data into training, validation and test sets. The semantics of which nodes and edges end up in which data split depends on the particular semantics of the splitting strategy.\n", + "\n", + "Since the positive labels are user-defined, we use the setup printed in the cell below. More assigner and split strategies can be found in `splitgenerator.lib.assigners` and `splitgenerator.lib.split_strategies`, respectively.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Will use the following split generator config:\\n{TEMPLATE_TASK_CONFIG.gbml_config_pb.dataset_config.split_generator_config}\")\n", + "print(f\"The resources requested for the dataproc cluster (spark job) are as follows:\\n{RESOURCE_CONFIG.split_generator_config}\")\n", + "\n", + "runner.run_split_generator(\n", + "    pipeline_config=pipeline_config,\n", + ")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Training the model\n", + "The Trainer component reads the outputs of the split generator (whose paths are specified in the frozen config), and trains a GNN model on the training set, early stops based on the performance of the validation set, and finally evaluates on the test set. The training logic is implemented with PyTorch Distributed Data Parallel (DDP) Training, which enables distributed training on multiple GPU cards across multiple worker nodes." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Will use the following class and respective runtime arguments that will be passed into the training class constructor: {TEMPLATE_TASK_CONFIG.trainer_config}\")\n", + "print(f\"The resources requested for the Vertex AI based DDP training job: {RESOURCE_CONFIG.trainer_config}\")\n", + "\n", + "runner.run_trainer(\n", + "    pipeline_config=pipeline_config,\n", + ")\n", + "\n", + "print(\n", + "    f\"Model should be successfully trained and stored in the following location: \" +\n", + "    f\"{frozen_config.trained_model_metadata_pb_wrapper.trained_model_metadata_pb.trained_model_uri}\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Inference\n", + "The Inferencer component is responsible for running inference with a trained model on samples generated by the Subgraph Sampler component. At a high level, it works by applying a trained model in an embarrassingly parallel and distributed fashion across these samples, and persisting the output embeddings and/or predictions."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# WARN: There is an issue when trying to run dataflow jobs from inside a jupyter kernel; thus we cannot use the line \n", + "# below to run the inferencer as you would normally in a Python script.\n", + "# runner.run_inferencer(pipeline_config=pipeline_config) \n", + "\n", + "# Instead, we will run the inferencer from the command line.\n", + "# Note: You can actually do this with every component; we just make use of the runner to make it easier to run the components.\n", + "!python -m gigl.src.inference.inferencer \\\n", + "--job_name=$JOB_NAME \\\n", + "--task_config_uri=$frozen_config_uri \\\n", + "--resource_config_uri=$RESOURCE_CONFIG_URI \\\n", + "--custom_worker_image_uri=$DOCKER_IMAGE_DATAFLOW_RUNTIME_NAME_WITH_TAG \\\n", + "--cpu_docker_uri=$DOCKER_IMAGE_MAIN_CPU_NAME_WITH_TAG \\\n", + "--cuda_docker_uri=$DOCKER_IMAGE_MAIN_CUDA_NAME_WITH_TAG" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Looking at inference results\n", + "bq_emb_out_table = frozen_config.shared_config.inference_metadata.node_type_to_inferencer_output_info_map[\"paper_or_author\"].embeddings_path\n", + "print(f\"Embeddings should be successfully stored in the following location: {bq_emb_out_table}\")\n", + "\n", + "from gigl.src.common.utils.bq import BqUtils\n", + "bq_utils = BqUtils(project=PROJECT)\n", + "query = f\"SELECT * FROM {bq_emb_out_table} LIMIT 5\"\n", + "result = list(bq_utils.run_query(query=query, labels={}))\n", + "\n", + "print(f\"Query result: {result}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "bagl", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.21" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/MAG240M/preprocessor_config.py b/examples/MAG240M/preprocessor_config.py index bf54a70..763d99a 100644 --- a/examples/MAG240M/preprocessor_config.py +++ b/examples/MAG240M/preprocessor_config.py @@ -1,14 +1,16 @@ +from __future__ import annotations + from typing import Callable, Dict import tensorflow as tf import tensorflow_transform as tft from examples.MAG240M.common import NUM_PAPER_FEATURES, TOTAL_NUM_PAPERS from examples.MAG240M.queries import ( - QUERY_TEMPLATE_CAST_TO_HOMOGENEOUS_EDGE_TABLE, - QUERY_TEMPLATE_CAST_TO_INTERMEDIARY_HOMOGENEOUS_NODE_TABLE, - QUERY_TEMPLATE_COMPUTED_NODE_DEGREE_TABLE, - QUERY_TEMPLATE_GENERATE_HOMOGENEOUS_NODE_TABLE, - QUERY_TEMPLATE_REINDEX_AUTHOR_WRITES_PAPER_TABLE, + query_template_cast_to_homogeneous_edge_table, + query_template_cast_to_intermediary_homogeneous_node_table, + query_template_computed_node_degree_table, + query_template_generate_homogeneous_node_table, + query_template_reindex_author_writes_paper_table, ) from google.cloud.bigquery.job import WriteDisposition @@ -159,7 +161,7 @@ def prepare_for_pipeline( ) query_reindex_author_writes_paper_table = ( - QUERY_TEMPLATE_REINDEX_AUTHOR_WRITES_PAPER_TABLE.format( + query_template_reindex_author_writes_paper_table.format( TOTAL_NUM_PAPERS=TOTAL_NUM_PAPERS, author_writes_paper_table=self.author_write_paper_table, ) ) @@ -171,7 +173,7 @@ def prepare_for_pipeline( write_disposition=WriteDisposition.WRITE_TRUNCATE, ) -
query_cast_to_homogeneous_edge_table = QUERY_TEMPLATE_CAST_TO_HOMOGENEOUS_EDGE_TABLE.format( + query_cast_to_homogeneous_edge_table = query_template_cast_to_homogeneous_edge_table.format( reindexed_author_writes_paper_table=dst_reindex_author_writes_paper_table, paper_cites_paper_table=self.paper_cite_paper_table, ) @@ -183,7 +185,7 @@ def prepare_for_pipeline( ) query_computed_node_degree_table = ( - QUERY_TEMPLATE_COMPUTED_NODE_DEGREE_TABLE.format( + query_template_computed_node_degree_table.format( homogeneous_edge_table=self.dst_casted_homogeneous_edge_table, ) ) @@ -194,7 +196,7 @@ def prepare_for_pipeline( write_disposition=WriteDisposition.WRITE_TRUNCATE, ) - query_cast_to_intermediary_homogeneous_node_table = QUERY_TEMPLATE_CAST_TO_INTERMEDIARY_HOMOGENEOUS_NODE_TABLE.format( + query_cast_to_intermediary_homogeneous_node_table = query_template_cast_to_intermediary_homogeneous_node_table.format( reindexed_author_writes_paper_table=dst_reindex_author_writes_paper_table, paper_table=self.paper_table, ) @@ -206,7 +208,7 @@ def prepare_for_pipeline( ) query_generate_homogeneous_node_table = ( - QUERY_TEMPLATE_GENERATE_HOMOGENEOUS_NODE_TABLE.format( + query_template_generate_homogeneous_node_table.format( interim_node_table=dst_interim_casted_homogeneous_node_table, node_degree_table=dst_interim_node_degree_table, ) diff --git a/examples/MAG240M/queries.py b/examples/MAG240M/queries.py index 4024afd..4cdf3b8 100644 --- a/examples/MAG240M/queries.py +++ b/examples/MAG240M/queries.py @@ -1,6 +1,6 @@ from examples.MAG240M.common import NUM_PAPER_FEATURES -QUERY_TEMPLATE_REINDEX_AUTHOR_WRITES_PAPER_TABLE = """ +query_template_reindex_author_writes_paper_table = """ -- Firstly, we reindex the author to the same node space as papers -- TOTAL_NUM_PAPERS as defined in https://ogb.stanford.edu/docs/lsc/mag240m/ -- The paper node ids are thus: 0 to 121751665; and the author node ids will now start from 121751666 @@ -11,7 +11,7 @@ `{author_writes_paper_table}` """ -QUERY_TEMPLATE_CAST_TO_HOMOGENEOUS_EDGE_TABLE = """ +query_template_cast_to_homogeneous_edge_table = """ -- Combine the paper cites paper, and the re-indexed author writes paper tables into a single edge table SELECT src, @@ -26,7 +26,7 @@ `{paper_cites_paper_table}` """ -QUERY_TEMPLATE_COMPUTED_NODE_DEGREE_TABLE = """ +query_template_computed_node_degree_table = """ SELECT node_id, COUNT(*) AS degree @@ -45,7 +45,7 @@ node_id """ -QUERY_TEMPLATE_CAST_TO_INTERMEDIARY_HOMOGENEOUS_NODE_TABLE = ( +query_template_cast_to_intermediary_homogeneous_node_table = ( """ WITH authors AS ( SELECT @@ -71,7 +71,7 @@ ) -QUERY_TEMPLATE_GENERATE_HOMOGENEOUS_NODE_TABLE = ( +query_template_generate_homogeneous_node_table = ( """ SELECT interim_node_table.node_id as node_id, diff --git a/examples/MAG240M/resource_config.yaml b/examples/MAG240M/resource_config.yaml index 89f15f0..2c3fadd 100644 --- a/examples/MAG240M/resource_config.yaml +++ b/examples/MAG240M/resource_config.yaml @@ -2,13 +2,14 @@ shared_resource_config: resource_labels: # These are compute labels that we will try to attach to the resources created by GiGL components. # More information: https://cloud.google.com/compute/docs/labeling-resources. - # These can be mostly used to get finer gained cost reporting through GCP billing on individual component + # These can be mostly used to get finer grained cost reporting through GCP billing on individual component # and pipeline costs. 
- # If COMPONENT is provided in cost_resource_group_tag, it will be automatically be replaced with one of - # {pre|sgs|spl|tra|inf|pos} standing for: {Preprocessor | Subgraph Sampler | Split Generator | Trainer - # | Inference | Post Processor} so we can get more accurate cost measurements - # of each component. + # If COMPONENT is provided in cost_resource_group_tag, it will automatically be replaced with one of + # {pre|sgs|spl|tra|inf|pos} standing for: {Preprocessor | Subgraph Sampler | Split Generator | Trainer + # | Inference | Post Processor} so we can get more accurate cost measurements of each component. + # See implementation: + # `python/gigl/src/common/types/pb_wrappers/gigl_resource_config.py#GiglResourceConfigWrapper.get_resource_labels` cost_resource_group_tag: dev_experiments_COMPONENT cost_resource_group: gigl_platform diff --git a/python/gigl/distributed/dataset/__init__.py b/examples/distributed/__init__.py similarity index 100% rename from python/gigl/distributed/dataset/__init__.py rename to examples/distributed/__init__.py diff --git a/examples/distributed/configs/e2e_cora_udl_glt_task_config.yaml b/examples/distributed/configs/e2e_cora_udl_glt_task_config.yaml new file mode 100644 index 0000000..3c2ec22 --- /dev/null +++ b/examples/distributed/configs/e2e_cora_udl_glt_task_config.yaml @@ -0,0 +1,57 @@ +graphMetadata: + condensedEdgeTypeMap: + '0': + dstNodeType: paper + relation: cites + srcNodeType: paper + condensedNodeTypeMap: + '0': paper + edgeTypes: + - dstNodeType: paper + relation: cites + srcNodeType: paper + nodeTypes: + - paper +datasetConfig: + dataPreprocessorConfig: + dataPreprocessorConfigClsPath: gigl.src.mocking.mocking_assets.passthrough_preprocessor_config_for_mocked_assets.PassthroughPreprocessorConfigForMockedAssets + dataPreprocessorArgs: + # Supported keys in python/tests/test_assets/dataset_mocking/lib/mocked_dataset_artifact_metadata.json + mocked_dataset_name: 'cora_homogeneous_node_anchor_edge_features_user_defined_labels' + # Below not used for GLT Inference + subgraphSamplerConfig: + numHops: 2 + numNeighborsToSample: 10 + numUserDefinedPositiveSamples: 1 + numUserDefinedNegativeSamples: 1 + splitGeneratorConfig: + assignerArgs: + seed: '42' + test_split: '0.2' + train_split: '0.7' + val_split: '0.1' + assignerClsPath: splitgenerator.lib.assigners.UserDefinedLabelsEdgeToLinkSplitHashingAssigner + splitStrategyClsPath: splitgenerator.lib.split_strategies.UserDefinedLabelsNodeAnchorBasedLinkPredictionSplitStrategy + # Above not used for GLT Inference +inferencerConfig: + inferencerArgs: + # Example argument to inferencer + log_every_n_batch: "50" + inferenceBatchSize: 512 + command: python -m examples.distributed.homogeneous_inference +sharedConfig: + shouldSkipAutomaticTempAssetCleanup: false + shouldSkipInference: false + shouldSkipTraining: true # GLT Task does not have a training phase that is being tested right now + shouldSkipModelEvaluation: true + trainedModelMetadata: + trainedModelUri: gs://public-gigl/mocked_assets/2024-07-15--21-30-07-UTC/cora_homogeneous_node_anchor_edge_features_user_defined_labels/trainer/models/model.pt +taskMetadata: + nodeAnchorBasedLinkPredictionTaskMetadata: + supervisionEdgeTypes: + - dstNodeType: paper + relation: cites + srcNodeType: paper +featureFlags: + should_run_glt_backend: 'True' + data_preprocessor_num_shards: '2' diff --git a/examples/distributed/homogeneous_inference.py b/examples/distributed/homogeneous_inference.py new file mode 100644 index 0000000..a9ec865 --- /dev/null +++
b/examples/distributed/homogeneous_inference.py @@ -0,0 +1,462 @@ +""" +This file contains an example of how to run inference on a pretrained torch.nn.Module in GiGL (or elsewhere) using the new +GLT (GraphLearn-for-PyTorch) bindings that GiGL has. Note that this example applies to use cases which already have +some pretrained `nn.Module` and are looking to utilize cost savings with GLT. While `_run_example_inference` is coupled with +GiGL orchestration, the `_inference_process` function is generic and can be used as a reference +for writing inference pipelines not dependent on GiGL orchestration. + +To run this file with GiGL orchestration, set the fields similar to below: + +inferencerConfig: + inferencerArgs: + # Example argument to inferencer + log_every_n_batch: "50" + inferenceBatchSize: 512 + command: python -m examples.distributed.homogeneous_inference +featureFlags: + should_run_glt_backend: 'True' + +You can run this example in a full pipeline with `make run_cora_glt_udl_kfp_test` from GiGL root. +""" + +import argparse +import gc +import time +from typing import Dict, List, Optional + +import torch +import torch.multiprocessing as mp +import torch.nn as nn +from graphlearn_torch.distributed import barrier, shutdown_rpc + +import gigl.distributed +import gigl.distributed.utils +from gigl.common import GcsUri, UriFactory +from gigl.common.data.export import EmbeddingExporter, load_embeddings_to_bigquery +from gigl.common.data.load_torch_tensors import SerializedGraphMetadata +from gigl.common.logger import Logger +from gigl.common.utils.gcs import GcsUtils +from gigl.common.utils.vertex_ai_context import connect_worker_pool +from gigl.distributed import ( + DistLinkPredictionDataset, + DistributedContext, + build_dataset, +) +from gigl.distributed.utils.serialized_graph_metadata_translator import ( + convert_pb_to_serialized_graph_metadata, +) +from gigl.src.common.models.pyg.homogeneous import GraphSAGE +from gigl.src.common.models.pyg.link_prediction import ( + LinkPredictionDecoder, + LinkPredictionGNN, +) +from gigl.src.common.types import AppliedTaskIdentifier +from gigl.src.common.types.graph_data import NodeType +from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper +from gigl.src.common.utils.bq import BqUtils +from gigl.src.common.utils.model import load_state_dict_from_uri +from gigl.src.inference.lib.assets import InferenceAssets + +logger = Logger() + + +def _init_example_gigl_model( + state_dict: Dict[str, torch.Tensor], + node_feature_dim: int, + edge_feature_dim: int, + inferencer_args: Dict[str, str], + device: Optional[torch.device] = None, +) -> LinkPredictionGNN: + """ + Initializes a hard-coded GiGL LinkPredictionGNN model, which inherits from `nn.Module`. Note that this is just an example -- + any `nn.Module` subclass can work with GLT.
+ This model is trained based on the following CORA UDL E2E config: + `python/gigl/src/mocking/configs/e2e_udl_node_anchor_based_link_prediction_template_gbml_config.yaml` + + Args: + state_dict (Dict[str, torch.Tensor]): State dictionary for pretrained model + node_feature_dim (int): Input node feature dimension for the model + edge_feature_dim (int): Input edge feature dimension for the model + inferencer_args (Dict[str, str]): Arguments for inferencer + device (Optional[torch.device]): Torch device of the model, if None defaults to CPU + Returns: + LinkPredictionGNN: Link Prediction model for inference + """ + # TODO (mkolodner-sc): Add asserts to ensure that model shape aligns with shape of state dict + + # We use the GiGL GraphSAGE implementation since the model shape needs to conform to the + # state_dict that the trained model used, which was done with the GiGL GraphSAGE + encoder_model = GraphSAGE( + in_dim=node_feature_dim, + hid_dim=int(inferencer_args.get("hid_dim", 16)), + out_dim=int(inferencer_args.get("out_dim", 16)), + edge_dim=edge_feature_dim if edge_feature_dim > 0 else None, + num_layers=int(inferencer_args.get("num_layers", 2)), + conv_kwargs={},  # Use default conv args for this model type + should_l2_normalize_embedding_layer_output=True, + ) + + decoder_model = LinkPredictionDecoder()  # Defaults to inner product decoder + + model: LinkPredictionGNN = LinkPredictionGNN( + encoder=encoder_model, + decoder=decoder_model, + ) + + # Push the model to the specified device. + if device is None: + device = torch.device("cpu") + model.to(device) + + # Override the initialized model's parameters with the saved model's parameters. + model.load_state_dict(state_dict) + + return model + + +@torch.no_grad() +def _inference_process( + # When spawning processes, each process will be assigned a rank ranging + # from [0, num_processes). + process_number_on_current_machine: int, + num_inference_processes_per_machine: int, + distributed_context: DistributedContext, + embedding_gcs_path: GcsUri, + model_state_dict_uri: GcsUri, + inference_batch_size: int, + dataset: DistLinkPredictionDataset, + inferencer_args: Dict[str, str], + node_types: List[NodeType], + node_feature_dim: int, + edge_feature_dim: int, +): + """ + This function is spawned by multiple processes per machine and is responsible for: + 1. Initializing the DataLoader + 2. Running the inference loop to get the embeddings for each anchor node + 3.
Writing embeddings to GCS + + Args: + process_number_on_current_machine (int): Process number on the current machine + num_inference_processes_per_machine (int): Number of inference processes spawned by each machine + distributed_context (DistributedContext): Distributed context containing information for master_ip_address, rank, and world size + embedding_gcs_path (GcsUri): GCS path to write embeddings to + model_state_dict_uri (GcsUri): GCS path to load model from + inference_batch_size (int): Batch size to use for inference + dataset (DistLinkPredictionDataset): Link prediction dataset built on current machine + inferencer_args (Dict[str, str]): Additional arguments for inferencer + node_types (List[NodeType]): Node Types in Graph + node_feature_dim (int): Input node feature dimension for the model + edge_feature_dim (int): Input edge feature dimension for the model + """ + + fanout_per_hop = int(inferencer_args.get("fanout_per_hop", "10")) + # This fanout is defaulted to match the fanout provided in the CORA UDL E2E Config: + # `python/gigl/src/mocking/configs/e2e_udl_node_anchor_based_link_prediction_template_gbml_config.yaml` + # Users can feel free to parse this argument from `inferencer_args` however they want if they want more + # customizability for their fanout strategy. + num_neighbors: List[int] = [fanout_per_hop, fanout_per_hop] + + # While the ideal value for `sampling_workers_per_inference_process` has been identified to be between `2` and `4`, this may need some tuning depending on the + # production pipeline. We default this value to `4` here for simplicity. + sampling_workers_per_inference_process: int = int( + inferencer_args.get("sampling_workers_per_inference_process", "4") + ) + + # This value represents the shared-memory buffer size (bytes) allocated for the channel during sampling, and + # is the place to store pre-fetched data, so if it is too small then prefetching is limited. This parameter is a string + # with `{numeric_value}{storage_size}`, where storage size could be `MB`, `GB`, etc. We default this value to 4GB, + # but in production may need some tuning. + sampling_worker_shared_channel_size: str = inferencer_args.get( + "sampling_worker_shared_channel_size", "4GB" + ) + + log_every_n_batch = int(inferencer_args.get("log_every_n_batch", "50")) + + # This value defines the `node_type` tag that will be used for writing to GCS and BQ. We default to "user". + embedding_type = inferencer_args.get("embedding_type", "user") + + device = gigl.distributed.utils.get_available_device( + local_process_rank=process_number_on_current_machine, + )  # The device is automatically inferred based off the local process rank and the available devices + + data_loader = gigl.distributed.DistNeighborLoader( + dataset=dataset, + num_neighbors=num_neighbors, + context=distributed_context, + local_process_rank=process_number_on_current_machine, + local_process_world_size=num_inference_processes_per_machine, + input_nodes=None,  # Since homogeneous, `None` defaults to using all nodes for inference loop + num_workers=sampling_workers_per_inference_process, + batch_size=inference_batch_size, + pin_memory_device=device, + worker_concurrency=sampling_workers_per_inference_process, + channel_size=sampling_worker_shared_channel_size, + ) + # Initialize a LinkPredictionGNN model and load parameters from + # the saved model.
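+ # `load_state_dict_from_uri` is a GiGL utility that reads the serialized state dict from the given URI and maps its tensors onto `device`, so each spawned process holds its own copy of the weights. + # A roughly equivalent plain-PyTorch sketch (hypothetical local path, for illustration only): + #     state_dict = torch.load("/tmp/model.pt", map_location=device)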
+ model_state_dict = load_state_dict_from_uri( + load_from_uri=model_state_dict_uri, device=device + ) + model: nn.Module = _init_example_gigl_model( + state_dict=model_state_dict, + node_feature_dim=node_feature_dim, + edge_feature_dim=edge_feature_dim, + inferencer_args=inferencer_args, + device=device, + ) + + # Set the model to evaluation mode for inference. + model.eval() + + logger.info(f"Model initialized on device {device}") + + embedding_filename = f"machine_{distributed_context.global_rank}_local_process_number_{process_number_on_current_machine}" + + # Get temporary GCS folder to write outputs of inference to. GiGL orchestration automatically cleans this, but + # if running manually, you will need to clean this directory so that retries don't end up with stale files. + gcs_utils = GcsUtils() + gcs_base_uri = GcsUri.join(embedding_gcs_path, embedding_filename) + num_files_at_gcs_path = gcs_utils.count_blobs_in_gcs_path(gcs_base_uri) + if num_files_at_gcs_path > 0: + logger.warning( + f"{num_files_at_gcs_path} files already detected at base gcs path" + ) + + # GiGL class for exporting embeddings to GCS. This is achieved by writing ids and embeddings to an in-memory buffer which gets + # flushed to GCS. Setting the min_shard_size_threshold_bytes field of this class controls the frequency of flushing to GCS; it defaults + # to only flushing when flush_embeddings() is called explicitly or after exiting via a context manager. + exporter = EmbeddingExporter(export_dir=gcs_base_uri) + + # We add a barrier here so that all machines and processes have initialized their dataloader at the start of the inference loop. Otherwise, on-the-fly subgraph + # sampling may fail. + + barrier() + + t = time.time() + data_loading_start_time = time.time() + inference_start_time = time.time() + cumulative_data_loading_time = 0.0 + cumulative_inference_time = 0.0 + + # Begin inference loop + + # Iterating through the GLT dataloader yields a `torch_geometric.data.Data` type + for batch_idx, data in enumerate(data_loader): + cumulative_data_loading_time += time.time() - data_loading_start_time + + inference_start_time = time.time() + + # These arguments to forward are specific to the GiGL LinkPredictionGNN model. + # If just using a nn.Module, you can just use output = model(data) + output = model(data=data, output_node_types=node_types, device=device)[ + node_types[0] + ] + + # The anchor node IDs are contained inside of the .batch field of the data + node_ids = data.batch.cpu() + + # Only the first `batch_size` rows of the node embeddings contain the embeddings of the anchor nodes + node_embeddings = output[: data.batch_size].cpu() + + # We add ids and embeddings to the in-memory buffer + exporter.add_embedding( + id_batch=node_ids, + embedding_batch=node_embeddings, + embedding_type=embedding_type, + ) + + cumulative_inference_time += time.time() - inference_start_time + + if batch_idx > 0 and batch_idx % log_every_n_batch == 0: + logger.info( + f"Local rank {process_number_on_current_machine} processed {batch_idx} batches. " + f"{log_every_n_batch} batches took {time.time() - t:.2f} seconds. " + f"Among them, data loading took {cumulative_data_loading_time:.2f} seconds " + f"and model inference took {cumulative_inference_time:.2f} seconds." + ) + t = time.time() + cumulative_data_loading_time = 0 + cumulative_inference_time = 0 + + data_loading_start_time = time.time() + + logger.info( + f"--- Machine {distributed_context.global_rank} local rank {process_number_on_current_machine} finished inference."
+ ) + + write_embedding_start_time = time.time() + # Flushes all remaining embeddings to GCS + exporter.flush_embeddings() + + logger.info( + f"--- Machine {distributed_context.global_rank} local rank {process_number_on_current_machine} finished writing embeddings to GCS, which took {time.time()-write_embedding_start_time:.2f} seconds" + ) + + # We first call barrier to ensure that all machines and processes have finished inference. Only once this is ensured is it safe to delete the data loader on the current + # machine + process -- otherwise we may fail on processes which are still doing on-the-fly subgraph sampling. We then call `gc.collect()` to cleanup the memory + # used by the data_loader on the current machine. + + barrier() + + del data_loader + gc.collect() + + logger.info( + f"--- All machines local rank {process_number_on_current_machine} finished inference. Deleted data loader" + ) + + # Clean up for a graceful exit + shutdown_rpc() + + +def _run_example_inference( + job_name: str, + task_config_uri: str, +) -> None: + """ + Runs an example inference pipeline using GiGL Orchestration. + Args: + job_name (str): Name of current job + task_config_uri (str): Path to frozen GBMLConfigPbWrapper + """ + # All machines run this logic to connect together, and return a distributed context with: + # - the (GCP) internal IP address of the rank 0 machine, which will be used by GLT for building RPC connections. + # - the current machine rank + # - the total number of machines (world size) + distributed_context: DistributedContext = connect_worker_pool() + + # Read from GbmlConfig for preprocessed data metadata, GNN model uri, and bigquery embedding table path, and additional inference args + gbml_config_pb_wrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri( + gbml_config_uri=UriFactory.create_uri(task_config_uri) + ) + + model_uri = UriFactory.create_uri( + gbml_config_pb_wrapper.gbml_config_pb.shared_config.trained_model_metadata.trained_model_uri + ) + + graph_metadata = gbml_config_pb_wrapper.graph_metadata_pb_wrapper + + output_bq_table_path = InferenceAssets.get_enumerated_embedding_table_path( + gbml_config_pb_wrapper, graph_metadata.homogeneous_node_type + ) + + bq_project_id, bq_dataset_id, bq_table_name = BqUtils.parse_bq_table_path( + bq_table_path=output_bq_table_path + ) + + embedding_output_gcs_folder = InferenceAssets.get_gcs_asset_write_path_prefix( + applied_task_identifier=AppliedTaskIdentifier(job_name), + bq_table_path=output_bq_table_path, + ) + + node_feature_dim = gbml_config_pb_wrapper.preprocessed_metadata_pb_wrapper.condensed_node_type_to_feature_dim_map[ + graph_metadata.homogeneous_condensed_node_type + ] + + edge_feature_dim = gbml_config_pb_wrapper.preprocessed_metadata_pb_wrapper.condensed_edge_type_to_feature_dim_map[ + graph_metadata.homogeneous_condensed_edge_type + ] + + inferencer_args = dict(gbml_config_pb_wrapper.inferencer_config.inferencer_args) + + # Should be a string which is either "in" or "out" + sample_edge_direction = inferencer_args.get("sample_edge_direction", "in") + + assert sample_edge_direction in ( + "in", + "out", + ), f"Provided edge direction from inference args must be one of `in` or `out`, got {sample_edge_direction}" + + inference_batch_size = gbml_config_pb_wrapper.inferencer_config.inference_batch_size + + num_inference_processes_per_machine = int( + inferencer_args.get("num_inference_processes_per_machine", "4") + ) # Current large-scale setting sets this value to 4 + + # We use a `SerializedGraphMetadata` object 
to store and organize information for loading serialized TFRecords from disk into memory. + # While this can be populated directly, we also provide a convenience utility `convert_pb_to_serialized_graph_metadata` to build the + # `SerializedGraphMetadata` object when using GiGL orchestration, leveraging fields of the GBMLConfigPbWrapper + + serialized_graph_metadata: SerializedGraphMetadata = convert_pb_to_serialized_graph_metadata( + preprocessed_metadata_pb_wrapper=gbml_config_pb_wrapper.preprocessed_metadata_pb_wrapper, + graph_metadata_pb_wrapper=gbml_config_pb_wrapper.graph_metadata_pb_wrapper, + ) + + ## Inference Start + + program_start_time = time.time() + + # We call a GiGL function to launch a process for loading TFRecords into memory, partitioning the graph across multiple machines, + # and registering that information to a DistLinkPredictionDataset class. + dataset: DistLinkPredictionDataset = build_dataset( + serialized_graph_metadata=serialized_graph_metadata, + distributed_context=distributed_context, + sample_edge_direction=sample_edge_direction, + ) + + inference_start_time = time.time() + + # When using mp.spawn with `nprocs`, the first argument is implicitly set to be the process number on the current machine. + mp.spawn( + fn=_inference_process, + args=( + num_inference_processes_per_machine, + distributed_context, + embedding_output_gcs_folder, + model_uri, + inference_batch_size, + dataset, + inferencer_args, + list(gbml_config_pb_wrapper.graph_metadata_pb_wrapper.node_types), + node_feature_dim, + edge_feature_dim, + ), + nprocs=num_inference_processes_per_machine, + join=True, + ) + + logger.info( + f"--- Inference finished on rank {distributed_context.global_rank}, which took {time.time()-inference_start_time:.2f} seconds" + ) + + # After inference is finished, we use the process on Machine 0 to load embeddings from GCS to BQ.
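+ # Only rank 0 triggers the load below; if every machine ran it, the same embedding files would be submitted as duplicate BigQuery load jobs against the output table.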
+ if distributed_context.global_rank == 0: + logger.info("--- Machine 0 triggers loading embeddings from GCS to BigQuery") + load_embedding_start_time = time.time() + + load_embeddings_to_bigquery( + gcs_folder=embedding_output_gcs_folder, + project_id=bq_project_id, + dataset_id=bq_dataset_id, + table_id=bq_table_name, + ) + logger.info( + f"Finished loading embeddings to BigQuery, which took {time.time()-load_embedding_start_time:.2f} seconds" + ) + + logger.info( + f"--- Program finished, which took {time.time()-program_start_time:.2f} seconds" + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Arguments for GLT distributed model inference on VertexAI" + ) + parser.add_argument( + "--job_name", + type=str, + help="Inference job name", + ) + parser.add_argument("--task_config_uri", type=str, help="Gbml config uri") + + # We use parse_known_args instead of parse_args since we only need job_name and task_config_uri for GLT inference + args, unused_args = parser.parse_known_args() + logger.info(f"Unused arguments: {unused_args}") + + # We only need `job_name` and `task_config_uri` for running inference + _run_example_inference( + job_name=args.job_name, + task_config_uri=args.task_config_uri, + ) diff --git a/mypy.ini b/mypy.ini index f663cf7..3ee3821 100644 --- a/mypy.ini +++ b/mypy.ini @@ -141,4 +141,3 @@ ignore_missing_imports = True [mypy-graphlearn_torch.*] ignore_missing_imports = True - diff --git a/pull_request_template.md b/pull_request_template.md index 13028ae..c82658c 100644 --- a/pull_request_template.md +++ b/pull_request_template.md @@ -1,10 +1,9 @@ **Scope of work done** - + - Where is the documentation for this feature?: N/A Did you add automated tests or write a test plan? diff --git a/python/gigl/analytics/graph_validation/__init__.py b/python/gigl/analytics/graph_validation/__init__.py index cbc8afb..8cf855a 100644 --- a/python/gigl/analytics/graph_validation/__init__.py +++ b/python/gigl/analytics/graph_validation/__init__.py @@ -1,4 +1,3 @@ -# autoflake: skip_file """ Graph validation utilities which can be used to ensure graph structure is valid and can be used by GiGL. diff --git a/python/gigl/common/__init__.py b/python/gigl/common/__init__.py index cc9cbee..38354ef 100644 --- a/python/gigl/common/__init__.py +++ b/python/gigl/common/__init__.py @@ -1,4 +1,3 @@ -# autoflake: skip_file """ Common utilities, services, and tools used throughout GiGL. 
""" diff --git a/python/gigl/common/beam/better_tfrecordio.py b/python/gigl/common/beam/better_tfrecordio.py index 465de1a..44bfe1d 100644 --- a/python/gigl/common/beam/better_tfrecordio.py +++ b/python/gigl/common/beam/better_tfrecordio.py @@ -19,7 +19,7 @@ from apache_beam.io.tfrecordio import _TFRecordUtil from apache_beam.transforms import PTransform -from gigl.common.beam.coders import PassthroughCoder, RuntimeTFExampleProtoCoderFn +from gigl.common.beam.coders import PassthroughCoder, RecordBatchToTFExampleCoderFn from gigl.common.logger import Logger logger = Logger() @@ -119,9 +119,13 @@ def __init__( def expand(self, pcoll): if self._transformed_metadata: logger.info("Using transformed_metadata to encode samples.") - pcoll = pcoll | "Runtime Encode TfExamples" >> beam.ParDo( - RuntimeTFExampleProtoCoderFn(), - transformed_metadata=self._transformed_metadata, + pcoll = ( + pcoll + | "Encode pyarrow.RecordBatch as serialized tf.train.Example" + >> beam.ParDo( + RecordBatchToTFExampleCoderFn(), + transformed_metadata=self._transformed_metadata, + ) ) else: logger.info("Using default proto serialization to encode samples.") diff --git a/python/gigl/common/beam/coders.py b/python/gigl/common/beam/coders.py index 096006d..a6562c1 100644 --- a/python/gigl/common/beam/coders.py +++ b/python/gigl/common/beam/coders.py @@ -1,10 +1,12 @@ from typing import Any, Dict, Iterable import apache_beam as beam +import pyarrow as pa import tensorflow as tf import tensorflow_transform.tf_metadata.dataset_metadata from apache_beam import coders from tensorflow_transform import common_types +from tfx_bsl.public import tfxio class PassthroughCoder(coders.Coder): @@ -50,3 +52,38 @@ def process( transformed_metadata.schema ) yield self._coder.encode(element) + + +class RecordBatchToTFExampleCoderFn(beam.DoFn): + """Encode pyarrow.RecordBatch to serialized tf.train.Example(s)""" + + def __init__(self): + self._coder = None + + def process( + self, + element: pa.RecordBatch, + transformed_metadata: tensorflow_transform.tf_metadata.dataset_metadata.DatasetMetadata, + *args, + **kwargs, + ) -> Iterable[bytes]: + """Note that transformed_metadata needs to be passed in as side input, i.e., as an argument + of process function, instead of being passed to class init, since it could potentially materialize + (depending on whether it is read from file or built by tft_beam.AnalyzeDataset) after the + class is constructed. 
+ + Args: + element (pa.RecordBatch): A batch of records, e.g., a batch of transformed features + transformed_metadata (tensorflow_transform.tf_metadata.dataset_metadata.DatasetMetadata): + containing the schema needed by RecordBatchToExamplesEncoder for encoding + + Yields: + bytes: serialized tf.Example + """ + if not self._coder: + self._coder = tfxio.RecordBatchToExamplesEncoder( + schema=transformed_metadata.schema + ) + encoded_examples = self._coder.encode(element) + for example in encoded_examples: + yield example diff --git a/python/gigl/common/collections/frozen_dict.py b/python/gigl/common/collections/frozen_dict.py index 0c900c8..84939b0 100644 --- a/python/gigl/common/collections/frozen_dict.py +++ b/python/gigl/common/collections/frozen_dict.py @@ -1,4 +1,5 @@ -from typing import Mapping, TypeVar +from collections.abc import Mapping +from typing import TypeVar KT = TypeVar("KT") VT = TypeVar("VT") diff --git a/python/gigl/common/collections/sorted_dict.py b/python/gigl/common/collections/sorted_dict.py index 37afa55..ae7fe36 100644 --- a/python/gigl/common/collections/sorted_dict.py +++ b/python/gigl/common/collections/sorted_dict.py @@ -1,4 +1,5 @@ -from typing import Mapping, TypeVar +from collections.abc import Mapping +from typing import TypeVar KT = TypeVar("KT") VT = TypeVar("VT") diff --git a/python/gigl/common/constants.py b/python/gigl/common/constants.py index 5cb48ed..fecb80d 100644 --- a/python/gigl/common/constants.py +++ b/python/gigl/common/constants.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List, Final +from typing import Dict, Final GIGL_ROOT_DIR: Final[Path] = ( Path(__file__).resolve().parent.parent.parent.parent @@ -11,6 +11,7 @@ GIGL_ROOT_DIR, "dep_vars.env" ).absolute() + def parse_makefile_vars(makefile_path: Path) -> Dict[str, str]: vars_dict: Dict[str, str] = {} with open(makefile_path, "r") as f: @@ -25,22 +26,26 @@ def parse_makefile_vars(makefile_path: Path) -> Dict[str, str]: _make_file_vars: Dict[str, str] = parse_makefile_vars(PATH_BASE_IMAGES_VARIABLE_FILE) -DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG: Final[str] = _make_file_vars[ +DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG: Final[str] = _make_file_vars[ "DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG" ] -DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG: Final[str] = _make_file_vars[ +DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG: Final[str] = _make_file_vars[ "DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG" ] -DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG: Final[str] = _make_file_vars[ +DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG: Final[str] = _make_file_vars[ "DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG" ] -SPARK_35_TFRECORD_JAR_GCS_PATH: Final[str] = _make_file_vars["SPARK_35_TFRECORD_JAR_GCS_PATH"] -SPARK_31_TFRECORD_JAR_GCS_PATH: Final[str] = _make_file_vars["SPARK_31_TFRECORD_JAR_GCS_PATH"] +SPARK_35_TFRECORD_JAR_GCS_PATH: Final[str] = _make_file_vars[ + "SPARK_35_TFRECORD_JAR_GCS_PATH" +] +SPARK_31_TFRECORD_JAR_GCS_PATH: Final[str] = _make_file_vars[ + "SPARK_31_TFRECORD_JAR_GCS_PATH" +] # Ensure that the local path is a fully resolved local path -SPARK_35_TFRECORD_JAR_LOCAL_PATH: Final[str] = str( +SPARK_35_TFRECORD_JAR_LOCAL_PATH: Final[str] = str( Path.joinpath(GIGL_ROOT_DIR, _make_file_vars["SPARK_35_TFRECORD_JAR_LOCAL_PATH"]) ) -SPARK_31_TFRECORD_JAR_LOCAL_PATH: Final[str] = str( +SPARK_31_TFRECORD_JAR_LOCAL_PATH: Final[str] = str( Path.joinpath(GIGL_ROOT_DIR, _make_file_vars["SPARK_31_TFRECORD_JAR_LOCAL_PATH"]) ) diff --git 
a/python/gigl/common/data/dataloaders.py b/python/gigl/common/data/dataloaders.py index 47ad236..f95f821 100644 --- a/python/gigl/common/data/dataloaders.py +++ b/python/gigl/common/data/dataloaders.py @@ -1,7 +1,8 @@ import time +from copy import deepcopy from dataclasses import dataclass from functools import partial -from typing import Callable, Dict, List, Literal, Optional, Sequence, Tuple, Union +from typing import Callable, Dict, List, Optional, Sequence, Tuple, Union import psutil import tensorflow as tf @@ -10,6 +11,7 @@ from gigl.common import Uri from gigl.common.logger import Logger +from gigl.common.utils.decorator import tf_on_cpu from gigl.src.common.types.features import FeatureTypes from gigl.src.common.utils.file_loader import FileLoader from gigl.src.data_preprocessor.lib.types import FeatureSpecDict @@ -28,14 +30,21 @@ class SerializedTFRecordInfo: tfrecord_uri_prefix: Uri # Feature names to load for the current entity feature_keys: Sequence[str] - # a dict of feature name -> FeatureSpec (eg. FixedLenFeature, VarlenFeature, SparseFeature, RaggedFeature) + # a dict of feature name -> FeatureSpec (eg. FixedLenFeature, VarlenFeature, SparseFeature, RaggedFeature). If entity keys are not present, we insert them during tensor loading feature_spec: FeatureSpecDict # Feature dimension of current entity feature_dim: int # Entity ID Key for current entity. If this is a Node Entity, this must be a string. If this is an edge entity, this must be a Tuple[str, str] for the source and destination ids. entity_key: Union[str, Tuple[str, str]] # The regex pattern to match the TFRecord files at the specified prefix - tfrecord_uri_pattern: str = ".*-of-.*.tfrecord(.gz)?$" + tfrecord_uri_pattern: str = ".*tfrecord(.gz)?$" + + @property + def is_node_entity(self) -> bool: + """ + Returns whether this serialized entity contains node or edge information by checking the type of entity_key + """ + return isinstance(self.entity_key, str) @dataclass(frozen=True) @@ -55,7 +64,6 @@ class TFDatasetOptions: deterministic (bool): Whether to use deterministic processing, if False then the order of elements can be non-deterministic. use_interleave (bool): Whether to use tf.data.Dataset.interleave to read files in parallel, if not set then `num_parallel_file_reads` will be used. num_parallel_file_reads (int): The number of files to read in parallel if `use_interleave` is False. - compression_type (Optional[Literal["", "ZLIB", "GZIP"]]): The compression type of the TFRecord files. If not provided then it's assumed the files are uncompressed. ram_budget_multiplier (float): The multiplier of the total system memory to set as the tf.data RAM budget.. """ @@ -64,7 +72,6 @@ class TFDatasetOptions: deterministic: bool = False use_interleave: bool = True num_parallel_file_reads: int = 64 - compression_type: Optional[Literal["", "ZLIB", "GZIP"]] = None ram_budget_multiplier: float = 0.5 @@ -143,7 +150,9 @@ def __init__(self, rank: int, world_size: int): self._world_size = world_size def _partition_children_uris( - self, uri: Uri, tfrecord_pattern: str + self, + uri: Uri, + tfrecord_pattern: str, ) -> Sequence[Uri]: """ Partition the children of `uri` evenly by world_size. The partitions differ in size by at most 1 file. @@ -156,6 +165,7 @@ def _partition_children_uris( Args: uri (Uri): The parent uri for whoms children should be partitioned. + tfrecord_pattern (str): Regex pattern to match for loading serialized tfrecords from uri prefix Returns: List[Uri]: The list of file Uris for the current partition. 
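# A minimal sketch of the even partitioning described in `_partition_children_uris`
# above: each rank takes a contiguous slice of the sorted file list, and slice sizes
# differ by at most one. `files`, `rank`, and `world_size` are illustrative stand-ins,
# not the module's actual internals.
def partition_files(files, rank, world_size):
    base, remainder = divmod(len(files), world_size)
    # The first `remainder` ranks each take one extra file.
    start = rank * base + min(rank, remainder)
    end = start + base + (1 if rank < remainder else 0)
    return sorted(files)[start:end]

# e.g. 10 files over 4 ranks yields partitions of sizes 3, 3, 2, 2.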
@@ -204,13 +214,14 @@ def _partition_children_uris( return uris[start_index:end_index] @staticmethod - def build_dataset_for_uris( + def _build_dataset_for_uris( uris: Sequence[Uri], feature_spec: FeatureSpecDict, opts: TFDatasetOptions = TFDatasetOptions(), ) -> tf.data.Dataset: """ - Builds a tf.data.Dataset to load tf.Examples serialized as TFRecord files into tf.Tensors. + Builds a tf.data.Dataset to load tf.Examples serialized as TFRecord files into tf.Tensors. This function will + automatically infer the compression type (if any) from the suffix of the files located at the TFRecord URI. Args: uris (Sequence[Uri]): The URIs of the TFRecord files to load. @@ -225,6 +236,10 @@ def build_dataset_for_uris( psutil.virtual_memory().total * opts.ram_budget_multiplier ) logger.info(f"Setting RAM budget to {data_opts.autotune.ram_budget}") + # TODO (mkolodner-sc): Throw error if we observe folder with mixed gz / tfrecord files + compression_type = ( + "GZIP" if all([uri.uri.endswith(".gz") for uri in uris]) else None + ) if opts.use_interleave: # Using .batch on the interleaved dataset provides a huge speed up (60%). # Using map on the interleaved dataset provides another smaller speedup (5%) @@ -233,7 +248,7 @@ def build_dataset_for_uris( .interleave( lambda uri: tf.data.TFRecordDataset( uri, - compression_type=opts.compression_type, + compression_type=compression_type, buffer_size=opts.file_buffer_size, ) .batch( @@ -251,7 +266,7 @@ def build_dataset_for_uris( else: dataset = tf.data.TFRecordDataset( [uri.uri for uri in uris], - compression_type=opts.compression_type, + compression_type=compression_type, buffer_size=opts.file_buffer_size, num_parallel_reads=opts.num_parallel_file_reads, ).batch( @@ -266,6 +281,7 @@ def build_dataset_for_uris( deterministic=opts.deterministic, ).prefetch(tf.data.AUTOTUNE) + @tf_on_cpu def load_as_torch_tensors( self, serialized_tf_record_info: SerializedTFRecordInfo, @@ -283,11 +299,26 @@ def load_as_torch_tensors( """ entity_key = serialized_tf_record_info.entity_key feature_keys = serialized_tf_record_info.feature_keys + + # We make a deep copy of the feature spec dict so that future modifications don't redirect to the input + + feature_spec_dict = deepcopy(serialized_tf_record_info.feature_spec) + if isinstance(entity_key, str): assert isinstance(entity_key, str) id_concat_axis = 0 proccess_id_tensor = lambda t: t[entity_key] entity_type = FeatureTypes.NODE + + # We manually inject the node id into the FeatureSpecDict so that the schema will include + # node ids in the produced batch when reading serialized tfrecords. + if entity_key not in feature_spec_dict: + logger.info( + f"Injecting entity key {entity_key} into feature spec dictionary with value `tf.io.FixedLenFeature(shape=[], dtype=tf.int64)`" + ) + feature_spec_dict[entity_key] = tf.io.FixedLenFeature( + shape=[], dtype=tf.int64 + ) else: id_concat_axis = 1 proccess_id_tensor = lambda t: tf.stack( @@ -295,6 +326,24 @@ def load_as_torch_tensors( ) entity_type = FeatureTypes.EDGE + # We manually inject the edge ids into the FeatureSpecDict so that the schema will include + # edge ids in the produced batch when reading serialized tfrecords. 
+ if entity_key[0] not in feature_spec_dict: + logger.info( + f"Injecting entity key {entity_key[0]} into feature spec dictionary with value `tf.io.FixedLenFeature(shape=[], dtype=tf.int64)`" + ) + feature_spec_dict[entity_key[0]] = tf.io.FixedLenFeature( + shape=[], dtype=tf.int64 + ) + + if entity_key[1] not in feature_spec_dict: + logger.info( + f"Injecting entity key {entity_key[1]} into feature spec dictionary with value `tf.io.FixedLenFeature(shape=[], dtype=tf.int64)`" + ) + feature_spec_dict[entity_key[1]] = tf.io.FixedLenFeature( + shape=[], dtype=tf.int64 + ) + uris = self._partition_children_uris( serialized_tf_record_info.tfrecord_uri_prefix, serialized_tf_record_info.tfrecord_uri_pattern, @@ -315,11 +364,12 @@ def load_as_torch_tensors( ) return empty_entity, empty_feature - dataset = TFRecordDataLoader.build_dataset_for_uris( + dataset = TFRecordDataLoader._build_dataset_for_uris( uris=uris, - feature_spec=serialized_tf_record_info.feature_spec, + feature_spec=feature_spec_dict, opts=tf_dataset_options, ) + start_time = time.perf_counter() num_entities_processed = 0 id_tensors = [] diff --git a/python/gigl/common/data/export.py b/python/gigl/common/data/export.py index b429e28..394cc1c 100644 --- a/python/gigl/common/data/export.py +++ b/python/gigl/common/data/export.py @@ -6,8 +6,8 @@ However, if we switch to an on-line upload scheme, where we upload the embeddings as they are generated, then we should look into if parquet or orc files are more performant in that modality. """ - import io +import os import time from typing import Final, Optional, Sequence @@ -16,19 +16,19 @@ import torch from google.cloud import bigquery from google.cloud.exceptions import GoogleCloudError +from typing_extensions import Self -from gigl.common import GcsUri, Uri +from gigl.common import GcsUri from gigl.common.logger import Logger from gigl.common.utils.gcs import GcsUtils from gigl.common.utils.retry import retry -from gigl.src.common.utils.file_loader import FileLoader logger = Logger() # Shared key names between Avro and BigQuery schemas. -_NODE_ID_KEY = "node_id" -_EMBEDDING_TYPE_KEY = "node_type" -_EMBEDDING_KEY = "emb" +_NODE_ID_KEY: Final[str] = "node_id" +_EMBEDDING_TYPE_KEY: Final[str] = "node_type" +_EMBEDDING_KEY: Final[str] = "emb" # AVRO schema for embedding records. AVRO_SCHEMA: Final[fastavro.types.Schema] = { @@ -193,10 +193,10 @@ def flush_embeddings(self): return self._flush() - def __enter__(self) -> "EmbeddingExporter": + def __enter__(self) -> Self: if self._in_context: raise RuntimeError( - "EmbeddingExporter is already in a context. Do not call `with EmbeddingExporter:` in a nested manner." + f"{type(self).__name__} is already in a context. Do not call `with {type(self).__name__}:` in a nested manner." ) self._in_context = True @@ -213,6 +213,15 @@ def load_embeddings_to_bigquery( ) -> None: """ Loads multiple Avro files containing GNN embeddings from GCS into BigQuery. + + Note that this function will upload *all* Avro files in the GCS folder to BigQuery, recursively. + So if we have some nested directories, e.g.: + + gs://MY BUCKET/embeddings/shard_0000.avro + gs://MY BUCKET/embeddings/nested/shard_0001.avro + + Both files will be uploaded to BigQuery. + Args: gcs_folder (GcsUri): The GCS folder containing the Avro files with embeddings. project_id (str): The GCP project ID. @@ -220,6 +229,7 @@ def load_embeddings_to_bigquery( table_id (str): The BigQuery table ID. 
""" start = time.perf_counter() + logger.info(f"Loading embeddings from {gcs_folder} to BigQuery.") # Initialize the BigQuery client bigquery_client = bigquery.Client(project=project_id) @@ -227,9 +237,6 @@ def load_embeddings_to_bigquery( dataset_ref = bigquery_client.dataset(dataset_id) table_ref = dataset_ref.table(table_id) - loader = FileLoader() - file_uris: Sequence[Uri] = loader.list_children(gcs_folder, pattern=".*avro") - # Configure the load job job_config = bigquery.LoadJobConfig( source_format=bigquery.SourceFormat.AVRO, @@ -238,7 +245,7 @@ def load_embeddings_to_bigquery( ) load_job = bigquery_client.load_table_from_uri( - source_uris=[file_uri.uri for file_uri in file_uris], + source_uris=os.path.join(gcs_folder.uri, "*.avro"), destination=table_ref, job_config=job_config, ) diff --git a/python/gigl/common/data/load_torch_tensors.py b/python/gigl/common/data/load_torch_tensors.py new file mode 100644 index 0000000..45dafdf --- /dev/null +++ b/python/gigl/common/data/load_torch_tensors.py @@ -0,0 +1,344 @@ +import time +import traceback +from dataclasses import dataclass +from typing import Dict, MutableMapping, Optional, Union + +import torch +import torch.multiprocessing as mp +from graphlearn_torch.distributed.rpc import barrier, rpc_is_initialized +from torch.multiprocessing import Manager + +from gigl.common.data.dataloaders import ( + SerializedTFRecordInfo, + TFDatasetOptions, + TFRecordDataLoader, +) +from gigl.common.logger import Logger +from gigl.src.common.types.graph_data import EdgeType, NodeType +from gigl.types.data import LoadedGraphTensors +from gigl.types.distributed import ( + DEFAULT_HOMOGENEOUS_EDGE_TYPE, + DEFAULT_HOMOGENEOUS_NODE_TYPE, +) +from gigl.utils.share_memory import share_memory + +logger = Logger() + +_ID_FMT = "{entity}_ids" +_FEATURE_FMT = "{entity}_features" +_NODE_KEY = "node" +_EDGE_KEY = "edge" +_POSITIVE_LABEL_KEY = "positive_label" +_NEGATIVE_LABEL_KEY = "negative_label" + + +@dataclass(frozen=True) +class SerializedGraphMetadata: + """ + Stores information for all entities. If homogeneous, all types are of type SerializedTFRecordInfo. Otherwise, they are dictionaries with the corresponding mapping. + """ + + # Node Entity Info for loading node tensors, a SerializedTFRecordInfo for homogeneous and Dict[NodeType, SerializedTFRecordInfo] for heterogeneous cases + node_entity_info: Union[ + SerializedTFRecordInfo, Dict[NodeType, SerializedTFRecordInfo] + ] + # Edge Entity Info for loading edge tensors, a SerializedTFRecordInfo for homogeneous and Dict[EdgeType, SerializedTFRecordInfo] for heterogeneous cases + edge_entity_info: Union[ + SerializedTFRecordInfo, Dict[EdgeType, SerializedTFRecordInfo] + ] + # Positive Label Entity Info, if present, a SerializedTFRecordInfo for homogeneous and Dict[EdgeType, SerializedTFRecordInfo] for heterogeneous cases. May be None + # for specific edge types. If data has no positive labels across all edge types, this value is None + positive_label_entity_info: Optional[ + Union[SerializedTFRecordInfo, Dict[EdgeType, Optional[SerializedTFRecordInfo]]] + ] = None + # Negative Label Entity Info, if present, a SerializedTFRecordInfo for homogeneous and Dict[EdgeType, SerializedTFRecordInfo] for heterogeneous cases. May be None + # for specific edge types. If input has no negative labels across all edge types, this value is None. 
+    negative_label_entity_info: Optional[
+        Union[SerializedTFRecordInfo, Dict[EdgeType, Optional[SerializedTFRecordInfo]]]
+    ] = None
+
+
+def _data_loading_process(
+    tf_record_dataloader: TFRecordDataLoader,
+    output_dict: MutableMapping[
+        str, Union[torch.Tensor, Dict[Union[NodeType, EdgeType], torch.Tensor]]
+    ],
+    error_dict: MutableMapping[str, str],
+    entity_type: str,
+    serialized_tf_record_info: Union[
+        SerializedTFRecordInfo,
+        Dict[Union[NodeType, EdgeType], SerializedTFRecordInfo],
+    ],
+    rank: int,
+    tf_dataset_options: TFDatasetOptions = TFDatasetOptions(),
+) -> None:
+    """
+    Spawned multiprocessing.Process which loads homogeneous or heterogeneous information for a specific entity type [node, edge, positive_label, negative_label]
+    and moves it to shared memory. Also logs timing information for the duration of loading. If an exception is thrown, its traceback will be stored in
+    error_dict under the entity type key, since exceptions from spawned processes won't properly be raised to the parent process.
+
+    Args:
+        tf_record_dataloader (TFRecordDataLoader): TFRecordDataloader used for loading tensors from serialized tfrecords
+        output_dict (MutableMapping[str, Union[torch.Tensor, Dict[Union[NodeType, EdgeType], torch.Tensor]]]):
+            Dictionary initialized by mp.Manager().dict() in which outputs of tensor loading will be written
+        error_dict (MutableMapping[str, str]): Dictionary initialized by mp.Manager().dict() in which tracebacks of errors raised in the current process will be written
+        entity_type (str): Entity type to prefix ids, features, and error keys with when
+            writing to the output_dict and error_dict fields
+        serialized_tf_record_info (Union[SerializedTFRecordInfo, Dict[NodeType, SerializedTFRecordInfo], Dict[EdgeType, SerializedTFRecordInfo]]):
+            Serialized information for the current entity
+        rank (int): Rank of the current machine
+        tf_dataset_options (TFDatasetOptions): The options to use when building the dataset.
+ """ + # We add a try - except clause here to ensure that exceptions are properly circulated back to the parent process + try: + # To simplify the logic to proceed on a singular code path, we convert homogeneous inputs to heterogeneous just within the scope of this function + if isinstance(serialized_tf_record_info, SerializedTFRecordInfo): + serialized_tf_record_info = ( + {DEFAULT_HOMOGENEOUS_NODE_TYPE: serialized_tf_record_info} + if serialized_tf_record_info.is_node_entity + else {DEFAULT_HOMOGENEOUS_EDGE_TYPE: serialized_tf_record_info} + ) + is_input_homogeneous = True + else: + is_input_homogeneous = False + + all_tf_record_uris = [ + serialized_entity.tfrecord_uri_prefix.uri + for serialized_entity in serialized_tf_record_info.values() + ] + + start_time = time.time() + + logger.info( + f"Rank {rank} has begun to load data from tfrecord directories: {all_tf_record_uris}" + ) + + ids: Dict[Union[NodeType, EdgeType], torch.Tensor] = {} + features: Dict[Union[NodeType, EdgeType], torch.Tensor] = {} + for ( + graph_type, + serialized_entity_tf_record_info, + ) in serialized_tf_record_info.items(): + ( + entity_ids, + entity_features, + ) = tf_record_dataloader.load_as_torch_tensors( + serialized_tf_record_info=serialized_entity_tf_record_info, + tf_dataset_options=tf_dataset_options, + ) + ids[graph_type] = entity_ids + logger.info( + f"Rank {rank} finished loading {entity_type} ids of shape {entity_ids.shape} for graph type {graph_type} from {serialized_entity_tf_record_info.tfrecord_uri_prefix.uri}" + ) + if entity_features is not None: + features[graph_type] = entity_features + logger.info( + f"Rank {rank} finished loading {entity_type} features of shape {entity_features.shape} for graph type {graph_type} from {serialized_entity_tf_record_info.tfrecord_uri_prefix.uri}" + ) + else: + logger.info( + f"Rank {rank} did not detect {entity_type} features for graph type {graph_type} from {serialized_entity_tf_record_info.tfrecord_uri_prefix.uri}" + ) + + logger.info( + f"Rank {rank} is attempting to share {entity_type} id memory for tfrecord directories: {all_tf_record_uris}" + ) + share_memory(ids) + # We convert the ids back to homogeneous from the default heterogeneous setup if our provided input was homogeneous + + if features: + logger.info( + f"Rank {rank} is attempting to share {entity_type} feature memory for tfrecord directories: {all_tf_record_uris}" + ) + share_memory(features) + # We convert the features back to homogeneous from the default heterogeneous setup if our provided input was homogeneous + + output_dict[_ID_FMT.format(entity=entity_type)] = ( + list(ids.values())[0] if is_input_homogeneous else ids + ) + if features: + output_dict[_FEATURE_FMT.format(entity=entity_type)] = ( + list(features.values())[0] if is_input_homogeneous else features + ) + + logger.info( + f"Rank {rank} has finished loading {entity_type} data from tfrecord directories: {all_tf_record_uris}, elapsed time: {time.time() - start_time:.2f} seconds" + ) + + except Exception: + error_dict[entity_type] = traceback.format_exc() + + +def load_torch_tensors_from_tf_record( + tf_record_dataloader: TFRecordDataLoader, + serialized_graph_metadata: SerializedGraphMetadata, + should_load_tensors_in_parallel: bool, + rank: int = 0, + tf_dataset_options: TFDatasetOptions = TFDatasetOptions(), +) -> LoadedGraphTensors: + """ + Loads all torch tensors from a SerializedGraphMetadata object for all entity [node, edge, positive_label, negative_label] and edge / node types. 
+
+    Running these processes in parallel slows the runtime of each individual process, but may still result in a net speedup across all entity types. As a result,
+    there is a tradeoff that needs to be made between parallel and sequential tensor loading, which is why we don't parallelize across node and edge types. We expose
+    `should_load_tensors_in_parallel` to allow some customization of the loading strategy based on the input data.
+
+    Args:
+        tf_record_dataloader (TFRecordDataLoader): TFRecordDataloader used for loading tensors from serialized tfrecords
+        serialized_graph_metadata (SerializedGraphMetadata): Serialized graph metadata containing serialized information for loading tfrecords across node and edge types
+        should_load_tensors_in_parallel (bool): Whether tensors should be loaded from serialized information in parallel or in sequence across the [node, edge, pos_label, neg_label] entity types.
+        rank (int): Rank of the current machine
+        tf_dataset_options (TFDatasetOptions): The options to use when building the dataset.
+    Returns:
+        loaded_graph_tensors (LoadedGraphTensors): Unpartitioned Graph Tensors
+    """
+
+    logger.info(f"Rank {rank} starting to load torch tensors from serialized info ...")
+    start_time = time.time()
+
+    manager = Manager()
+
+    # By default, torch processes are created using the `fork` method, which makes a copy of the entire process. This can be problematic in multi-threaded settings,
+    # especially when working with TensorFlow, since this includes all threads, which can lead to deadlocks or other synchronization issues. As a result, we set the
+    # start method to spawn, which creates a new Python interpreter process and is much safer with multi-threading applications.
+    ctx = mp.get_context("spawn")
+
+    node_output_dict: MutableMapping[
+        str, Union[torch.Tensor, Dict[NodeType, torch.Tensor]]
+    ] = manager.dict()
+
+    edge_output_dict: MutableMapping[
+        str, Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]
+    ] = manager.dict()
+
+    error_dict: MutableMapping[str, str] = manager.dict()
+
+    node_data_loading_process = ctx.Process(
+        target=_data_loading_process,
+        kwargs={
+            "tf_record_dataloader": tf_record_dataloader,
+            "output_dict": node_output_dict,
+            "error_dict": error_dict,
+            "entity_type": _NODE_KEY,
+            "serialized_tf_record_info": serialized_graph_metadata.node_entity_info,
+            "rank": rank,
+            "tf_dataset_options": tf_dataset_options,
+        },
+    )
+
+    edge_data_loading_process = ctx.Process(
+        target=_data_loading_process,
+        kwargs={
+            "tf_record_dataloader": tf_record_dataloader,
+            "output_dict": edge_output_dict,
+            "error_dict": error_dict,
+            "entity_type": _EDGE_KEY,
+            "serialized_tf_record_info": serialized_graph_metadata.edge_entity_info,
+            "rank": rank,
+            "tf_dataset_options": tf_dataset_options,
+        },
+    )
+
+    if serialized_graph_metadata.positive_label_entity_info is not None:
+        positive_label_data_loading_process = ctx.Process(
+            target=_data_loading_process,
+            kwargs={
+                "tf_record_dataloader": tf_record_dataloader,
+                "output_dict": edge_output_dict,
+                "error_dict": error_dict,
+                "entity_type": _POSITIVE_LABEL_KEY,
+                "serialized_tf_record_info": serialized_graph_metadata.positive_label_entity_info,
+                "rank": rank,
+                "tf_dataset_options": tf_dataset_options,
+            },
+        )
+    else:
+        logger.info(f"No positive labels detected from input data")
+
+    if serialized_graph_metadata.negative_label_entity_info is not None:
+        negative_label_data_loading_process = ctx.Process(
+            target=_data_loading_process,
+            kwargs={
+                "tf_record_dataloader": tf_record_dataloader,
+
"output_dict": edge_output_dict, + "error_dict": error_dict, + "entity_type": _NEGATIVE_LABEL_KEY, + "serialized_tf_record_info": serialized_graph_metadata.negative_label_entity_info, + "rank": rank, + "tf_dataset_options": tf_dataset_options, + }, + ) + else: + logger.info(f"No negative labels detected from input data") + + if should_load_tensors_in_parallel: + # In this setting, we start all the processes at once and join them at the end to achieve parallelized tensor loading + logger.info("Loading Serialized TFRecord Data in Parallel ...") + node_data_loading_process.start() + edge_data_loading_process.start() + if serialized_graph_metadata.positive_label_entity_info is not None: + positive_label_data_loading_process.start() + if serialized_graph_metadata.negative_label_entity_info is not None: + negative_label_data_loading_process.start() + + node_data_loading_process.join() + edge_data_loading_process.join() + if serialized_graph_metadata.positive_label_entity_info is not None: + positive_label_data_loading_process.join() + if serialized_graph_metadata.negative_label_entity_info is not None: + negative_label_data_loading_process.join() + else: + # In this setting, we start and join each process one-at-a-time in order to achieve sequential tensor loading + logger.info("Loading Serialized TFRecord Data in Sequence ...") + node_data_loading_process.start() + node_data_loading_process.join() + edge_data_loading_process.start() + edge_data_loading_process.join() + if serialized_graph_metadata.positive_label_entity_info is not None: + positive_label_data_loading_process.start() + positive_label_data_loading_process.join() + if serialized_graph_metadata.negative_label_entity_info is not None: + negative_label_data_loading_process.start() + negative_label_data_loading_process.join() + + if error_dict: + for entity_type, traceback in error_dict.items(): + logger.error( + f"Identified error in {entity_type} data loading process: \n{traceback}" + ) + raise ValueError( + f"Raised error in data loading processes for entity types {error_dict.keys()}." + ) + + node_ids = node_output_dict[_ID_FMT.format(entity=_NODE_KEY)] + node_features = node_output_dict.get(_FEATURE_FMT.format(entity=_NODE_KEY), None) + + edge_index = edge_output_dict[_ID_FMT.format(entity=_EDGE_KEY)] + edge_features = edge_output_dict.get(_FEATURE_FMT.format(entity=_EDGE_KEY), None) + + positive_labels = edge_output_dict.get( + _ID_FMT.format(entity=_POSITIVE_LABEL_KEY), None + ) + + negative_labels = edge_output_dict.get( + _ID_FMT.format(entity=_NEGATIVE_LABEL_KEY), None + ) + + if rpc_is_initialized(): + logger.info( + f"Rank {rank} has finished loading data in {time.time() - start_time:.2f} seconds. 
Wait for other ranks to finish loading data from tfrecords" + ) + barrier() + + logger.info( + f"All ranks have finished loading data from tfrecords, rank {rank} finished in {time.time() - start_time:.2f} seconds" + ) + + return LoadedGraphTensors( + node_ids=node_ids, + node_features=node_features, + edge_index=edge_index, + edge_features=edge_features, + positive_label=positive_labels, + negative_label=negative_labels, + ) diff --git a/python/gigl/common/services/kfp.py b/python/gigl/common/services/kfp.py deleted file mode 100644 index f2b70b8..0000000 --- a/python/gigl/common/services/kfp.py +++ /dev/null @@ -1,293 +0,0 @@ -import json -import os -from enum import Enum -from typing import Dict, Optional - -import kfp # type: ignore -import requests -from kfp._auth import get_gcp_access_token -from kfp_server_api.models.api_list_experiments_response import ( - ApiListExperimentsResponse, -) -from kfp_server_api.models.api_list_runs_response import ApiListRunsResponse -from kfp_server_api.models.api_run import ApiRun -from kfp_server_api.models.api_run_detail import ApiRunDetail - -from gigl.common.logger import Logger -from gigl.common.types.wrappers.kfp_api import ApiRunDetailWrapper -from gigl.common.utils.func_tools import lru_cache - -logger = Logger() - - -class _KfpApiFilterOperations(Enum): - # See: https://github.com/kubeflow/pipelines/blob/ed9a5abe3a69c5e9269a375d334df16423ed5ca1/backend/api/v1beta1/filter.proto#L27 - UNKNOWN = 0 - EQUALS = 1 - NOT_EQUALS = 2 - GREATER_THAN = 3 - GREATER_THAN_EQUALS = 5 - LESS_THAN = 6 - LESS_THAN_EQUALS = 7 - IN = 8 - IS_SUBSTRING = 9 - - -class KFPService: - """ - A service class that provides methods to interact with Kubeflow Pipelines (KFP). - """ - - def __init__(self, kfp_host: str, k8_sa: str) -> None: - """ - Initializes a KFPService object. - - Args: - kfp_host (str): The host URL of the KFP instance. - k8_sa (str): The service account associated with the KFP instance. - """ - self.kfp_host = kfp_host - self.kfp_client = kfp.Client(host=self.kfp_host) - self.k8_sa = k8_sa - - def __get_auth_header(self) -> Dict[str, str]: - assert self.kfp_host.startswith( - "https://" - ), "Lets not send our tokens over unencrypted connections" - assert self.kfp_host.endswith( - ".pipelines.googleusercontent.com" - ), "Only GCP hosted KFP instances are supported" - header = { - "Authorization": f"Bearer {get_gcp_access_token()}", - } - return header - - def get_experiment_name_by_id(self, experiment_id: str) -> str: - """ - Retrieves the name of a Kubeflow Pipelines experiment given its ID. - - Args: - experiment_id (str): The ID of the experiment. - - Returns: - str: The name of the experiment. - """ - return self.kfp_client.get_experiment(experiment_id=experiment_id).name - - def run_pipeline( - self, - pipeline_bundle_path: str, - run_name: str, - experiment_id: str, - run_keyword_args: Dict[str, str], - ) -> str: - """ - Runs a pipeline using the KFP instance. - - Args: - pipeline_bundle_path (str): The path to the pipeline bundle file. - run_name (str): The name of the run. - experiment_id (str): The ID of the experiment. - run_keyword_args (Dict[str, str]): The keyword arguments for the run. - - Returns: - str: The ID of the run. 
- """ - if not os.path.isfile(pipeline_bundle_path): - raise RuntimeError( - f"Pipeline bundle file does not exist at {pipeline_bundle_path}" - ) - - experiment_name = ( - self.get_experiment_name_by_id(experiment_id) if experiment_id else None - ) - - pipeline_res = self.kfp_client.create_run_from_pipeline_package( - pipeline_file=pipeline_bundle_path, - arguments=run_keyword_args, - run_name=run_name, - experiment_name=experiment_name, - service_account=self.k8_sa, - ) - logger.info( - f"Created run @ {self.kfp_host}/#/runs/details/{pipeline_res.run_id}" - ) - - return pipeline_res.run_id - - def upload_pipeline_version( - self, pipeline_bundle_path: str, pipeline_id: str, pipeline_version_name: str - ) -> str: - """ - Uploads the pipeline to the Kubeflow Pipelines cluster. - - Args: - pipeline_bundle_path (str): The path to the pipeline bundle file. - pipeline_id (str): The ID of the pipeline. - pipeline_version_name (str): The name of the pipeline version. - - Returns: - str: The URL of the pipeline on the Kubeflow Pipelines cluster. - """ - if not os.path.isfile(pipeline_bundle_path): - raise RuntimeError( - f"Pipeline bundle file does not exist at {pipeline_bundle_path}" - ) - - pipeline_version = self.kfp_client.pipeline_uploads.upload_pipeline_version( - pipeline_bundle_path, - name=pipeline_version_name, - pipelineid=pipeline_id, - ) - - logger.info( - f"Uploaded version {pipeline_version.name} to pipeline id {pipeline_id}. " - f"{self.kfp_host}/#/pipelines/details/{pipeline_id}" - ) - - return f"{self.kfp_host}/#/pipelines/details/{pipeline_id}" - - def get_latest_experiment_from_name( - self, experiment_name: str - ) -> Optional[ApiListExperimentsResponse]: - """ - Retrieves the latest experiment with a given name. - - Args: - experiment_name (str): The name of the experiment. - - Returns: - Optional[ApiListExperimentsResponse]: The latest experiment with the given name, or None if not found. - """ - filter = json.dumps( - { - "predicates": [ - { - "key": "name", - "op": _KfpApiFilterOperations.EQUALS.value, - "string_value": experiment_name, - } - ] - } - ) - resp_api_list: ApiListExperimentsResponse = self.kfp_client.list_experiments( - sort_by="created_at desc", filter=filter - ) - if len(resp_api_list.experiments) > 0: - return resp_api_list.experiments[0] - return None - - def get_latest_run_with_name( - self, - kfp_run_name: str, - experiment_name: str, - ) -> Optional[ApiRunDetailWrapper]: - """ - Retrieves the latest run with a given name and experiment name. - - Args: - kfp_run_name (str): The name of the run. - experiment_name (str): The name of the experiment. - - Returns: - Optional[ApiRunDetailWrapper]: The latest run with the given name and experiment name, or None if not found. - """ - experiment: ApiListExperimentsResponse = self.get_latest_experiment_from_name( - experiment_name=experiment_name - ) - experiment_id = experiment.id if experiment else None - # filter tries to find runs with name == kfp_run_name - filter = json.dumps( - { - "predicates": [ - { - "key": "name", - "op": _KfpApiFilterOperations.EQUALS.value, - "string_value": kfp_run_name, - } - ] - } - ) - - resp_api_list: ApiListRunsResponse = self.kfp_client.list_runs( - experiment_id=experiment_id, sort_by="created_at desc", filter=filter - ) - if len(resp_api_list.runs) > 0: - api_run: ApiRun = resp_api_list.runs[0] - return self.get_run(run_id=api_run.id) - return None - - def get_run( - self, - run_id: str, - ) -> ApiRunDetailWrapper: - """ - Retrieves the details of a run given its ID. 
-
-        Args:
-            run_id (str): The ID of the run.
-
-        Returns:
-            ApiRunDetailWrapper: The details of the run.
-        """
-        resp_api_run: ApiRunDetail = self.kfp_client.get_run(
-            run_id=run_id,
-        )
-        return ApiRunDetailWrapper(api_run=resp_api_run)
-
-    def wait_for_run_completion(self, run_id: str, timeout: float = 7200) -> None:
-        """
-        Waits for a run to complete.
-
-        Args:
-            run_id (str): The ID of the run.
-            timeout (float): The maximum time to wait for the run to complete, in seconds. Defaults to 7200.
-
-        Returns:
-            None
-        """
-        try:
-            run_response = self.kfp_client.wait_for_run_completion(
-                run_id=run_id, timeout=timeout
-            )
-            if run_response.run.status == "Succeeded":
-                logger.info("KFP finished with status Succeeded!")
-                return run_response.run.status
-            else:
-                raise RuntimeError(
-                    f"KFP run stop with status: {run_response.run.status}. "
-                    f"Please check the KFP page to trace down the error @ {self.kfp_host}/#/runs/details/{run_id}"
-                )
-        except Exception as e:
-            logger.error(
-                f"Error when waiting on KFP run {self.kfp_host}/#/runs/details/{run_id} to finish:"
-            )
-            raise e
-
-    @lru_cache(maxsize=1)
-    def get_host_k8_cluster_name(
-        self,
-    ) -> str:
-        """
-        Retrieves the name of the Kubernetes cluster that the KFP instance is running on.
-
-        Returns:
-            str: The name of the Kubernetes cluster.
-        """
-        request_url = f"{self.kfp_host}/system/cluster-name"
-        response = requests.get(request_url, headers=self.__get_auth_header())
-        return response.text
-
-    @lru_cache(maxsize=1)
-    def get_host_gcp_project_name(
-        self,
-    ) -> str:
-        """
-        Retrieves the name of the GCP project that the KFP instance is running on.
-
-        Returns:
-            str: The name of the GCP project.
-        """
-        request_url = f"{self.kfp_host}/system/project-id"
-        response = requests.get(request_url, headers=self.__get_auth_header())
-        return response.text
diff --git a/python/gigl/common/services/vertex_ai.py b/python/gigl/common/services/vertex_ai.py
index a1413f1..52c58dd 100644
--- a/python/gigl/common/services/vertex_ai.py
+++ b/python/gigl/common/services/vertex_ai.py
@@ -1,9 +1,68 @@
+"""Class for interacting with Vertex AI.
+
+Below are some brief definitions of the terminology used by Vertex AI Pipelines:
+
+Resource name: A globally unique identifier for the pipeline, follows https://google.aip.dev/122 and is of the form projects/<project>/locations/<location>/pipelineJobs/<job-name>
+Job name: aka job_id, aka PipelineJob.name; the name of a pipeline run, must be unique for a given project and location
+Display name: AFAICT purely cosmetic name for a pipeline, can be filtered on but does not show up in the UI
+Pipeline name: The name for the pipeline supplied by the pipeline definition (pipeline.yaml).
+
+And a walkthrough to explain how the terminology is used:
+```py
+@kfp.dsl.component
+def source() -> int:
+    return 42
+
+@kfp.dsl.component
+def doubler(a: int) -> int:
+    return a * 2
+
+@kfp.dsl.component
+def adder(a: int, b: int) -> int:
+    return a + b
+
+@kfp.dsl.pipeline
+def get_pipeline() -> int:  # NOTE: `get_pipeline` here is the Pipeline name
+    source_task = source()
+    double_task = doubler(a=source_task.output)
+    adder_task = adder(a=source_task.output, b=double_task.output)
+    return adder_task.output
+
+tempdir = tempfile.TemporaryDirectory()
+tf = os.path.join(tempdir.name, "pipeline.yaml")
+print(f"Writing pipeline definition to {tf}")
+kfp.compiler.Compiler().compile(get_pipeline, tf)
+job = aip.PipelineJob(
+    display_name="this_is_our_pipeline_display_name",
+    template_path=tf,
+    pipeline_root="gs://my-bucket/pipeline-root",
+)
+job.submit(service_account="my-sa@my-project.gserviceaccount.com")
+```
+
+Which outputs the following:
+Creating PipelineJob
+PipelineJob created. Resource name: projects/my-project-id/locations/us-central1/pipelineJobs/<job-name>
+To use this PipelineJob in another session:
+pipeline_job = aiplatform.PipelineJob.get('projects/my-project-id/locations/us-central1/pipelineJobs/<job-name>')
+View Pipeline Job:
+https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/<job-name>?project=my-project-id
+Associating projects/my-project-id/locations/us-central1/pipelineJobs/<job-name> to Experiment: example-experiment
+
+
+And `job` has some properties set as well:
+
+```py
+print(f"{job.display_name=}")  # job.display_name='this_is_our_pipeline_display_name'
+print(f"{job.resource_name=}")  # job.resource_name='projects/my-project-id/locations/us-central1/pipelineJobs/<job-name>'
+print(f"{job.name=}")  # job.name='<job-name>'  # NOTE: by default, the "job name" is the pipeline name + datetime
+```
+"""
+
 import datetime
-import os
-import subprocess
 import time
 from dataclasses import dataclass
-from typing import Dict, List, Optional
+from typing import Dict, Final, List, Optional
 
 from google.cloud import aiplatform
 from google.cloud.aiplatform_v1.types import (
@@ -13,19 +72,18 @@
     env_var,
 )
 
-from gigl.common import GcsUri
+from gigl.common import GcsUri, Uri
 from gigl.common.logger import Logger
-from gigl.common.utils.gcs import GcsUtils
 
 logger = Logger()
 
+LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY: Final[
+    str
+] = "LEADER_WORKER_INTERNAL_IP_FILE_PATH"
+
 
-def _ping_host_ip(host_ip):
-    try:
-        subprocess.check_output(["ping", "-c", "1", host_ip])
-        return True
-    except subprocess.CalledProcessError:
-        return False
+DEFAULT_PIPELINE_TIMEOUT_S: Final[int] = 60 * 60 * 36  # 36 hours
+DEFAULT_CUSTOM_JOB_TIMEOUT_S: Final[int] = 60 * 60 * 24  # 24 hours
 
 
 @dataclass
@@ -40,7 +98,9 @@ class VertexAiJobConfig:
     accelerator_count: int = 0
     replica_count: int = 1
     labels: Optional[Dict[str, str]] = None
-    timeout_s: Optional[int] = None
+    timeout_s: Optional[
+        int
+    ] = None  # Will default to DEFAULT_CUSTOM_JOB_TIMEOUT_S if not provided
     enable_web_access: bool = True
 
 
@@ -55,24 +115,33 @@ class VertexAIService:
     staging_bucket (str): The staging bucket for the service.
""" - _LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY = "LEADER_WORKER_INTERNAL_IP_FILE_PATH" - def __init__( - self, project: str, location: str, service_account: str, staging_bucket: str + self, + project: str, + location: str, + service_account: str, + staging_bucket: str, ): - self.project = project - self.location = location - self.service_account = service_account - self.staging_bucket = staging_bucket + self._project = project + self._location = location + self._service_account = service_account + self._staging_bucket = staging_bucket aiplatform.init( - project=self.project, - location=self.location, - staging_bucket=self.staging_bucket, + project=self._project, + location=self._location, + staging_bucket=self._staging_bucket, ) - def run(self, job_config: VertexAiJobConfig) -> None: + @property + def project(self) -> str: + """The GCP project that is being used for this service.""" + return self._project + + def launch_job(self, job_config: VertexAiJobConfig) -> None: """ - Run a Vertex AI job. + Launch a Vertex AI CustomJob. + See the docs for more info. + https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomJob Args: job_config (VertexAiJobConfig): The configuration for the job. @@ -91,14 +160,14 @@ def run(self, job_config: VertexAiJobConfig) -> None: # read this file to get the leader worker's internal IP address. # See connect_worker_pool() implementation for more details. leader_worker_internal_ip_file_path = GcsUri.join( - self.staging_bucket, + self._staging_bucket, job_config.job_name, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"), "leader_worker_internal_ip.txt", ) env_vars = [ env_var.EnvVar( - name=VertexAIService._LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY, + name=LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY, value=leader_worker_internal_ip_file_path.uri, ) ] @@ -129,13 +198,14 @@ def run(self, job_config: VertexAiJobConfig) -> None: worker_pool_specs.append(worker_spec) logger.info( - f"Running Custom job {job_config.job_name} with worker_pool_specs {worker_pool_specs}, in project: {self.project}/{self.location} using staging bucket: {self.staging_bucket}, and attached labels: {job_config.labels}" + f"Running Custom job {job_config.job_name} with worker_pool_specs {worker_pool_specs}, in project: {self._project}/{self._location} using staging bucket: {self._staging_bucket}, and attached labels: {job_config.labels}" ) if not job_config.timeout_s: logger.info( - "No timeout set for Vertex AI job, using Vertex AI default timeout of 7 days." + f"No timeout set for Vertex AI job, setting default timeout to {DEFAULT_CUSTOM_JOB_TIMEOUT_S/60/60} hours" ) + job_config.timeout_s = DEFAULT_CUSTOM_JOB_TIMEOUT_S else: logger.info( f"Running Vertex AI job with timeout {job_config.timeout_s} seconds" @@ -144,136 +214,122 @@ def run(self, job_config: VertexAiJobConfig) -> None: job = aiplatform.CustomJob( display_name=job_config.job_name, worker_pool_specs=worker_pool_specs, - project=self.project, - location=self.location, + project=self._project, + location=self._location, labels=job_config.labels, - staging_bucket=self.staging_bucket, + staging_bucket=self._staging_bucket, ) - - job.run( - service_account=self.service_account, + job.submit( + service_account=self._service_account, timeout=job_config.timeout_s, enable_web_access=job_config.enable_web_access, ) - - @staticmethod - def is_currently_running_in_vertex_ai_job() -> bool: - """ - Check if the code is running in a Vertex AI job. 
-
-        Returns:
-            bool: True if running in a Vertex AI job, False otherwise.
-        """
-        return VertexAIService.get_vertex_ai_job_id() is not None
-
-    @staticmethod
-    def get_vertex_ai_job_id() -> Optional[str]:
+        job.wait_for_resource_creation()
+        logger.info(f"Created job: {job.resource_name}")
+        # Copying https://github.com/googleapis/python-aiplatform/blob/v1.48.0/google/cloud/aiplatform/jobs.py#L207-L215
+        # Since for some reason upgrading from VertexAI v1.27.1 to v1.48.0
+        # caused the logs to occasionally not be printed.
+        logger.info(
+            f"See job logs at: https://console.cloud.google.com/ai/platform/locations/{self._location}/training/{job.name}?project={self._project}"
+        )
+        job.wait_for_completion()
+
+    def run_pipeline(
+        self,
+        display_name: str,
+        template_path: Uri,
+        run_keyword_args: Dict[str, str],
+        job_id: Optional[str] = None,
+        experiment: Optional[str] = None,
+    ) -> aiplatform.PipelineJob:
         """
-        Get the Vertex AI job ID.
+        Runs a pipeline using the Vertex AI Pipelines service.
+        For more info, see the Vertex AI docs
+        https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.PipelineJob#google_cloud_aiplatform_PipelineJob_submit
+        Args:
+            display_name (str): The display name of the pipeline.
+            template_path (Uri): The path to the compiled pipeline YAML.
+            run_keyword_args (Dict[str, str]): Runtime arguments passed to your pipeline.
+            job_id (Optional[str]): The ID of the job. If not provided, it will be the *pipeline_name* + datetime.
+                Note: The pipeline_name and display_name are *not* the same.
+                Note: pipeline_name is defined in the `template_path` and ultimately comes from the Python pipeline definition.
+                If provided, must be unique.
+            experiment (Optional[str]): The name of the experiment to associate the run with.
         Returns:
-            Optional[str]: The Vertex AI job ID, or None if not running in a Vertex AI job.
+            The PipelineJob created.
         """
-        return os.getenv("CLOUD_ML_JOB_ID")
-
-    @staticmethod
-    def get_host_name() -> Optional[str]:
-        """
-        Get the current machines hostname.
-        """
-        return os.getenv("HOSTNAME")
+        job = aiplatform.PipelineJob(
+            display_name=display_name,
+            template_path=template_path.uri,
+            parameter_values=run_keyword_args,
+            job_id=job_id,
+            project=self._project,
+            location=self._location,
+        )
+        job.submit(service_account=self._service_account, experiment=experiment)
+        logger.info(f"Created run: {job.resource_name}")
 
-    @staticmethod
-    def get_leader_hostname() -> Optional[str]:
-        """
-        Hostname of the machine that will host the process with rank 0. It is used
-        to synchronize the workers.
-        """
-        return os.getenv("MASTER_ADDR")
+        return job
 
-    @staticmethod
-    def get_leader_port() -> Optional[str]:
-        """
-        A free port on the machine that will host the process with rank 0.
-        """
-        return os.getenv("MASTER_PORT")
+    def get_pipeline_job_from_job_name(self, job_name: str) -> aiplatform.PipelineJob:
+        """Fetches the pipeline job with the given job name."""
+        return aiplatform.PipelineJob.get(
+            f"projects/{self._project}/locations/{self._location}/pipelineJobs/{job_name}"
+        )
 
     @staticmethod
-    def get_world_size() -> Optional[str]:
-        """
-        The total number of processes that VAI creates. Note that VAI only creates one process per machine.
-        It is the user's responsibility to create multiple processes per machine.
- """ - return os.getenv("WORLD_SIZE") + def get_pipeline_run_url(project: str, location: str, job_name: str) -> str: + """Returns the URL for the pipeline run.""" + return f"https://console.cloud.google.com/vertex-ai/locations/{location}/pipelines/runs/{job_name}?project={project}" @staticmethod - def get_rank() -> Optional[str]: + def wait_for_run_completion( + resource_name: str, + timeout: float = DEFAULT_PIPELINE_TIMEOUT_S, + polling_period_s: int = 60, + ) -> None: """ - Rank of the current VAI process, so they will know whether it is the master or a worker. - Note: that VAI only creates one process per machine. It is the user's responsibility to - create multiple processes per machine. Meaning, this function will only return one integer - for the main process that VAI creates. - """ - return os.getenv("RANK") + Waits for a run to complete. - @staticmethod - def connect_worker_pool() -> str: - """ - Used to connect the worker pool. This function should be called by all workers - to get the leader worker's internal IP address and to ensure that the workers - can all communicate with the leader worker. + Args: + resource_name (str): The resource name of the run. + timeout (float): The maximum time to wait for the run to complete, in seconds. Defaults to 7200. + polling_period_s (int): The time to wait between polling the run status, in seconds. Defaults to 60. + Returns: + None """ - is_leader_worker = VertexAIService.get_rank() == "0" - ip_file_uri = GcsUri(VertexAIService._get_leader_worker_internal_ip_file_path()) - gcs_utils = GcsUtils() - host_ip: str - if is_leader_worker: - logger.info("Wait 180 seconds for the leader machine to settle down.") - time.sleep(180) - host_ip = subprocess.check_output(["hostname", "-i"]).decode().strip() - logger.info(f"Writing host IP address ({host_ip}) to {ip_file_uri}") - gcs_utils.upload_from_string(gcs_path=ip_file_uri, content=host_ip) - else: - max_retries = 60 - interval_s = 30 - for attempt_num in range(1, max_retries + 1): - logger.info( - f"Checking if {ip_file_uri} exists and reading HOST_IP (attempt {attempt_num})..." - ) - try: - host_ip = gcs_utils.read_from_gcs(ip_file_uri) - logger.info(f"Pinging host ip ({host_ip}) ...") - if _ping_host_ip(host_ip): - logger.info(f"Ping to host ip ({host_ip}) was successful.") - break - except Exception as e: - logger.info(e) - logger.info( - f"Retrieving host information and/or ping failed, retrying in {interval_s} seconds..." - ) - time.sleep(interval_s) - if attempt_num >= max_retries: - logger.info( - f"Failed to ping HOST_IP after {max_retries} attempts. Exiting." + start_time = time.time() + run = aiplatform.PipelineJob.get(resource_name=resource_name) + while start_time + timeout > time.time(): + # Note that accesses to `run.state` cause a network call under the hood. + # We should be careful with accessing this too frequently, and "cache" + # the state if we need to access it multiple times in short succession. 
+ state = run.state + logger.info( + f"Run {resource_name} in state: {state.name if state else state}" + ) + if state == aiplatform.gapic.PipelineState.PIPELINE_STATE_SUCCEEDED: + logger.info("Vertex AI finished with status Succeeded!") + return + elif state in ( + aiplatform.gapic.PipelineState.PIPELINE_STATE_FAILED, + aiplatform.gapic.PipelineState.PIPELINE_STATE_CANCELLED, + ): + logger.warning(f"Vertex AI run stopped with status: {state.name}.") + logger.warning( + f"See run at: {VertexAIService.get_pipeline_run_url(run.project, run.location, run.name)}" ) - raise Exception(f"Failed to ping HOST_IP after {max_retries} attempts.") - - return host_ip - - @staticmethod - def _get_leader_worker_internal_ip_file_path() -> str: - """ - Get the file path to the leader worker's internal IP address. - """ - assert ( - VertexAIService.is_currently_running_in_vertex_ai_job() - ), "Not running in Vertex AI job." - internal_ip_file_path = os.getenv( - VertexAIService._LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY - ) - assert internal_ip_file_path is not None, ( - f"Internal IP file path ({VertexAIService._LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY}) " - + f"not found in environment variables. {os.environ}" - ) + raise RuntimeError(f"Vertex AI run stopped with status: {state.name}.") + time.sleep(polling_period_s) - return internal_ip_file_path + else: + logger.warning("Timeout reached. Stopping the run.") + logger.warning( + f"See run at: {VertexAIService.get_pipeline_run_url(run.project, run.location, run.name)}" + ) + run.cancel() + raise RuntimeError( + f"Vertex AI run stopped with status: {run.state}. " + f"Please check the Vertex AI page to trace down the error." + ) diff --git a/python/gigl/common/types/uri/local_uri.py b/python/gigl/common/types/uri/local_uri.py index f782437..1b6dd81 100644 --- a/python/gigl/common/types/uri/local_uri.py +++ b/python/gigl/common/types/uri/local_uri.py @@ -26,19 +26,6 @@ def join( joined_uri = super().join(token, *tokens) return cls(uri=joined_uri) - def get_file_name(self, assert_is_file: Optional[bool] = True) -> str: - """Returns the file name of the URI. - - Args: - assert_is_file (Optional[bool]): Whether to assert that the URI represents a file. Defaults to True. - - Returns: - str: The file name of the URI. - """ - if assert_is_file: - assert os.path.isfile(self.uri), f"{self.uri} is not a file" - return os.path.basename(self.uri) - @classmethod def is_valid( cls, uri: Union[str, Path, Uri], raise_exception: Optional[bool] = False diff --git a/python/gigl/common/types/uri/uri.py b/python/gigl/common/types/uri/uri.py index aafb922..b86fcec 100644 --- a/python/gigl/common/types/uri/uri.py +++ b/python/gigl/common/types/uri/uri.py @@ -65,6 +65,14 @@ def is_valid( f" for implementing custom is_valid logic." ) + def get_basename(self) -> str: + """ + The base name is the final component of the path, effectively extracting the file or directory name from a full path string. + i.e. 
get_basename("/foo/bar.txt") -> bar.txt + get_basename("gs://bucket/foo") -> foo + """ + return self.uri.split("/")[-1] + def __repr__(self) -> str: return self.uri diff --git a/python/gigl/common/types/wrappers/argo_workflow_manifest.py b/python/gigl/common/types/wrappers/argo_workflow_manifest.py deleted file mode 100644 index a57fedf..0000000 --- a/python/gigl/common/types/wrappers/argo_workflow_manifest.py +++ /dev/null @@ -1,90 +0,0 @@ -import json -from datetime import datetime -from typing import Dict - -import argo_workflows.models as models -from argo_workflows.model.io_argoproj_workflow_v1alpha1_node_status import ( - IoArgoprojWorkflowV1alpha1NodeStatus, -) -from argo_workflows.model.io_argoproj_workflow_v1alpha1_workflow_status import ( - IoArgoprojWorkflowV1alpha1WorkflowStatus, -) - -from gigl.common.utils.func_tools import lru_cache - -ARGO_WORFLOW_DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%SZ" - - -class ArgoWorkflowNodeStatus: - """Class allows us to create a strictly types wrapper around IoArgoprojWorkflowV1alpha1NodeStatus - to reduce cognitive load when working with the Argo Workflow API. - """ - - def __init__( - self, - io_argoproj_workflow_v1alpha1_node_status: IoArgoprojWorkflowV1alpha1NodeStatus, - ) -> None: - self._io_argoproj_workflow_v1alpha1_node_status: ( - IoArgoprojWorkflowV1alpha1NodeStatus - ) = io_argoproj_workflow_v1alpha1_node_status - - @property - def pod_name(self) -> str: - return self._io_argoproj_workflow_v1alpha1_node_status.id - - @property - def display_name(self) -> str: - return self._io_argoproj_workflow_v1alpha1_node_status.displayName - - @property - def finished_at(self) -> datetime: - return datetime.strptime( - self._io_argoproj_workflow_v1alpha1_node_status.finishedAt, - ARGO_WORFLOW_DATETIME_FORMAT, - ) - - @property - def started_at(self) -> datetime: - return datetime.strptime( - self._io_argoproj_workflow_v1alpha1_node_status.startedAt, - ARGO_WORFLOW_DATETIME_FORMAT, - ) - - -class ArgoWorkflowManifestWrapper: - def __init__(self, workflow_manifest_json_str: str) -> None: - manifest_dict = json.loads(workflow_manifest_json_str) - self.argo_workflow: models.IoArgoprojWorkflowV1alpha1Workflow = ( - models.IoArgoprojWorkflowV1alpha1Workflow( - **manifest_dict, _check_type=False - ) - ) - - @property # type: ignore - @lru_cache(maxsize=1) - def __node_status(self) -> Dict[str, IoArgoprojWorkflowV1alpha1NodeStatus]: - status = self.argo_workflow.status - workflow_status = IoArgoprojWorkflowV1alpha1WorkflowStatus( - **status, _check_type=False - ) - return { - k8_node_name: IoArgoprojWorkflowV1alpha1NodeStatus( - **pipeline_node_data, _check_type=False - ) - for k8_node_name, pipeline_node_data in workflow_status.nodes.items() - } - - @property # type: ignore - @lru_cache(maxsize=1) - def component_status_by_component_display_name( - self, - ) -> Dict[str, ArgoWorkflowNodeStatus]: - """ - - Returns: - Dict[str, ArgoWorkflowNodeStatus]: Note that the keys are the display name of the KFP component - """ - return { - node_status.displayName: ArgoWorkflowNodeStatus(node_status) - for node_status in self.__node_status.values() - } diff --git a/python/gigl/common/types/wrappers/kfp_api.py b/python/gigl/common/types/wrappers/kfp_api.py index f9cc12a..c2e5da8 100644 --- a/python/gigl/common/types/wrappers/kfp_api.py +++ b/python/gigl/common/types/wrappers/kfp_api.py @@ -1,46 +1,98 @@ import datetime -from typing import Dict +from typing import Dict, Optional -from kfp_server_api.models.api_parameter import ApiParameter -from 
kfp_server_api.models.api_pipeline_runtime import ApiPipelineRuntime -from kfp_server_api.models.api_pipeline_spec import ApiPipelineSpec -from kfp_server_api.models.api_run_detail import ApiRunDetail +import kfp_server_api +from kfp_server_api.models.v2beta1_pipeline_task_detail import V2beta1PipelineTaskDetail +from kfp_server_api.models.v2beta1_run_details import V2beta1RunDetails +from kfp_server_api.models.v2beta1_runtime_config import V2beta1RuntimeConfig -from gigl.common.types.wrappers.argo_workflow_manifest import ( - ArgoWorkflowManifestWrapper, -) +from gigl.common.logger import Logger from gigl.common.utils.func_tools import lru_cache +logger = Logger() + + +class KfpTaskDetails: + """Convenience class to access relevant task specific properties more easily.""" + + def __init__(self, pipeline_task_details: V2beta1PipelineTaskDetail) -> None: + self._pipeline_task_details: V2beta1PipelineTaskDetail = pipeline_task_details + + @property + def pod_name(self) -> Optional[str]: + pod_name = self._pipeline_task_details.pod_name + if pod_name is None: # Get child pod name instead + if len(self._pipeline_task_details.child_tasks) == 1: + pod_name = self._pipeline_task_details.child_tasks[0].pod_name + else: + logger.warning( + f"Multiple child tasks found. Unable to determine pod name for pipeline_task: {self._pipeline_task_details}." + ) + return pod_name + + @property + def display_name(self) -> str: + return self._pipeline_task_details.display_name + + @property + def finished_at(self) -> datetime.datetime: + return self._pipeline_task_details.end_time + + @property + def started_at(self) -> datetime.datetime: + return self._pipeline_task_details.start_time + + def __repr__(self) -> str: + return f""" + Task Details: + - Display Name: {self.display_name} + - Pod Name: {self.pod_name} + - Started At: {self.started_at} + - Finished At: {self.finished_at} + - V2beta1PipelineTaskDetail: {self._pipeline_task_details} + """ + class ApiRunDetailWrapper: - def __init__(self, api_run: ApiRunDetail) -> None: + def __init__(self, api_run: kfp_server_api.V2beta1Run) -> None: self._api_run = api_run @property - def api_run(self) -> ApiRunDetail: + def api_run(self) -> kfp_server_api.V2beta1Run: return self._api_run @property def created_at(self) -> datetime.datetime: - return self.api_run.run.created_at + return self.api_run.created_at @property def finished_at(self) -> datetime.datetime: - return self.api_run.run.finished_at + return self.api_run.finished_at - @property # type: ignore + @property @lru_cache(maxsize=1) def job_parameters(self) -> Dict[str, str]: parameters_dict: Dict[str, str] = {} - pipeline_spec: ApiPipelineSpec = self.api_run.run.pipeline_spec - param: ApiParameter - for param in pipeline_spec.parameters: - parameters_dict[param.name] = param.value + + runtime_config: V2beta1RuntimeConfig = self.api_run.runtime_config + for name, val in runtime_config.parameters.items(): + parameters_dict[name] = val return parameters_dict @property - def workflow_manifest(self) -> ArgoWorkflowManifestWrapper: - pipeline_runtime: ApiPipelineRuntime = self._api_run.pipeline_runtime - manifest = ArgoWorkflowManifestWrapper(pipeline_runtime.workflow_manifest) - return manifest + @lru_cache(maxsize=1) + def task_details_map( + self, + ) -> Dict[str, KfpTaskDetails]: + """ + + Returns: + Dict[str, KfpTaskDetails]: Note that the keys are the display name of the KFP component + """ + task_details_dict: Dict[str, KfpTaskDetails] = {} + run_details: V2beta1RunDetails = self.api_run.run_details + task: 
V2beta1PipelineTaskDetail + for task in run_details.task_details: + task_details_dict[task.display_name] = KfpTaskDetails(task) + return task_details_dict diff --git a/python/gigl/common/utils/compute/serialization/coder.py b/python/gigl/common/utils/compute/serialization/coder.py index de66e3c..86f9e23 100644 --- a/python/gigl/common/utils/compute/serialization/coder.py +++ b/python/gigl/common/utils/compute/serialization/coder.py @@ -4,6 +4,8 @@ class CoderProtocol(Protocol, Generic[T]): - def encode(self, obj: T) -> bytes: ... + def encode(self, obj: T) -> bytes: + ... - def decode(self, byte_str: bytes) -> T: ... + def decode(self, byte_str: bytes) -> T: + ... diff --git a/python/gigl/common/utils/decorator.py b/python/gigl/common/utils/decorator.py new file mode 100644 index 0000000..b8551a5 --- /dev/null +++ b/python/gigl/common/utils/decorator.py @@ -0,0 +1,18 @@ +from typing import Callable, TypeVar + +import tensorflow as tf + +_ReturnType = TypeVar("_ReturnType") # Generic Return Type of function for decorator + + +def tf_on_cpu(func: Callable[..., _ReturnType]) -> Callable[..., _ReturnType]: + """ + A decorator to run a function using TensorFlow's CPU device. + """ + + def wrapper(*args, **kwargs) -> _ReturnType: + with tf.device("/CPU:0"): + result = func(*args, **kwargs) + return result + + return wrapper diff --git a/python/gigl/common/utils/func_tools.py b/python/gigl/common/utils/func_tools.py index acfdbb3..5e42b2a 100644 --- a/python/gigl/common/utils/func_tools.py +++ b/python/gigl/common/utils/func_tools.py @@ -7,17 +7,20 @@ @overload -def lru_cache(maxsize: Callable[..., RT], typed: bool = False) -> Callable[..., RT]: ... +def lru_cache(maxsize: Callable[..., RT], typed: bool = False) -> Callable[..., RT]: + ... @overload -def lru_cache(maxsize: Optional[int], typed: bool = False) -> Callable[[RT], RT]: ... +def lru_cache(maxsize: Optional[int], typed: bool = False) -> Callable[[RT], RT]: + ... @overload def lru_cache( maxsize: Union[Callable[..., RT], Optional[int]], typed: bool = False -) -> Union[Callable[..., RT], Callable[[RT], RT]]: ... +) -> Union[Callable[..., RT], Callable[[RT], RT]]: + ... @no_type_check diff --git a/python/gigl/common/utils/vertex_ai_context.py b/python/gigl/common/utils/vertex_ai_context.py new file mode 100644 index 0000000..28db431 --- /dev/null +++ b/python/gigl/common/utils/vertex_ai_context.py @@ -0,0 +1,150 @@ +"""Utility functions to be used by machines running on Vertex AI.""" + +import os +import subprocess +import time + +from gigl.common import GcsUri +from gigl.common.logger import Logger +from gigl.common.services.vertex_ai import LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY +from gigl.common.utils.gcs import GcsUtils +from gigl.distributed import DistributedContext + +logger = Logger() + + +def is_currently_running_in_vertex_ai_job() -> bool: + """ + Check if the code is running in a Vertex AI job. + + Returns: + bool: True if running in a Vertex AI job, False otherwise. + """ + return "CLOUD_ML_JOB_ID" in os.environ + + +def get_vertex_ai_job_id() -> str: + """ + Get the Vertex AI job ID. + Throws if not on Vertex AI. + """ + return os.environ["CLOUD_ML_JOB_ID"] + + +def get_host_name() -> str: + """ + Get the current machine's hostname. + Throws if not on Vertex AI. + """ + return os.environ["HOSTNAME"] + + +def get_leader_hostname() -> str: + """ + Hostname of the machine that will host the process with rank 0. It is used + to synchronize the workers. + Throws if not on Vertex AI.
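+
+    A minimal sketch of how these values are commonly wired together
+    (illustrative only; assumes torch.distributed is the consumer of them):
+
+        import torch.distributed as dist
+
+        dist.init_process_group(
+            backend="gloo",
+            init_method=f"tcp://{get_leader_hostname()}:{get_leader_port()}",
+            rank=get_rank(),
+            world_size=get_world_size(),
+        )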
+ """ + return os.environ["MASTER_ADDR"] + + +def get_leader_port() -> int: + """ + A free port on the machine that will host the process with rank 0. + Throws if not on Vertex AI. + """ + return int(os.environ["MASTER_PORT"]) + + +def get_world_size() -> int: + """ + The total number of processes that VAI creates. Note that VAI only creates one process per machine. + It is the user's responsibility to create multiple processes per machine. + Throws if not on Vertex AI. + """ + return int(os.environ["WORLD_SIZE"]) + + +def get_rank() -> int: + """ + Rank of the current VAI process, so they will know whether it is the master or a worker. + Note: that VAI only creates one process per machine. It is the user's responsibility to + create multiple processes per machine. Meaning, this function will only return one integer + for the main process that VAI creates. + Throws if not on Vertex AI. + """ + return int(os.environ["RANK"]) + + +def connect_worker_pool() -> DistributedContext: + """ + Used to connect the worker pool. This function should be called by all workers + to get the leader worker's internal IP address and to ensure that the workers + can all communicate with the leader worker. + """ + + global_rank = get_rank() + global_world_size = get_world_size() + + is_leader_worker = global_rank == 0 + ip_file_uri = GcsUri(_get_leader_worker_internal_ip_file_path()) + gcs_utils = GcsUtils() + host_ip: str + if is_leader_worker: + logger.info("Wait 180 seconds for the leader machine to settle down.") + time.sleep(180) + host_ip = subprocess.check_output(["hostname", "-i"]).decode().strip() + logger.info(f"Writing host IP address ({host_ip}) to {ip_file_uri}") + gcs_utils.upload_from_string(gcs_path=ip_file_uri, content=host_ip) + else: + max_retries = 60 + interval_s = 30 + for attempt_num in range(1, max_retries + 1): + logger.info( + f"Checking if {ip_file_uri} exists and reading HOST_IP (attempt {attempt_num})..." + ) + try: + host_ip = gcs_utils.read_from_gcs(ip_file_uri) + logger.info(f"Pinging host ip ({host_ip}) ...") + if _ping_host_ip(host_ip): + logger.info(f"Ping to host ip ({host_ip}) was successful.") + break + except Exception as e: + logger.info(e) + logger.info( + f"Retrieving host information and/or ping failed, retrying in {interval_s} seconds..." + ) + time.sleep(interval_s) + if attempt_num >= max_retries: + logger.info( + f"Failed to ping HOST_IP after {max_retries} attempts. Exiting." + ) + raise Exception(f"Failed to ping HOST_IP after {max_retries} attempts.") + + return DistributedContext( + main_worker_ip_address=host_ip, + global_rank=global_rank, + global_world_size=global_world_size, + ) + + +def _get_leader_worker_internal_ip_file_path() -> str: + """ + Get the file path to the leader worker's internal IP address. + """ + assert is_currently_running_in_vertex_ai_job(), "Not running in Vertex AI job." + internal_ip_file_path = os.getenv(LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY) + assert internal_ip_file_path is not None, ( + f"Internal IP file path ({LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY}) " + + f"not found in environment variables. 
{os.environ}" + ) + + return internal_ip_file_path + + +def _ping_host_ip(host_ip: str) -> bool: + try: + subprocess.check_output(["ping", "-c", "1", host_ip]) + return True + except subprocess.CalledProcessError: + return False diff --git a/python/gigl/distributed/__init__.py b/python/gigl/distributed/__init__.py index ea7c1ab..9809d7d 100644 --- a/python/gigl/distributed/__init__.py +++ b/python/gigl/distributed/__init__.py @@ -1,8 +1,11 @@ -# autoflake: skip_file """ GLT Distributed Classes implemented in GiGL """ -from gigl.distributed.partitioner.dist_link_prediction_data_partitioner import ( +from gigl.distributed.dataset_factory import build_dataset +from gigl.distributed.dist_context import DistributedContext +from gigl.distributed.dist_link_prediction_data_partitioner import ( DistLinkPredictionDataPartitioner, ) +from gigl.distributed.dist_link_prediction_dataset import DistLinkPredictionDataset +from gigl.distributed.distributed_neighborloader import DistNeighborLoader diff --git a/python/gigl/distributed/constants.py b/python/gigl/distributed/constants.py new file mode 100644 index 0000000..baf80c6 --- /dev/null +++ b/python/gigl/distributed/constants.py @@ -0,0 +1,7 @@ +# TODO (mkolodner-sc): Set these ports dynamically while ensuring no overlap +# Ports for various purposes, we need to make sure they do not overlap. +# Note that [master_port_for_inference, master_port_for_inference + num_inference_processes). +# ports are used. Same for master port for sampling. +DEFAULT_MASTER_INFERENCE_PORT = 20000 +DEFAULT_MASTER_SAMPLING_PORT = 30000 +DEFAULT_MASTER_DATA_BUILDING_PORT = 10000 diff --git a/python/gigl/distributed/dataset/dist_link_prediction_dataset.py b/python/gigl/distributed/dataset/dist_link_prediction_dataset.py deleted file mode 100644 index a2cf47d..0000000 --- a/python/gigl/distributed/dataset/dist_link_prediction_dataset.py +++ /dev/null @@ -1,66 +0,0 @@ -from dataclasses import dataclass -from typing import Dict, Optional, Union - -from gigl.common.data.dataloaders import SerializedTFRecordInfo -from gigl.src.common.types.graph_data import EdgeType, NodeType - - -@dataclass(frozen=True) -class DatasetInputMetadata: - """ - Stores information for all entities. If homogeneous, all types are of type SerializedTFRecordInfo. Otherwise, they are dictionaries with the corresponding mapping. - These fields are used to store inputs to GLT's DistDataset.load() function. This is done separate from existing GiGL constructs such as PreprocessedMetadataPbWrapper so that - there is not a strict coupling between GiGL orchestration and the GLT Dataset layer. - """ - - # Node Entity Info for loading node tensors, a SerializedTFRecordInfo for homogeneous and Dict[NodeType, SerializedTFRecordInfo] for heterogeneous cases - node_entity_info: Union[ - SerializedTFRecordInfo, Dict[NodeType, SerializedTFRecordInfo] - ] - # Edge Entity Info for loading edge tensors, a SerializedTFRecordInfo for homogeneous and Dict[EdgeType, SerializedTFRecordInfo] for heterogeneous cases - edge_entity_info: Union[ - SerializedTFRecordInfo, Dict[EdgeType, SerializedTFRecordInfo] - ] - # Positive Label Entity Info, if present, a SerializedTFRecordInfo for homogeneous and Dict[EdgeType, SerializedTFRecordInfo] for heterogeneous cases. May be None - # for specific edge types. 
If data has no positive labels across all edge types, this value is None - positive_label_entity_info: Optional[ - Union[SerializedTFRecordInfo, Dict[EdgeType, Optional[SerializedTFRecordInfo]]] - ] = None - # Negative Label Entity Info, if present, a SerializedTFRecordInfo for homogeneous and Dict[EdgeType, SerializedTFRecordInfo] for heterogeneous cases. May be None - # for specific edge types. If input has no negative labels across all edge types, this value is None. - negative_label_entity_info: Optional[ - Union[SerializedTFRecordInfo, Dict[EdgeType, Optional[SerializedTFRecordInfo]]] - ] = None - - -# TODO (mkolodner-sc): Remove below comment which showes example usage of DatasetInputMetadata when the DistLinkPredictionDataset class is ready -""" -Example usage of DatasetInputMetadata class for a GiGL-orchestrated job - -class DistLinkPredictionDataset(): - - ... - - def load(dataset_input_metadata: DatasetInputMetadata): - tfrecord_dataloader = TFRecordDataloader(rank=0, world_size=1) - - ... - - # Heterogeneous Example - for node_type in dataset_input_metadata.node_entity_info: - node_ids[node_type], node_features[node_type] = tfrecord_dataloader.load_as_torch_tensors(serialized_tf_record_info=dataset_input_metadata.node_entity_info[node_type]) - - ... - -dataset = DistLinkPredictionDataset(...) - -# Generating DatasetInputMetadata - -dataset_input_metadata = convert_pb_to_dataset_input_metadata(preprocessed_metadata_pb_wrapper, graph_metadata_pb_wrapper) - -# Passing DatasetInputMetadata into DistLinkPredictionDataset class, which loads, partitions, and stores all of the relevant information on the current rank - -dataset.load(dataset_input_metadata) - -# DistLinkPredictionDataset instance will be eventually passed into GLT's DistNeighborLoader in the training/inference loop for live subgraph sampling. -""" diff --git a/python/gigl/distributed/dataset_factory.py b/python/gigl/distributed/dataset_factory.py new file mode 100644 index 0000000..10f3654 --- /dev/null +++ b/python/gigl/distributed/dataset_factory.py @@ -0,0 +1,346 @@ +""" +DatasetFactory is responsible for building and returning a DistLinkPredictionDataset class or subclass. It does this by spawning a +process which initializes rpc + worker group, loads and builds a partitioned dataset, and shuts down the rpc + worker group. 
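+
+A minimal usage sketch (illustrative; assumes `serialized_graph_metadata` and
+`distributed_context` have already been constructed):
+
+    dataset = build_dataset(
+        serialized_graph_metadata=serialized_graph_metadata,
+        distributed_context=distributed_context,
+        sample_edge_direction="in",
+    )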
+""" +import time +from collections import abc +from typing import Dict, Literal, MutableMapping, Optional, Union + +import torch +import torch.multiprocessing as mp +from graphlearn_torch.distributed import ( + barrier, + get_context, + init_rpc, + init_worker_group, + rpc_is_initialized, + shutdown_rpc, +) + +from gigl.common.data.dataloaders import TFRecordDataLoader +from gigl.common.data.load_torch_tensors import ( + SerializedGraphMetadata, + TFDatasetOptions, + load_torch_tensors_from_tf_record, +) +from gigl.common.logger import Logger +from gigl.common.utils.decorator import tf_on_cpu +from gigl.distributed.constants import DEFAULT_MASTER_DATA_BUILDING_PORT +from gigl.distributed.dist_context import DistributedContext +from gigl.distributed.dist_link_prediction_data_partitioner import ( + DistLinkPredictionDataPartitioner, +) +from gigl.distributed.dist_link_prediction_dataset import DistLinkPredictionDataset +from gigl.distributed.utils import get_process_group_name +from gigl.src.common.types.graph_data import EdgeType +from gigl.types.distributed import GraphPartitionData +from gigl.utils.data_splitters import ( + NodeAnchorLinkSplitter, + select_ssl_positive_label_edges, +) + +logger = Logger() + + +@tf_on_cpu +def _load_and_build_partitioned_dataset( + serialized_graph_metadata: SerializedGraphMetadata, + should_load_tensors_in_parallel: bool, + edge_dir: Literal["in", "out"], + partitioner: Optional[DistLinkPredictionDataPartitioner], + dataset: Optional[DistLinkPredictionDataset], + tf_dataset_options: TFDatasetOptions, + splitter: Optional[NodeAnchorLinkSplitter] = None, + _ssl_positive_label_percentage: Optional[float] = None, +) -> DistLinkPredictionDataset: + """ + Given some information about serialized TFRecords, loads and builds a partitioned dataset into a DistLinkPredictionDataset class. + We require init_rpc and init_worker_group have been called to set up the rpc and context, respectively, prior to calling this function. If this is not + set up beforehand, this function will throw an error. + Args: + serialized_graph_metadata (SerializedGraphMetadata): Serialized Graph Metadata contains serialized information for loading TFRecords across node and edge types + should_load_tensors_in_parallel (bool): Whether tensors should be loaded from serialized information in parallel or in sequence across the [node, edge, pos_label, neg_label] entity types. + edge_dir (Literal["in", "out"]): Edge direction of the provided graph + partitioner (Optional[DistLinkPredictionDataPartitioner]): Initialized partitioner to partition the graph inputs. If provided, this must be a + DistLinkPredictionDataPartitioner or subclass of it. If not provided, will initialize a DistLinkPredictionDataPartitioner instance + using provided edge assign strategy. + dataset (Optional[DistLinkPredictionDataset]): Initialized dataset class to store the graph inputs. If provided, this must be a + DistLinkPredictionDataset or subclass of it. If not provided, will initialize a DistLinkPredictionDataset instance using provided edge_dir. + tf_dataset_options (TFDatasetOptions): Options provided to a tf.data.Dataset to tune how serialized data is read. + splitter (Optional[NodeAnchorLinkSplitter]): Optional splitter to use for splitting the graph data into train, val, and test sets. If not provided (None), no splitting will be performed. + _ssl_positive_label_percentage (Optional[float]): Percentage of edges to select as self-supervised labels. Must be None if supervised edge labels are provided in advance. 
+ Slotted for refactor once this functionality is available in the transductive `splitter` directly + Returns: + DistLinkPredictionDataset: Initialized dataset with partitioned graph information + + """ + assert ( + get_context() is not None + ), "Context must be set up prior to calling `_load_and_build_partitioned_dataset` through glt.distributed.init_worker_group()" + assert ( + rpc_is_initialized() + ), "RPC must be set up prior to calling `_load_and_build_partitioned_dataset` through glt.distributed.init_rpc()" + + rank: int = get_context().rank + world_size: int = get_context().world_size + + tfrecord_data_loader = TFRecordDataLoader(rank=rank, world_size=world_size) + loaded_graph_tensors = load_torch_tensors_from_tf_record( + tf_record_dataloader=tfrecord_data_loader, + serialized_graph_metadata=serialized_graph_metadata, + should_load_tensors_in_parallel=should_load_tensors_in_parallel, + rank=rank, + tf_dataset_options=tf_dataset_options, + ) + + should_assign_edges_by_src_node: bool = False if edge_dir == "in" else True + + if partitioner is None: + if should_assign_edges_by_src_node: + logger.info( + f"Initializing DistLinkPredictionDataPartitioner instance while partitioning edges to their source node machines" + ) + else: + logger.info( + f"Initializing DistLinkPredictionDataPartitioner instance while partitioning edges to their destination node machines" + ) + partitioner = DistLinkPredictionDataPartitioner( + should_assign_edges_by_src_node=should_assign_edges_by_src_node + ) + + partitioner.register_node_ids(node_ids=loaded_graph_tensors.node_ids) + partitioner.register_edge_index(edge_index=loaded_graph_tensors.edge_index) + if loaded_graph_tensors.node_features is not None: + partitioner.register_node_features( + node_features=loaded_graph_tensors.node_features + ) + if loaded_graph_tensors.edge_features is not None: + partitioner.register_edge_features( + edge_features=loaded_graph_tensors.edge_features + ) + if loaded_graph_tensors.positive_label is not None: + partitioner.register_labels( + label_edge_index=loaded_graph_tensors.positive_label, is_positive=True + ) + if loaded_graph_tensors.negative_label is not None: + partitioner.register_labels( + label_edge_index=loaded_graph_tensors.negative_label, is_positive=False + ) + + # We call del so that the reference count of these registered fields is 1, + # allowing these intermediate assets to be cleaned up as necessary inside of the partitioner.partition() call + + del ( + loaded_graph_tensors.node_ids, + loaded_graph_tensors.node_features, + loaded_graph_tensors.edge_index, + loaded_graph_tensors.edge_features, + loaded_graph_tensors.positive_label, + loaded_graph_tensors.negative_label, + ) + del loaded_graph_tensors + + partition_output = partitioner.partition() + + # TODO (mkolodner-sc): Move this code block to transductive splitter once that is ready + if _ssl_positive_label_percentage is not None: + assert ( + partition_output.partitioned_positive_labels is None + and partition_output.partitioned_negative_labels is None + ), "Cannot have partitioned positive and negative labels when attempting to select self-supervised positive edges from edge index."
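+        # Illustrative note (added for clarity): with, e.g.,
+        # _ssl_positive_label_percentage=0.1 and 1,000 partitioned edges on this
+        # rank, roughly 100 edges would be selected as self-supervised positive
+        # labels by select_ssl_positive_label_edges below.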
+ positive_label_edges: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + # TODO (mkolodner-sc): Only add necessary edge types to positive label dictionary, rather than all of the keys in the partitioned edge index + if isinstance(partition_output.partitioned_edge_index, abc.Mapping): + positive_label_edges = {} + for ( + edge_type, + graph_partition_data, + ) in partition_output.partitioned_edge_index.items(): + edge_index = graph_partition_data.edge_index + positive_label_edges[edge_type] = select_ssl_positive_label_edges( + edge_index=edge_index, + positive_label_percentage=_ssl_positive_label_percentage, + ) + elif isinstance(partition_output.partitioned_edge_index, GraphPartitionData): + positive_label_edges = select_ssl_positive_label_edges( + edge_index=partition_output.partitioned_edge_index.edge_index, + positive_label_percentage=_ssl_positive_label_percentage, + ) + else: + raise ValueError( + "Found no partitioned edge index when attempting to select positive labels" + ) + + partition_output.partitioned_positive_labels = positive_label_edges + + if dataset is None: + logger.info( + f"Initializing DistLinkPredictionDataset instance with edge direction {edge_dir}" + ) + dataset = DistLinkPredictionDataset( + rank=rank, world_size=world_size, edge_dir=edge_dir + ) + + dataset.build( + partition_output=partition_output, + splitter=splitter, + ) + + return dataset + + +def _build_dataset_process( + process_number_on_current_machine: int, + output_dict: MutableMapping[str, DistLinkPredictionDataset], + serialized_graph_metadata: SerializedGraphMetadata, + distributed_context: DistributedContext, + dataset_building_port: int, + sample_edge_direction: Literal["in", "out"], + should_load_tensors_in_parallel: bool, + partitioner: Optional[DistLinkPredictionDataPartitioner], + dataset: Optional[DistLinkPredictionDataset], + tf_dataset_options: TFDatasetOptions, + splitter: Optional[NodeAnchorLinkSplitter] = None, + _ssl_positive_label_percentage: Optional[float] = None, +) -> None: + """ + This function is spawned by a single process per machine and is responsible for: + 1. Initializing worker group and rpc connections + 2. Loading Torch tensors from serialized TFRecords + 3. Partitioning loaded Torch tensors across multiple machines + 4. Loading and formatting graph and feature partition data into a `DistLinkPredictionDataset` class, which will be used during inference + 5. Tearing down these connections + Steps 2-4 are done by the `_load_and_build_partitioned_dataset` function. + + We wrap this logic inside of a `mp.spawn` process so that assets from these steps are properly cleaned up after the dataset has been built. Without + it, we observe inference performance degradation via cached entities that remain during the inference loop. As such, using a `mp.spawn` process is an easy + way to ensure all cached entities are cleaned up. We use `mp.spawn` instead of `mp.Process` so that any exceptions thrown in this function will be correctly + propagated to the parent process. + + This step is currently only supported on CPU. + + Args: + process_number_on_current_machine (int): Process number on current machine. This parameter is required and provided by mp.spawn. + Dataset building spawns only a single process per machine.
+ output_dict (MutableMapping[str, DistLinkPredictionDataset]): A dictionary spawned by a mp.manager which the built dataset + will be written to for use by the parent process + serialized_graph_metadata (SerializedGraphMetadata): Metadata about TFRecords that are serialized to disk + distributed_context (DistributedContext): Distributed context containing information for master_ip_address, rank, and world size + dataset_building_port (int): RPC port to use to build the dataset + sample_edge_direction (Literal["in", "out"]): Whether edges in the graph are directed inward or outward + should_load_tensors_in_parallel (bool): Whether tensors should be loaded from serialized information in parallel or in sequence across the [node, edge, pos_label, neg_label] entity types. + partitioner (Optional[DistLinkPredictionDataPartitioner]): Initialized partitioner to partition the graph inputs. If provided, this must be a + DistLinkPredictionDataPartitioner or subclass of it. If not provided, will initialize a DistLinkPredictionDataPartitioner instance + using provided edge assign strategy. + dataset (Optional[DistLinkPredictionDataset]): Initialized dataset class to store the graph inputs. If provided, this must be a + DistLinkPredictionDataset or subclass of it. If not provided, will initialize a DistLinkPredictionDataset instance using provided edge_dir. + tf_dataset_options (TFDatasetOptions): Options provided to a tf.data.Dataset to tune how serialized data is read. + splitter (Optional[NodeAnchorLinkSplitter]): Optional splitter to use for splitting the graph data into train, val, and test sets. If not provided (None), no splitting will be performed. + _ssl_positive_label_percentage (Optional[float]): Percentage of edges to select as self-supervised labels. Must be None if supervised edge labels are provided in advance. + Slotted for refactor once this functionality is available in the transductive `splitter` directly + """ + + # Sets up the worker group and rpc connection. We need to ensure we clean up by calling shutdown_rpc() after we no longer need the rpc connection. + init_worker_group( + world_size=distributed_context.global_world_size, + rank=distributed_context.global_rank, + group_name=get_process_group_name(process_number_on_current_machine), + ) + + init_rpc( + master_addr=distributed_context.main_worker_ip_address, + master_port=dataset_building_port, + num_rpc_threads=4, + ) + + output_dataset: DistLinkPredictionDataset = _load_and_build_partitioned_dataset( + serialized_graph_metadata=serialized_graph_metadata, + should_load_tensors_in_parallel=should_load_tensors_in_parallel, + edge_dir=sample_edge_direction, + partitioner=partitioner, + dataset=dataset, + tf_dataset_options=tf_dataset_options, + splitter=splitter, + _ssl_positive_label_percentage=_ssl_positive_label_percentage, + ) + + output_dict["dataset"] = output_dataset + + # We add a barrier here so that all processes end and exit this function at the same time. Without this, we may have some machines call shutdown_rpc() while other + # machines may require rpc setup for partitioning, which will result in failure.
+ barrier() + shutdown_rpc() + + +def build_dataset( + serialized_graph_metadata: SerializedGraphMetadata, + distributed_context: DistributedContext, + sample_edge_direction: Union[Literal["in", "out"], str], + should_load_tensors_in_parallel: bool = True, + partitioner: Optional[DistLinkPredictionDataPartitioner] = None, + dataset: Optional[DistLinkPredictionDataset] = None, + tf_dataset_options: TFDatasetOptions = TFDatasetOptions(), + splitter: Optional[NodeAnchorLinkSplitter] = None, + _ssl_positive_label_percentage: Optional[float] = None, + _dataset_building_port: int = DEFAULT_MASTER_DATA_BUILDING_PORT, +) -> DistLinkPredictionDataset: + """ + Launches a spawned process for building and returning a DistLinkPredictionDataset instance provided some SerializedGraphMetadata + Args: + serialized_graph_metadata (SerializedGraphMetadata): Metadata about TFRecords that are serialized to disk + distributed_context (DistributedContext): Distributed context containing information for master_ip_address, rank, and world size + sample_edge_direction (Union[Literal["in", "out"], str]): Whether edges in the graph are directed inward or outward. Note that this is + listed as a possible string to satisfy the type checker, but in practice must be a Literal["in", "out"]. + should_load_tensors_in_parallel (bool): Whether tensors should be loaded from serialized information in parallel or in sequence across the [node, edge, pos_label, neg_label] entity types. + partitioner (Optional[DistLinkPredictionDataPartitioner]): Initialized partitioner to partition the graph inputs. If provided, this must be a + DistLinkPredictionDataPartitioner or subclass of it. If not provided, will initialize a DistLinkPredictionDataPartitioner instance + using provided edge assign strategy. + dataset (Optional[DistLinkPredictionDataset]): Initialized dataset class to store the graph inputs. If provided, this must be a + DistLinkPredictionDataset or subclass of it. If not provided, will initialize a DistLinkPredictionDataset instance using provided edge_dir. + tf_dataset_options (TFDatasetOptions): Options provided to a tf.data.Dataset to tune how serialized data is read. + splitter (Optional[NodeAnchorLinkSplitter]): Optional splitter to use for splitting the graph data into train, val, and test sets. If not provided (None), no splitting will be performed. + _ssl_positive_label_percentage (Optional[float]): Percentage of edges to select as self-supervised labels. Must be None if supervised edge labels are provided in advance. + Slotted for refactor once this functionality is available in the transductive `splitter` directly + _dataset_building_port (int): WARNING: You don't need to configure this unless you run into port conflict issues. Slotted for refactor. + The RPC port to use to build the dataset. In the future, the port will be automatically assigned based on availability.
+ Currently defaults to: gigl.distributed.constants.DEFAULT_MASTER_DATA_BUILDING_PORT + + Returns: + DistLinkPredictionDataset: Built GraphLearn-for-PyTorch Dataset class + """ + assert ( + sample_edge_direction == "in" or sample_edge_direction == "out" + ), f"Provided edge direction from inference args must be one of `in` or `out`, got {sample_edge_direction}" + + manager = mp.Manager() + + dataset_building_start_time = time.time() + + # Used for directing the outputs of the dataset building process back to the parent process + output_dict = manager.dict() + + # Launches process for loading serialized TFRecords from disk into memory, partitioning the data across machines, and storing data inside a GLT dataset class + mp.spawn( + fn=_build_dataset_process, + args=( + output_dict, + serialized_graph_metadata, + distributed_context, + _dataset_building_port, + sample_edge_direction, + should_load_tensors_in_parallel, + partitioner, + dataset, + tf_dataset_options, + splitter, + _ssl_positive_label_percentage, + ), + ) + + output_dataset: DistLinkPredictionDataset = output_dict["dataset"] + + logger.info( + f"--- Dataset Building finished on rank {distributed_context.global_rank}, which took {time.time()-dataset_building_start_time:.2f} seconds" + ) + + return output_dataset diff --git a/python/gigl/distributed/dist_context.py b/python/gigl/distributed/dist_context.py new file mode 100644 index 0000000..da513ab --- /dev/null +++ b/python/gigl/distributed/dist_context.py @@ -0,0 +1,19 @@ +from dataclasses import dataclass + + +@dataclass(frozen=True) +class DistributedContext: + """ + GiGL Distributed Context + """ + + # TODO (mkolodner-sc): Investigate adding local rank and local world size + + # Main Worker's IP Address for RPC communication + main_worker_ip_address: str + + # Rank of machine + global_rank: int + + # Total number of machines + global_world_size: int diff --git a/python/gigl/distributed/partitioner/dist_link_prediction_data_partitioner.py b/python/gigl/distributed/dist_link_prediction_data_partitioner.py similarity index 82% rename from python/gigl/distributed/partitioner/dist_link_prediction_data_partitioner.py rename to python/gigl/distributed/dist_link_prediction_data_partitioner.py index 3f776e7..70f738a 100644 --- a/python/gigl/distributed/partitioner/dist_link_prediction_data_partitioner.py +++ b/python/gigl/distributed/dist_link_prediction_data_partitioner.py @@ -8,6 +8,7 @@ import torch from graphlearn_torch.distributed.dist_context import get_context from graphlearn_torch.distributed.dist_random_partitioner import DistPartitionManager +from graphlearn_torch.partition import PartitionBook from graphlearn_torch.utils import convert_to_tensor, index_select from gigl.common.logger import Logger @@ -15,10 +16,11 @@ from gigl.types.distributed import ( DEFAULT_HOMOGENEOUS_EDGE_TYPE, DEFAULT_HOMOGENEOUS_NODE_TYPE, - EdgeAssignStrategy, FeaturePartitionData, GraphPartitionData, PartitionOutput, + to_heterogeneous_edge, + to_heterogeneous_node, ) logger = Logger() @@ -26,11 +28,34 @@ class _DistLinkPredicitonPartitionManager(DistPartitionManager): """ - Inherited from GLT's DistPartitionManager class. We only implement this here to override the reset function. This is because - GLT's partition book generates a partition book tensor of type int64, which is expensive in memory and uneccessary if world_size < 256. - In this function, we modify this partition book tensor to be of type uint8 when being generated. 
+ Inherited from GLT's DistPartitionManager, this class is responsible for handling partitioning for tensor-based partition books. + We implement this here to override the reset function. + This is because we can save a lot of memory by using the minimum required dtype for our partition books. """ + def __init__( + self, world_size: int, total_val_size: int = 1, generate_pb: bool = True + ): + """ + Initializes the partition manager. + Args: + world_size (int): The number of partitions (the total number of machines). + total_val_size (int): The number of entities we should generate a partition book for. + generate_pb (bool): Whether we should generate a partition book + """ + if world_size <= 255: + self._pb_dtype = torch.uint8 + elif world_size <= 65535: # 2 ^ 16 - 1 + self._pb_dtype = torch.uint16 + elif world_size <= 4294967295: # 2 ^ 32 - 1 + self._pb_dtype = torch.uint32 + else: + self._pb_dtype = torch.uint64 + logger.info( + f"Since the world size is {world_size}, using dtype of {self._pb_dtype} for partition book" + ) + super().__init__(total_val_size, generate_pb) + def reset(self, total_val_size: int, generate_pb: bool = True): """ Resets the partition book and current values for partitioning. @@ -45,7 +70,7 @@ def reset(self, total_val_size: int, generate_pb: bool = True): self.cur_part_val_list: List[Tuple[torch.Tensor, ...]] = [] if self.generate_pb: # This is the only difference from DistPartitionManager's reset() function. - self.partition_book = torch.zeros(total_val_size, dtype=torch.uint8) + self.partition_book = torch.zeros(total_val_size, dtype=self._pb_dtype) else: self.partition_book = None @@ -116,7 +141,7 @@ class DistLinkPredictionDataPartitioner: def __init__( self, - edge_assign_strategy: EdgeAssignStrategy = EdgeAssignStrategy.BY_DESTINATION_NODE, + should_assign_edges_by_src_node: bool = False, node_ids: Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]] = None, node_features: Optional[ Union[torch.Tensor, Dict[NodeType, torch.Tensor]] @@ -134,10 +159,11 @@ def __init__( ): """ Initializes the parameters of the partitioner. Also optionally takes in node and edge tensors as arguments and registers them to the partitioner. Registered - entities should be a dictionary of Dict[[NodeType or EdgeType], torch.Tensor] if heterogeneous or a torch.Tensor if homogeneous. This class assumes the distributed - context has already been initialized outside of this class with the glt.distributed.init_worker_group() function and that rpc has been initialized with glt_distributed.init_rpc(). + entities should be a dictionary of Dict[[NodeType or EdgeType], torch.Tensor] if heterogeneous or a torch.Tensor if homogeneous. + This class assumes the distributed context has already been initialized outside of this class with the glt.distributed.init_worker_group() + function and that rpc has been initialized with glt_distributed.init_rpc(). Args: - edge_assign_strategy (EdgeAssignStrategy): The assignment strategy when partitioning edges, should be 'by_source_node' or 'by_destination_node'. + should_assign_edges_by_src_node (bool): Whether edges should be assigned to the machine of the source nodes during partitioning node_ids (Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]]): Optionally registered node ids from input. Tensors should be of shape [num_nodes_on_current_rank] node_features (Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]]): Optionally registered node feats from input. 
Tensors should be of shape [num_nodes_on_current_rank, node_feat_dim] edge_index (Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]): Optionally registered edge indexes from input. Tensors should be of shape [2, num_edges_on_current_rank] @@ -145,19 +171,15 @@ positive_labels (Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]): Optionally registered positive labels from input. Tensors should be of shape [2, num_pos_labels_on_current_rank] negative_labels (Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]): Optionally registered negative labels from input. Tensors should be of shape [2, num_neg_labels_on_current_rank] """ - assert ( - get_context() is not None - ), "Distributed context must be initialized prior to using the partitioner by calling glt.distributed.init_worker_group()" - assert ( - glt_rpc.rpc_is_initialized() - ), "rpc must be initialized prior to partitioning by calling glt.distributed.init_rpc()" + self._world_size: int + self._rank: int + self._partition_mgr: _DistLinkPredicitonPartitionManager - self._world_size = get_context().world_size - self._rank = get_context().rank + self._is_rpc_initialized: bool = False self._is_input_homogeneous: Optional[bool] = None - self._edge_assign_strategy: EdgeAssignStrategy = edge_assign_strategy + self._should_assign_edges_by_src_node: bool = should_assign_edges_by_src_node self._edge_types: List[EdgeType] = [] self._node_types: List[NodeType] = [] self._num_nodes: Optional[Dict[NodeType, int]] = None @@ -175,13 +197,6 @@ self._positive_label_edge_index: Optional[Dict[EdgeType, torch.Tensor]] = None self._negative_label_edge_index: Optional[Dict[EdgeType, torch.Tensor]] = None - # 256 is the maximum world size for a uint8 partition book - if self._world_size >= 256: - # TODO (mkolodner-sc): Investigate alternatives beyond using DistPartitionManager for large world sizes, as int64 is still too large - self._partition_mgr = DistPartitionManager() - else: - self._partition_mgr = _DistLinkPredicitonPartitionManager() - if node_ids is not None: self.register_node_ids(node_ids=node_ids) @@ -204,51 +219,83 @@ def __assert_data_type_consistency( self, input_entity: abc.Mapping, is_node_entity: bool, + is_subset: bool, ) -> None: """ - Checks that the keys of the input_entity, which must be a dictionary, align with other registered fields. + Checks that the keys of the input_entity, which must be a dictionary, align with registered node or edge types. - This function will set the `node_types` and `edge_types` properties of the partitioner. If they have already been registered, it will - check that the registered node/edge types align with the input tensor's node/edge types. The function determines whether to check/set node - or edge types through the provided `is_node_entity` argument. + This function will check that the registered node/edge types align with the input tensor's node/edge types. + The function determines whether to check node or edge types through the provided `is_node_entity` argument. Args: input_entity (abc.Mapping): Input entity, which must be a dictionary is_node_entity (bool): Whether the current input entity contains node information; if False, the input entity is assumed to be for edges.
+ is_subset (bool): Whether the current input should be a subset of the registered entity types; if False, will check that it is an exact match """ - if is_node_entity: - # Case where input is node data, meaning we need to check node type alignment - if len(self._node_types) == 0: - # If node types have not yet been registered, we register them here. - # We sort the node types to guarantee the same ordering across multiple workers, as dictionaries keys are inherently unsorted - self._node_types = sorted(input_entity.keys()) + # We check that the input tensor node types match the registered node types. + if is_subset: + node_diff = set(input_entity.keys()) - set(self._node_types) + assert ( + not node_diff + ), f"Found node types {node_diff} not contained in registered node types {self._node_types}" else: - # Otherwise, we check that the input tensor node types match the registered node types, sorting for the same reason as above. assert self._node_types == sorted( input_entity.keys() ), f"Found different node input types {sorted(input_entity.keys())} from registered node types {self._node_types}" else: - # Case where input is edge data, meaning we need to check edge type alignment - if len(self._edge_types) == 0: - # If edge types have not yet been registered, we register them here. - # We sort the edge types to guarantee the same ordering across multiple workers, as dictionaries keys are inherently unsorted - self._edge_types = sorted(input_entity.keys()) - # Otherwise, we check that the input tensor edge types match the registered edge types, sorting for the same reason as above. + # We check that the input tensor edge types match the registered edge types. + if is_subset: + edge_diff = set(input_entity.keys()) - set(self._edge_types) + assert ( + not edge_diff + ), f"Found edge types {edge_diff} not contained in registered edge types {self._edge_types}" + else: assert self._edge_types == sorted( input_entity.keys() ), f"Found different edge input types {sorted(input_entity.keys())} from registered edge types {self._edge_types}" + def __assert_and_get_rpc_setup(self) -> None: + """ + Asserts RPC and worker context have been initialized. If this is the first time this is called, + sets the rank, world_size, and partition manager fields of the partitioner class from the distributed context. + """ + assert ( + get_context() is not None + ), "Distributed context must be initialized by the user prior to partitioning or registering fields by calling glt.distributed.init_worker_group()" + + assert ( + glt_rpc.rpc_is_initialized() + ), "rpc must be initialized by the user prior to partitioning or registering fields by calling glt.distributed.init_rpc()" + + # If rank, world size, and partition_manager are not set, we set them once we know the context and rpc have been initialized. + if not self._is_rpc_initialized: + logger.info( + f"Machine {get_context().rank} setting up partition manager ..."
+ ) + self._rank = get_context().rank + self._world_size = get_context().world_size + self._partition_mgr = _DistLinkPredicitonPartitionManager( + world_size=self._world_size + ) + self._is_rpc_initialized = True + def __convert_node_entity_to_heterogeneous_format( - self, input_node_entity: Union[torch.Tensor, Dict[NodeType, torch.Tensor]] - ) -> Dict[NodeType, torch.Tensor]: + self, + input_node_entity: Union[ + torch.Tensor, + PartitionBook, + Dict[NodeType, torch.Tensor], + Dict[NodeType, PartitionBook], + ], + ) -> Union[Dict[NodeType, torch.Tensor], Dict[NodeType, PartitionBook]]: """ Converts input_node_entity into heterogeneous format if it is not already. If input is homogeneous, this will be a dictionary with Node Type DEFAULT_HOMOGENEOUS_NODE_TYPE. This is done so that the partitioning logic can be simplified to just the heterogeneous case. Homogeneous inputs are re-converted back to non-dictionary formats when returning the outputs of partitioning through the `self._is_input_homogeneous` variable. """ - if not isinstance(input_node_entity, abc.Mapping): + if isinstance(input_node_entity, (PartitionBook, torch.Tensor)): if ( self._is_input_homogeneous is not None and not self._is_input_homogeneous @@ -257,36 +304,40 @@ "Registering homogeneous field when previously registered entity was heterogeneous" ) self._is_input_homogeneous = True - return {DEFAULT_HOMOGENEOUS_NODE_TYPE: input_node_entity} else: if self._is_input_homogeneous is not None and self._is_input_homogeneous: raise ValueError( "Registering heterogeneous field when previously registered entity was homogeneous" ) self._is_input_homogeneous = False - return input_node_entity + return to_heterogeneous_node(input_node_entity) def __convert_edge_entity_to_heterogeneous_format( - self, input_edge_entity: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] - ) -> Dict[EdgeType, torch.Tensor]: + self, + input_edge_entity: Union[ + torch.Tensor, + PartitionBook, + Dict[EdgeType, torch.Tensor], + Dict[EdgeType, PartitionBook], + ], + ) -> Union[Dict[EdgeType, torch.Tensor], Dict[EdgeType, PartitionBook]]: """ Converts input_edge_entity into heterogeneous format if it is not already. If input is homogeneous, this will be a dictionary with Edge Type DEFAULT_HOMOGENEOUS_EDGE_TYPE.
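+
+        For example (illustrative): a homogeneous edge_index tensor is returned as
+        {DEFAULT_HOMOGENEOUS_EDGE_TYPE: edge_index}, while a heterogeneous dictionary
+        input keeps its existing edge-type keys.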
""" - if not isinstance(input_edge_entity, abc.Mapping): + if isinstance(input_edge_entity, (PartitionBook, torch.Tensor)): if not self._is_input_homogeneous: raise ValueError( "Registering homogeneous field when previously registered entity was heterogeneous" ) self._is_input_homogeneous = True - return {DEFAULT_HOMOGENEOUS_EDGE_TYPE: input_edge_entity} else: if self._is_input_homogeneous: raise ValueError( "Registering heterogeneous field when previously registered entity was heterogeneous" ) self._is_input_homogeneous = False - return input_edge_entity + return to_heterogeneous_edge(input_edge_entity) def register_node_ids( self, node_ids: Union[torch.Tensor, Dict[NodeType, torch.Tensor]] @@ -299,14 +350,19 @@ def register_node_ids( Args: node_ids (Union[torch.Tensor, Dict[NodeType, torch.Tensor]]): Input node_ids which is either a torch.Tensor if homogeneous or a Dict if heterogeneous """ + + self.__assert_and_get_rpc_setup() + logger.info("Registering Nodes ...") input_node_ids = self.__convert_node_entity_to_heterogeneous_format( input_node_entity=node_ids ) - self.__assert_data_type_consistency( - input_entity=input_node_ids, is_node_entity=True - ) + assert ( + input_node_ids + ), "Node ids is an empty dictionary. Please provide node ids to register." + + self._node_types = sorted(input_node_ids.keys()) self._node_ids = convert_to_tensor(input_node_ids, dtype=torch.int64) @@ -318,7 +374,7 @@ def register_node_ids( node_type_to_num_nodes: Dict[NodeType, int] = { node_type: input_node_ids[node_type].size(0) - for node_type in self._node_types + for node_type in sorted(input_node_ids.keys()) } # Gathering to compute the number of nodes on each rank for each node type @@ -345,14 +401,20 @@ def register_edge_index( Args: edge_index (Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]): Input edge index which is either a torch.Tensor if homogeneous or a Dict if heterogeneous """ + + self.__assert_and_get_rpc_setup() + logger.info("Registering Edge Indices ...") input_edge_index = self.__convert_edge_entity_to_heterogeneous_format( input_edge_entity=edge_index ) - self.__assert_data_type_consistency( - input_entity=input_edge_index, is_node_entity=False - ) + + assert ( + input_edge_index + ), "Edge Index is an empty dictionary. Please provide edge indices to register." + + self._edge_types = sorted(input_edge_index.keys()) self._edge_index = convert_to_tensor(input_edge_index, dtype=torch.int64) @@ -362,11 +424,11 @@ def register_edge_index( gathered_edge_info: Dict[str, Tuple[int, Dict[EdgeType, int]]] self._num_edges = {} edge_ids: Dict[EdgeType, torch.Tensor] = {} - edge_type_to_num_edges: Dict[EdgeType, int] = { edge_type: input_edge_index[edge_type].size(1) - for edge_type in self._edge_types + for edge_type in sorted(input_edge_index.keys()) } + # Gathering to compute the number of edges on each rank for each edge type gathered_edge_info = glt_rpc.all_gather((self._rank, edge_type_to_num_edges)) @@ -407,13 +469,18 @@ def register_node_features( Args: node_features(Union[torch.Tensor, Dict[NodeType, torch.Tensor]]): Input node features which is either a torch.Tensor if homogeneous or a Dict if heterogeneous """ + + self.__assert_and_get_rpc_setup() + logger.info("Registering Node Features ...") + input_node_features = self.__convert_node_entity_to_heterogeneous_format( input_node_entity=node_features ) - self.__assert_data_type_consistency( - input_entity=input_node_features, is_node_entity=True - ) + + assert ( + input_node_features + ), "Node features is an empty dictionary. 
Please provide node features to register." self._node_feat = convert_to_tensor(input_node_features, dtype=torch.float32) self._node_feat_dim = {} @@ -433,13 +500,18 @@ Args: edge_features(Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]): Input edge features which is either a torch.Tensor if homogeneous or a Dict if heterogeneous """ + + self.__assert_and_get_rpc_setup() + logger.info("Registering Edge Features ...") + input_edge_features = self.__convert_edge_entity_to_heterogeneous_format( input_edge_entity=edge_features ) - self.__assert_data_type_consistency( - input_entity=input_edge_features, is_node_entity=False - ) + + assert ( + input_edge_features + ), "Edge features is an empty dictionary. Please provide edge features to register." self._edge_feat = convert_to_tensor(input_edge_features, dtype=torch.float32) self._edge_feat_dim = {} @@ -458,17 +530,21 @@ For optimal memory management, it is recommended that the reference to the label tensor be deleted after calling this function using `del`, as maintaining both original and intermediate tensors can cause OOM concerns. We do not need to perform `all_gather` calls here since register_edge_index is responsible for determining total number of edges - across all ranks and inferrring edge ids. + across all ranks and inferring edge ids. Args: label_edge_index (Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]): Input positive or negative labels which is either a torch.Tensor if homogeneous or a Dict if heterogeneous is_positive (bool): Whether positive labels are currently being registered. If False, labels will be registered as negative """ + + self.__assert_and_get_rpc_setup() + input_label_edge_index = self.__convert_edge_entity_to_heterogeneous_format( input_edge_entity=label_edge_index ) - self.__assert_data_type_consistency( - input_entity=input_label_edge_index, is_node_entity=False - ) + + assert ( + input_label_edge_index + ), "Label edge index is an empty dictionary. Please provide label edge indices to register." if is_positive: logger.info("Registering Positive Labels ...") @@ -528,7 +604,7 @@ partition_function: Callable[[torch.Tensor, Tuple[int, int]], torch.Tensor], total_val_size: int, generate_pb: bool = True, - ) -> Tuple[List[Tuple[torch.Tensor, ...]], Optional[torch.Tensor]]: + ) -> Tuple[List[Tuple[torch.Tensor, ...]], Optional[PartitionBook]]: r"""Partitions input data chunk by chunk. Args: input_data (Optional[Tuple[torch.Tensor, ...]]): Generic tuple of items to be partitioned across machines; it may carry any information that should be partitioned across machines. @@ -541,7 +617,7 @@ and should be false if partitioning node features or edge features. Return: List[Tuple[torch.Tensor, ...]]: Partitioned results of the input generic data type - torch.Tensor: Partition Book if `generate_pb` is True, returns None if `generate_pb` is False + Optional[PartitionBook]: Partition Book if `generate_pb` is True, returns None if `generate_pb` is False """ # TODO (mkolodner-sc): Investigate range-based partitioning num_items = len(rank_indices) @@ -587,14 +663,14 @@ self._partition_mgr.partition_book, ) - def __partition_node(self, node_type: NodeType) -> torch.Tensor: + def __partition_node(self, node_type: NodeType) -> PartitionBook: r"""Partition graph nodes of a specified node type. Args: node_type (NodeType): The node type for input nodes Returns: - torch.Tensor: The partition book of graph nodes.
+ PartitionBook: The partition book of graph nodes. """ assert ( @@ -639,14 +715,14 @@ def _node_pfn(n_ids, _): def __partition_node_features( self, - node_partition_book: Dict[NodeType, torch.Tensor], + node_partition_book: Dict[NodeType, PartitionBook], node_type: NodeType, ) -> FeaturePartitionData: """ Partitions node features according to the node partition book. Args: - node_partition_book (Dict[NodeType, torch.Tensor]): The partition book of nodes + node_partition_book (Dict[NodeType, PartitionBook]): The partition book of nodes node_type (NodeType): Node type of input data Returns: @@ -664,6 +740,7 @@ node_features = self._node_feat[node_type] node_ids = self._node_ids[node_type] num_nodes = self._num_nodes[node_type] + node_feat_dim = self._node_feat_dim[node_type] def _node_feature_partition_fn(node_feature_ids, _): return target_node_partition_book[node_feature_ids] @@ -697,7 +774,7 @@ if len(partitioned_results) == 0: feature_partition_data = FeaturePartitionData( - feats=torch.empty((0, self._node_feat_dim[node_type])), + feats=torch.empty((0, node_feat_dim)), ids=torch.empty(0), ) else: @@ -706,7 +783,7 @@ ids=torch.cat([r[1] for r in partitioned_results]), ) - del self._node_feat_dim[node_type] + del self._node_feat_dim[node_type], node_feat_dim if len(self._node_feat_dim) == 0: self._node_feat_dim = None @@ -718,18 +795,18 @@ def __partition_edge( self, - node_partition_book: Dict[NodeType, torch.Tensor], + node_partition_book: Dict[NodeType, PartitionBook], edge_type: EdgeType, - ) -> Tuple[GraphPartitionData, torch.Tensor]: + ) -> Tuple[GraphPartitionData, PartitionBook]: r"""Partition graph topology of a specified edge type. Args: - node_partition_book (Dict[NodeType, torch.Tensor]): The partition books of all graph nodes. + node_partition_book (Dict[NodeType, PartitionBook]): The partition books of all graph nodes. edge_type (EdgeType): The edge type for input edges Returns: GraphPartitionData: The graph data of the current partition. - torch.Tensor: The partition book of graph edges. + PartitionBook: The partition book of graph edges. """ assert ( @@ -742,7 +819,7 @@ edge_ids = self._edge_ids[edge_type] num_edges = self._num_edges[edge_type] - if self._edge_assign_strategy == EdgeAssignStrategy.BY_SOURCE_NODE: + if self._should_assign_edges_by_src_node: target_node_partition_book = node_partition_book[edge_type.src_node_type] target_indices = edge_index[0] else: @@ -798,14 +875,14 @@ def _edge_pfn(_, chunk_range): def __partition_edge_features( self, - edge_partition_book: Dict[EdgeType, torch.Tensor], + edge_partition_book: Dict[EdgeType, PartitionBook], edge_type: EdgeType, ) -> FeaturePartitionData: """ Partitions edge features according to the edge partition book.
Args: - edge_partition_book (Dict[EdgeType, torch.Tensor]): The partition book of edges + edge_partition_book (Dict[EdgeType, PartitionBook]): The partition book of edges edge_type (EdgeType): Edge type of input data Returns: @@ -823,6 +900,7 @@ edge_feat = self._edge_feat[edge_type] edge_ids = self._edge_ids[edge_type] num_edges = self._num_edges[edge_type] + edge_feat_dim = self._edge_feat_dim[edge_type] def _edge_feature_partition_fn(edge_feature_ids, _): return target_edge_partition_book[edge_feature_ids] @@ -855,7 +933,7 @@ if len(partitioned_results) == 0: feature_partition_data = FeaturePartitionData( - feats=torch.empty((0, self._edge_feat_dim[edge_type])), + feats=torch.empty((0, edge_feat_dim)), ids=torch.empty(0), ) else: @@ -864,7 +942,7 @@ ids=torch.cat([r[1] for r in partitioned_results]), ) - del self._edge_feat_dim[edge_type] + del self._edge_feat_dim[edge_type], edge_feat_dim if len(self._edge_feat_dim) == 0: self._edge_feat_dim = None @@ -876,7 +954,7 @@ def _edge_feature_partition_fn(edge_feature_ids, _): def __partition_label_edge_index( self, - node_partition_book: Dict[NodeType, torch.Tensor], + node_partition_book: Dict[NodeType, PartitionBook], is_positive: bool, edge_type: EdgeType, ) -> torch.Tensor: @@ -884,7 +962,7 @@ Partitions labels according to the node partition book. Args: - node_partition_book (Dict[NodeType, torch.Tensor]): The partition book of nodes + node_partition_book (Dict[NodeType, PartitionBook]): The partition book of nodes is_positive (bool): Whether positive labels are currently being partitioned. If False, negative labels will be partitioned. edge_type (EdgeType): Edge type of input data, must be specified if heterogeneous @@ -892,12 +970,7 @@ torch.Tensor: Edge index tensor of positive or negative labels, depending on is_positive flag """ - src_node_type = edge_type.src_node_type - assert ( - src_node_type in node_partition_book - ), f"Label source node type {src_node_type} not found in node partition book keys {node_partition_book.keys()}" - - target_node_partition_book = node_partition_book[src_node_type] + target_node_partition_book = node_partition_book[edge_type.src_node_type] if is_positive: assert ( self._positive_label_edge_index is not None @@ -970,13 +1043,16 @@ def _label_partition_fn(source_node_ids, _): return partitioned_label_edge_index - def partition_node(self) -> Union[torch.Tensor, Dict[NodeType, torch.Tensor]]: + def partition_node(self) -> Union[PartitionBook, Dict[NodeType, PartitionBook]]: """ Partitions nodes of a graph. If heterogeneous, partitions nodes for all node types.
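+
+        A minimal sketch of the expected call order (illustrative):
+
+            node_pb = partitioner.partition_node()
+            node_feats = partitioner.partition_node_features(node_partition_book=node_pb)
+            graph_data, edge_pb = partitioner.partition_edge(node_partition_book=node_pb)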
Returns: - Union[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]]: Partition Book of input nodes or Dict if heterogeneous + Union[PartitionBook, Dict[NodeType, PartitionBook]]: Partition Book of input nodes or Dict if heterogeneous """ + + self.__assert_and_get_rpc_setup() + assert ( self._num_nodes is not None ), "Must have registered nodes prior to partitioning them" @@ -985,10 +1061,10 @@ start_time = time.time() self.__assert_data_type_consistency( - input_entity=self._num_nodes, is_node_entity=True + input_entity=self._num_nodes, is_node_entity=True, is_subset=False ) - node_partition_book: Dict[NodeType, torch.Tensor] = {} + node_partition_book: Dict[NodeType, PartitionBook] = {} for node_type in self._node_types: node_partition_book[node_type] = self.__partition_node(node_type=node_type) @@ -1002,14 +1078,14 @@ return node_partition_book def partition_node_features( - self, node_partition_book: Union[torch.Tensor, Dict[NodeType, torch.Tensor]] + self, node_partition_book: Union[PartitionBook, Dict[NodeType, PartitionBook]] ) -> Union[FeaturePartitionData, Dict[NodeType, FeaturePartitionData]]: """ Partitions node features of a graph. If heterogeneous, partitions features for all node types. Must call `partition_node` first to get the node partition book as input. Args: - node_partition_book (Union[torch.Tensor, Dict[NodeType, torch.Tensor]]): The Computed Node Partition Book + node_partition_book (Union[PartitionBook, Dict[NodeType, PartitionBook]]): The Computed Node Partition Book Returns: Union[FeaturePartitionData, Dict[NodeType, FeaturePartitionData]]: Feature Partition Data of ids and features or Dict if heterogeneous. """ assert ( self._node_feat is not None and self._node_ids is not None ), "Node features and ids must be registered prior to partitioning."
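+        # Features are routed according to the node partition book computed by
+        # partition_node(): each feature row is sent to the rank that owns its
+        # node id (descriptive note, added for clarity).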
+ self.__assert_and_get_rpc_setup() + logger.info("Partitioning Node Feats ...") start_time = time.time() @@ -1029,17 +1107,21 @@ def partition_node_features( ) self.__assert_data_type_consistency( - input_entity=transformed_node_partition_book, is_node_entity=True + input_entity=transformed_node_partition_book, + is_node_entity=True, + is_subset=False, ) self.__assert_data_type_consistency( - input_entity=self._node_feat, is_node_entity=True + input_entity=self._node_feat, is_node_entity=True, is_subset=True ) self.__assert_data_type_consistency( - input_entity=self._node_ids, is_node_entity=True + input_entity=self._node_ids, is_node_entity=True, is_subset=False ) + node_feature_types = sorted(self._node_feat.keys()) + partitioned_node_features: Dict[NodeType, FeaturePartitionData] = {} - for node_type in self._node_types: + for node_type in node_feature_types: partitioned_node_features[node_type] = self.__partition_node_features( node_partition_book=transformed_node_partition_book, node_type=node_type ) @@ -1048,29 +1130,30 @@ def partition_node_features( logger.info(f"Node Feature Partitioning finished, took {elapsed_time:.3f}s") if self._is_input_homogeneous: - # Converting heterogeneous input back to homogeneous return partitioned_node_features[DEFAULT_HOMOGENEOUS_NODE_TYPE] else: return partitioned_node_features def partition_edge( - self, node_partition_book: Union[torch.Tensor, Dict[NodeType, torch.Tensor]] + self, node_partition_book: Union[PartitionBook, Dict[NodeType, PartitionBook]] ) -> Union[ - Tuple[GraphPartitionData, torch.Tensor], - Tuple[Dict[EdgeType, GraphPartitionData], Dict[EdgeType, torch.Tensor]], + Tuple[GraphPartitionData, PartitionBook], + Tuple[Dict[EdgeType, GraphPartitionData], Dict[EdgeType, PartitionBook]], ]: """ Partitions edges of a graph. If heterogeneous, partitions edges for all edge type. Must call `partition_node` first to get the node partition book as input. 
Args: - node_partition_book (Union[torch.Tensor, Dict[NodeType, torch.Tensor]]): The computed Node Partition Book + node_partition_book (Union[PartitionBook, Dict[NodeType, PartitionBook]]): The computed Node Partition Book Returns: Union[ - Tuple[GraphPartitionData, torch.Tensor], - Tuple[Dict[EdgeType, GraphPartitionData], Dict[EdgeType, trorch.Tensor]], + Tuple[GraphPartitionData, PartitionBook], + Tuple[Dict[EdgeType, GraphPartitionData], Dict[EdgeType, PartitionBook]], ]: Partitioned Graph Data and corresponding edge partition book, is a dictionary if heterogeneous """ + self.__assert_and_get_rpc_setup() + assert ( self._edge_index is not None and self._edge_ids is not None @@ -1087,22 +1170,24 @@ def partition_edge( ) self.__assert_data_type_consistency( - input_entity=transformed_node_partition_book, is_node_entity=True + input_entity=transformed_node_partition_book, + is_node_entity=True, + is_subset=False, ) self.__assert_data_type_consistency( - input_entity=self._edge_index, is_node_entity=False + input_entity=self._edge_index, is_node_entity=False, is_subset=False ) self.__assert_data_type_consistency( - input_entity=self._edge_ids, is_node_entity=False + input_entity=self._edge_ids, is_node_entity=False, is_subset=False ) self.__assert_data_type_consistency( - input_entity=self._num_edges, is_node_entity=False + input_entity=self._num_edges, is_node_entity=False, is_subset=False ) - edge_partition_book: Dict[EdgeType, torch.Tensor] = {} + edge_partition_book: Dict[EdgeType, PartitionBook] = {} partitioned_edge_index: Dict[EdgeType, GraphPartitionData] = {} for edge_type in self._edge_types: ( @@ -1126,16 +1211,19 @@ def partition_edge( return partitioned_edge_index, edge_partition_book def partition_edge_features( - self, edge_partition_book: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + self, edge_partition_book: Union[PartitionBook, Dict[EdgeType, PartitionBook]] ) -> Union[FeaturePartitionData, Dict[EdgeType, FeaturePartitionData]]: """ Partitions edge features of a graph. If heterogeneous, partitions edge features for all edge type. Must call `partition_edge` first to get the edge partition book as input. Args: - edge_partition_book (Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]): The computed Edge Partition Book + edge_partition_book (Union[PartitionBook, Dict[EdgeType, PartitionBook]]): The computed Edge Partition Book Returns: Union[FeaturePartitionData, Dict[EdgeType, FeaturePartitionData]]: Feature Partition Data of ids and features or Dict if heterogeneous. 
""" + + self.__assert_and_get_rpc_setup() + assert ( self._edge_feat is not None and self._edge_ids is not None @@ -1152,19 +1240,23 @@ def partition_edge_features( ) self.__assert_data_type_consistency( - input_entity=transformed_edge_partition_book, is_node_entity=False + input_entity=transformed_edge_partition_book, + is_node_entity=False, + is_subset=False, ) self.__assert_data_type_consistency( - input_entity=self._edge_feat, is_node_entity=False + input_entity=self._edge_feat, is_node_entity=False, is_subset=True ) self.__assert_data_type_consistency( - input_entity=self._edge_ids, is_node_entity=False + input_entity=self._edge_ids, is_node_entity=False, is_subset=False ) + edge_feature_types = sorted(self._edge_feat.keys()) + partitioned_edge_features: Dict[EdgeType, FeaturePartitionData] = {} - for edge_type in self._edge_types: + for edge_type in edge_feature_types: partitioned_edge_features[edge_type] = self.__partition_edge_features( edge_partition_book=transformed_edge_partition_book, edge_type=edge_type ) @@ -1179,39 +1271,51 @@ def partition_edge_features( def partition_labels( self, - node_partition_book: Union[torch.Tensor, Dict[NodeType, torch.Tensor]], + node_partition_book: Union[PartitionBook, Dict[NodeType, PartitionBook]], is_positive: bool, ) -> Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]: """ Partitions positive or negative labels of a graph. If heterogeneous, partitions labels for all edge type. Must call `partition_node` first to get the node partition book as input. Args: - node_partition_book (Union[torch.Tensor, Dict[NodeType, torch.Tensor]]): The computed Node Partition Book + node_partition_book (Union[PartitionBook, Dict[NodeType, PartitionBook]]): The computed Node Partition Book is_positive (bool): Whether positive labels are currently being registered. If False, negative labels will be partitioned. 
Returns: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]: Returns the edge indices for partitioned positive or negative label, dependent on the is_positive flag """ + + self.__assert_and_get_rpc_setup() + if is_positive: assert ( self._positive_label_edge_index is not None ), "Must register positive labels prior to partitioning them" - logger.info("Partitioning Positive Labels ...") - self.__assert_data_type_consistency( - input_entity=self._positive_label_edge_index, is_node_entity=False + input_entity=self._positive_label_edge_index, + is_node_entity=False, + is_subset=True, ) + + edge_label_types = sorted(self._positive_label_edge_index.keys()) + + logger.info("Partitioning Positive Labels ...") + else: assert ( self._negative_label_edge_index is not None ), "Must register negative labels partitioning them" - logger.info("Partitioning Negative Labels ...") - self.__assert_data_type_consistency( - input_entity=self._negative_label_edge_index, is_node_entity=False + input_entity=self._negative_label_edge_index, + is_node_entity=False, + is_subset=True, ) + edge_label_types = sorted(self._negative_label_edge_index.keys()) + + logger.info("Partitioning Negative Labels ...") + start_time = time.time() transformed_node_partition_book = ( @@ -1221,11 +1325,13 @@ def partition_labels( ) self.__assert_data_type_consistency( - input_entity=transformed_node_partition_book, is_node_entity=True + input_entity=transformed_node_partition_book, + is_node_entity=True, + is_subset=False, ) partitioned_label_edge_index: Dict[EdgeType, torch.Tensor] = {} - for edge_type in self._edge_types: + for edge_type in edge_label_types: partitioned_label_edge_index[edge_type] = self.__partition_label_edge_index( node_partition_book=transformed_node_partition_book, is_positive=is_positive, @@ -1256,6 +1362,11 @@ def partition( PartitionOutput: Reshuffled Outputs of Partitioning """ + self.__assert_and_get_rpc_setup() + + logger.info(f"Rank {self._rank} starting partitioning ...") + start_time = time.time() + # Node partition should happen at the very beginning, as edge partition # and label partition depends on node partition book. 
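# [Editor's illustrative sketch, not part of the diff] partition() chains the public
# methods above in dependency order; a caller driving them by hand (the `partitioner`
# name is hypothetical) would write roughly:
#
#     node_pb = partitioner.partition_node()
#     node_feats = partitioner.partition_node_features(node_partition_book=node_pb)
#     edges, edge_pb = partitioner.partition_edge(node_partition_book=node_pb)
#     edge_feats = partitioner.partition_edge_features(edge_partition_book=edge_pb)
#     pos = partitioner.partition_labels(node_partition_book=node_pb, is_positive=True)
#     neg = partitioner.partition_labels(node_partition_book=node_pb, is_positive=False)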
node_partition_book = self.partition_node() @@ -1315,6 +1426,10 @@ def partition( else: partitioned_negative_edge_index = None + logger.info( + f"Rank {self._rank} finished partitioning in {time.time() - start_time:.2f} seconds" + ) + return PartitionOutput( node_partition_book=node_partition_book, edge_partition_book=edge_partition_book, diff --git a/python/gigl/distributed/dist_link_prediction_dataset.py b/python/gigl/distributed/dist_link_prediction_dataset.py new file mode 100644 index 0000000..4ac7c88 --- /dev/null +++ b/python/gigl/distributed/dist_link_prediction_dataset.py @@ -0,0 +1,725 @@ +# Originally taken from https://github.com/alibaba/graphlearn-for-pytorch/blob/main/graphlearn_torch/python/distributed/dist_dataset.py + +import gc +import time +from collections import abc +from multiprocessing.reduction import ForkingPickler +from typing import Dict, Literal, Optional, Tuple, Union + +import torch +from graphlearn_torch.data import Feature, Graph +from graphlearn_torch.distributed.dist_dataset import DistDataset +from graphlearn_torch.partition import PartitionBook +from graphlearn_torch.utils import apply_to_all_tensor, id2idx + +from gigl.common.logger import Logger +from gigl.src.common.types.graph_data import ( # TODO (mkolodner-sc): Change to use torch_geometric.typing + EdgeType, + NodeType, +) +from gigl.types.distributed import ( + FeaturePartitionData, + GraphPartitionData, + PartitionOutput, +) +from gigl.utils.data_splitters import NodeAnchorLinkSplitter +from gigl.utils.share_memory import share_memory + +logger = Logger() + + +class DistLinkPredictionDataset(DistDataset): + """ + This class inherits from GraphLearn-for-PyTorch's DistDataset class. We override the __init__ functionality to support positive and + negative edges and labels. We also override the share_ipc function to correctly serialize these new fields. We additionally introduce + a `build` function for storing the partitioned graph data inside of this class. We assume data in this class is only in CPU RAM, and do not support + data on GPU memory, thus simplifying the logic and tooling required compared to the base DistDataset class. + """ + + def __init__( + self, + rank: int, + world_size: int, + edge_dir: Literal["in", "out"], + graph_partition: Optional[Union[Graph, Dict[EdgeType, Graph]]] = None, + node_feature_partition: Optional[ + Union[Feature, Dict[NodeType, Feature]] + ] = None, + edge_feature_partition: Optional[ + Union[Feature, Dict[EdgeType, Feature]] + ] = None, + node_partition_book: Optional[ + Union[PartitionBook, Dict[NodeType, PartitionBook]] + ] = None, + edge_partition_book: Optional[ + Union[PartitionBook, Dict[EdgeType, PartitionBook]] + ] = None, + positive_edge_label: Optional[ + Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + ] = None, + negative_edge_label: Optional[ + Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + ] = None, + node_ids: Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]] = None, + num_train: Optional[Union[int, Dict[NodeType, int]]] = None, + num_val: Optional[Union[int, Dict[NodeType, int]]] = None, + num_test: Optional[Union[int, Dict[NodeType, int]]] = None, + ) -> None: + """ + Initializes the fields of the DistLinkPredictionDataset class. This function is called upon each deserialization of a DistLinkPredictionDataset instance.
+ Args: + rank (int): Rank of the current process + world_size (int): World size of the current process + edge_dir (Literal["in", "out"]): Edge direction of the provided graph + The below arguments are only expected to be provided when deserializing an instance of the DistLinkPredictionDataset class after build() has been called + graph_partition (Optional[Union[Graph, Dict[EdgeType, Graph]]]): Partitioned Graph Data + node_feature_partition (Optional[Union[Feature, Dict[NodeType, Feature]]]): Partitioned Node Feature Data + edge_feature_partition (Optional[Union[Feature, Dict[EdgeType, Feature]]]): Partitioned Edge Feature Data + node_partition_book (Optional[Union[PartitionBook, Dict[NodeType, PartitionBook]]]): Node Partition Book + edge_partition_book (Optional[Union[PartitionBook, Dict[EdgeType, PartitionBook]]]): Edge Partition Book + positive_edge_label (Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]): Positive Edge Label Tensor + negative_edge_label (Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]): Negative Edge Label Tensor + node_ids (Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]]): Node IDs on the current machine + num_train (Optional[Union[int, Dict[NodeType, int]]]): Number of training nodes on the current machine. Will be a dict if heterogeneous. + num_val (Optional[Union[int, Dict[NodeType, int]]]): Number of validation nodes on the current machine. Will be a dict if heterogeneous. + num_test (Optional[Union[int, Dict[NodeType, int]]]): Number of test nodes on the current machine. Will be a dict if heterogeneous. + """ + self._rank: int = rank + self._world_size: int = world_size + self._edge_dir: Literal["in", "out"] = edge_dir + + super().__init__( + num_partitions=world_size, + partition_idx=rank, + graph_partition=graph_partition, + node_feature_partition=node_feature_partition, + edge_feature_partition=edge_feature_partition, + node_pb=node_partition_book, + edge_pb=edge_partition_book, + edge_dir=edge_dir, + ) + self._positive_edge_label: Optional[ + Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + ] = positive_edge_label + self._negative_edge_label: Optional[ + Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + ] = negative_edge_label + + self._node_ids: Optional[ + Union[torch.Tensor, Dict[NodeType, torch.Tensor]] + ] = node_ids + + self._num_train = num_train + self._num_val = num_val + self._num_test = num_test + + # TODO (mkolodner-sc): Modify so that we don't need to rely on GLT's base variable naming (i.e. partition_idx, num_partitions) in favor of clearer + # naming (i.e. rank, world_size).
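# [Editor's illustrative sketch, not part of the diff] A minimal homogeneous
# construction, assuming `partition_output` came from the partitioner shown earlier
# in this diff:
#
#     dataset = DistLinkPredictionDataset(rank=0, world_size=2, edge_dir="out")
#     dataset.build(partition_output=partition_output, splitter=None)
#
# rank/world_size are forwarded to the GLT base class as partition_idx/num_partitions;
# the properties below re-expose them under the GiGL naming.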
+ + @property + def partition_idx(self) -> int: + return self._rank + + @partition_idx.setter + def partition_idx(self, new_partition_idx: int): + self._rank = new_partition_idx + + @property + def num_partitions(self) -> int: + return self._world_size + + @num_partitions.setter + def num_partitions(self, new_num_partitions: int): + self._world_size = new_num_partitions + + @property + def edge_dir(self) -> Literal["in", "out"]: + return self._edge_dir + + @edge_dir.setter + def edge_dir(self, new_edge_dir: Literal["in", "out"]): + self._edge_dir = new_edge_dir + + @property + def graph(self) -> Optional[Union[Graph, Dict[EdgeType, Graph]]]: + return self._graph + + @graph.setter + def graph(self, new_graph: Optional[Union[Graph, Dict[EdgeType, Graph]]]): + self._graph = new_graph + + @property + def node_features(self) -> Optional[Union[Feature, Dict[NodeType, Feature]]]: + """ + During deserialization, the initialized `Feature` type does not immediately contain the feature and id2index tensors. These + fields are initially set to None, and are only populated when we retrieve the size, retrieve the shape, or index into one of these tensors. + This can also be done manually with the feature.lazy_init_with_ipc_handle() function. + """ + return self._node_features + + @node_features.setter + def node_features( + self, new_node_features: Optional[Union[Feature, Dict[NodeType, Feature]]] + ): + self._node_features = new_node_features + + @property + def edge_features(self) -> Optional[Union[Feature, Dict[EdgeType, Feature]]]: + """ + During deserialization, the initialized `Feature` type does not immediately contain the feature and id2index tensors. These + fields are initially set to None, and are only populated when we retrieve the size, retrieve the shape, or index into one of these tensors. + This can also be done manually with the feature.lazy_init_with_ipc_handle() function.
+ """ + return self._edge_features + + @edge_features.setter + def edge_features( + self, new_edge_features: Optional[Union[Feature, Dict[EdgeType, Feature]]] + ): + self._edge_features = new_edge_features + + @property + def node_pb( + self, + ) -> Optional[Union[PartitionBook, Dict[NodeType, PartitionBook]]]: + return self._node_partition_book + + @node_pb.setter + def node_pb( + self, + new_node_pb: Optional[Union[PartitionBook, Dict[NodeType, PartitionBook]]], + ): + self._node_partition_book = new_node_pb + + @property + def edge_pb( + self, + ) -> Optional[Union[PartitionBook, Dict[EdgeType, PartitionBook]]]: + return self._edge_partition_book + + @edge_pb.setter + def edge_pb( + self, + new_edge_pb: Optional[Union[PartitionBook, Dict[EdgeType, PartitionBook]]], + ): + self._edge_partition_book = new_edge_pb + + @property + def positive_edge_label( + self, + ) -> Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]: + return self._positive_edge_label + + @property + def negative_edge_label( + self, + ) -> Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]: + return self._negative_edge_label + + @property + def node_ids(self) -> Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]]: + return self._node_ids + + @property + def train_node_ids( + self, + ) -> Optional[Union[torch.Tensor, abc.Mapping[NodeType, torch.Tensor]]]: + if self._num_train is None: + return None + elif isinstance(self._num_train, int) and isinstance( + self._node_ids, torch.Tensor + ): + return self._node_ids[: self._num_train] + elif isinstance(self._num_train, abc.Mapping) and isinstance( + self._node_ids, abc.Mapping + ): + node_ids = {} + for node_type, num_train in self._num_train.items(): + node_ids[node_type] = self._node_ids[node_type][:num_train] + return node_ids + else: + raise ValueError( + f"We have num_train as {type(self._num_train)} and node_ids as {type(self._node_ids)}, and don't know how to deal with them! If you are using the constructor make sure all data is either homogeneous or heterogeneous. If you are using `build()` this is likely a bug, please report it." + ) + + @property + def val_node_ids( + self, + ) -> Optional[Union[torch.Tensor, abc.Mapping[NodeType, torch.Tensor]]]: + if self._num_val is None: + return None + if self._num_train is None: + raise ValueError( + "num_train must be set if num_val is set. If you are using the constructor make sure all data is either homogeneous or heterogeneous. If you are using `build()` this is likely a bug, please report it." + ) + elif ( + isinstance(self._num_train, int) + and isinstance(self._num_val, int) + and isinstance(self._node_ids, torch.Tensor) + ): + idx = slice(self._num_train, self._num_train + self._num_val) + return self._node_ids[idx] + elif ( + isinstance(self._num_train, abc.Mapping) + and isinstance(self._num_val, abc.Mapping) + and isinstance(self._node_ids, abc.Mapping) + ): + node_ids = {} + for node_type, num_val in self._num_val.items(): + idx = slice( + self._num_train[node_type], self._num_train[node_type] + num_val + ) + node_ids[node_type] = self._node_ids[node_type][idx] + return node_ids + else: + raise ValueError( + f"We have num_val as {type(self._num_val)} and node_ids as {type(self._node_ids)}, and don't know how to deal with them! If you are using the constructor make sure all data is either homogeneous or heterogeneous. If you are using `build()` this is likely a bug, please report it." 
) + + @property + def test_node_ids( + self, + ) -> Optional[Union[torch.Tensor, abc.Mapping[NodeType, torch.Tensor]]]: + if self._num_test is None: + return None + if self._num_train is None or self._num_val is None: + raise ValueError( + "num_train and num_val must be set if num_test is set. If you are using the constructor make sure all data is either homogeneous or heterogeneous. If you are using `build()` this is likely a bug, please report it." + ) + elif ( + isinstance(self._num_train, int) + and isinstance(self._num_val, int) + and isinstance(self._num_test, int) + and isinstance(self._node_ids, torch.Tensor) + ): + idx = slice( + self._num_train + self._num_val, + self._num_train + self._num_val + self._num_test, + ) + return self._node_ids[idx] + elif ( + isinstance(self._num_train, abc.Mapping) + and isinstance(self._num_val, abc.Mapping) + and isinstance(self._num_test, abc.Mapping) + and isinstance(self._node_ids, abc.Mapping) + ): + node_ids = {} + for node_type, num_test in self._num_test.items(): + idx = slice( + self._num_train[node_type] + self._num_val[node_type], + self._num_train[node_type] + self._num_val[node_type] + num_test, + ) + node_ids[node_type] = self._node_ids[node_type][idx] + return node_ids + else: + raise ValueError( + f"We have num_test as {type(self._num_test)} and node_ids as {type(self._node_ids)}, and don't know how to deal with them! If you are using the constructor make sure all data is either homogeneous or heterogeneous. If you are using `build()` this is likely a bug, please report it." + ) + + def load(self, *args, **kwargs): + raise NotImplementedError( + f"load() is not supported for the {type(self)} class. Please use build() instead." + ) + + def build( + self, + partition_output: PartitionOutput, + splitter: Optional[NodeAnchorLinkSplitter] = None, + ) -> None: + """ + Provided partitioned graph information, this method stores these tensors inside of the class for + subsequent live subgraph sampling using a GraphLearn-for-PyTorch NeighborLoader. + + Note that this method will clear the following fields from the provided partition_output: + * `partitioned_edge_index` + * `partitioned_node_features` + * `partitioned_edge_features` + We do this to decrease the peak memory usage during the build process by removing these intermediate assets. + + Args: + partition_output (PartitionOutput): Partitioned Graph to be stored in the DistLinkPredictionDataset class + splitter (Optional[NodeAnchorLinkSplitter]): A function that takes in an edge index and returns: + * a tuple of train, val, and test node ids, if homogeneous + * a dict[NodeType, tuple[train, val, test]] of node ids, if heterogeneous + Optional, as not all datasets need to be split, e.g. if we're doing inference. + """ + + logger.info( + f"Rank {self._rank} starting to build dataset class from partitioned graph ..."
) + + start_time = time.time() + + self._node_partition_book = partition_output.node_partition_book + self._edge_partition_book = partition_output.edge_partition_book + + partitioned_edge_index: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + partitioned_edge_ids: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + partitioned_node_features: Union[torch.Tensor, Dict[NodeType, torch.Tensor]] + partitioned_node_feature_ids: Union[torch.Tensor, Dict[NodeType, torch.Tensor]] + partitioned_edge_features: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + partitioned_edge_feature_ids: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + + # Homogeneous Case + if isinstance(partition_output.partitioned_edge_index, GraphPartitionData): + # Edge Index refers to the [2, num_edges] tensor representing the pairs of nodes connected by each edge + # Edge IDs refer to the [num_edges] tensor representing the unique integer assigned to each edge + partitioned_edge_index = partition_output.partitioned_edge_index.edge_index + partitioned_edge_ids = partition_output.partitioned_edge_index.edge_ids + if partition_output.partitioned_node_features is not None: + assert isinstance( + partition_output.partitioned_node_features, FeaturePartitionData + ) + partitioned_node_features = ( + partition_output.partitioned_node_features.feats + ) + partitioned_node_feature_ids = ( + partition_output.partitioned_node_features.ids + ) + if partition_output.partitioned_edge_features is not None: + assert isinstance( + partition_output.partitioned_edge_features, FeaturePartitionData + ) + partitioned_edge_features = ( + partition_output.partitioned_edge_features.feats + ) + partitioned_edge_feature_ids = ( + partition_output.partitioned_edge_features.ids + ) + # Heterogeneous Case + else: + assert isinstance(partition_output.partitioned_edge_index, abc.Mapping) + # Edge Index refers to the [2, num_edges] tensor representing the pairs of nodes connected by each edge + # Edge IDs refer to the [num_edges] tensor representing the unique integer assigned to each edge + partitioned_edge_index = { + edge_type: graph_partition_data.edge_index + for edge_type, graph_partition_data in partition_output.partitioned_edge_index.items() + } + partitioned_edge_ids = { + edge_type: graph_partition_data.edge_ids + for edge_type, graph_partition_data in partition_output.partitioned_edge_index.items() + } + if partition_output.partitioned_node_features is not None: + assert isinstance( + partition_output.partitioned_node_features, abc.Mapping + ) + partitioned_node_features = { + node_type: feature_partition_data.feats + for node_type, feature_partition_data in partition_output.partitioned_node_features.items() + } + partitioned_node_feature_ids = { + node_type: feature_partition_data.ids + for node_type, feature_partition_data in partition_output.partitioned_node_features.items() + } + + if partition_output.partitioned_edge_features is not None: + assert isinstance( + partition_output.partitioned_edge_features, abc.Mapping + ) + partitioned_edge_features = { + edge_type: feature_partition_data.feats + for edge_type, feature_partition_data in partition_output.partitioned_edge_features.items() + } + partitioned_edge_feature_ids = { + edge_type: feature_partition_data.ids + for edge_type, feature_partition_data in partition_output.partitioned_edge_features.items() + } + + if splitter is not None: + split_start = time.time() + logger.info("Starting to split edges...") + splits = splitter(edge_index=partitioned_edge_index) + logger.info( + f"Finished
splitting edges in {time.time() - split_start:.2f} seconds." + ) + else: + splits = None + # TODO (mkolodner-sc): Enable custom params for init_graph, init_node_features, and init_edge_features + + self.init_graph( + edge_index=partitioned_edge_index, + edge_ids=partitioned_edge_ids, + graph_mode="CPU", + directed=True, + ) + + partition_output.partitioned_edge_index = None + del ( + partitioned_edge_index, + partitioned_edge_ids, + ) + gc.collect() + + # We compute the node ids on the current machine, which will be used as input to the DistNeighborLoader. + # If the nodes were split, then we set the total number of nodes in each split here. + # Additionally, we append any node ids, for a given node type, that were *not* split to the end of "node ids" + # so that all node ids on a given machine are included in the dataset. + # This is done with `_append_non_split_node_ids`. + # An example here is if we have: + # train_nodes: [1, 2, 3] + # val_nodes: [3, 4] # Note dupes are ok! + # test_nodes: [5, 6] + # node_ids_on_machine: [0, 1, 2, 3, 4, 5, 6, 7, 8] + # We would then append [7, 8] as they are not in any split. + # We do all of this as if a user provides labels, they may be for some subset of edges + # on a given machine, but we still want to store all node ids for the given machine. + # TODO(kmonte): We may not need to store all node ids (either for all types - if we split, or the "extras" as described above). + # Look into this and see if we can remove this. + + # For tensor based partitioning, the partition_book will be a torch.Tensor under-the-hood. We need to check if this is a torch.Tensor + # here, as it will not be recognized by `isinstance` as a `PartitionBook` since torch.Tensor doesn't directly inherit from `PartitionBook`. + if isinstance(self._node_partition_book, torch.Tensor): + node_ids_on_machine = torch.nonzero( + self._node_partition_book == self._rank + ).squeeze() + if splits is not None: + logger.info("Using node ids that we got from the splitter.") + if not isinstance(splits, tuple): + if len(splits) == 1: + logger.warning( + f"Got splits as a mapping, which is intended for heterogeneous graphs. We received the node types: {splits.keys()}. Since we only got one key, we will use it as the node type." + ) + train_nodes, val_nodes, test_nodes = next(iter(splits.values())) + else: + raise ValueError( + f"Got splits as a mapping, which is intended for heterogeneous graphs. We received the node types: {splits.keys()}. Please use a splitter that returns a tuple of tensors." + ) + else: + train_nodes, val_nodes, test_nodes = splits + self._num_train = train_nodes.numel() + self._num_val = val_nodes.numel() + self._num_test = test_nodes.numel() + self._node_ids = _append_non_split_node_ids( + train_nodes, val_nodes, test_nodes, node_ids_on_machine + ) + # do gc to save memory. + del train_nodes, val_nodes, test_nodes, node_ids_on_machine + gc.collect() + else: + logger.info( + "Node ids will be all nodes on this machine, derived from the partition book." + ) + self._node_ids = node_ids_on_machine + + # For range-based partitioning, the partition book will be a `RangePartitionBook` under-the-hood, which subclasses `PartitionBook`, + # so we can check if it's a `PartitionBook` instance this time.
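# [Editor's worked example, not part of the diff] For a tensor-based partition book
# such as
#     pb = torch.tensor([0, 1, 1, 0])  # index = node id, value = owning rank
# rank 0 owns nodes {0, 3}, which is exactly what torch.nonzero(pb == 0).squeeze()
# above computes. A RangePartitionBook instead stores per-rank bounds, so rank r owns
# the contiguous id range [bounds[r-1], bounds[r]); see get_ids_on_rank later in this diff.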
+ elif isinstance(self._node_partition_book, PartitionBook): + raise NotImplementedError( + "TODO(mkolodner-sc): Implement range based partitioning" + ) + else: + # TODO (mkolodner-sc): Support heterogeneous range-based partitioning + node_ids_by_node_type: dict[NodeType, torch.Tensor] = {} + num_train_by_node_type: dict[NodeType, int] = {} + num_val_by_node_type: dict[NodeType, int] = {} + num_test_by_node_type: dict[NodeType, int] = {} + if splits is not None and isinstance(splits, tuple): + raise ValueError( + f"Got splits as a tuple, which is intended for homogeneous graphs. We received the node types: {self._node_partition_book.keys()}. Please use a splitter that returns a mapping of tensors." + ) + for node_type, node_partition_book in self._node_partition_book.items(): + node_ids_on_machine = torch.nonzero( + node_partition_book == self._rank + ).squeeze() + if splits is None or node_type not in splits: + logger.info(f"Did not split for node type {node_type}.") + node_ids_by_node_type[node_type] = node_ids_on_machine + elif splits is not None: + logger.info( + f"Using node ids that we got from the splitter for node type {node_type}." + ) + train_nodes, val_nodes, test_nodes = splits[node_type] + num_train_by_node_type[node_type] = train_nodes.numel() + num_val_by_node_type[node_type] = val_nodes.numel() + num_test_by_node_type[node_type] = test_nodes.numel() + node_ids_by_node_type[node_type] = _append_non_split_node_ids( + train_nodes, val_nodes, test_nodes, node_ids_on_machine + ) + # do gc to save memory. + del train_nodes, val_nodes, test_nodes, node_ids_on_machine + gc.collect() + else: + raise ValueError("We should not get here, whoops!") + self._node_ids = node_ids_by_node_type + self._num_train = num_train_by_node_type + self._num_val = num_val_by_node_type + self._num_test = num_test_by_node_type + + if partition_output.partitioned_node_features is not None: + self.init_node_features( + node_feature_data=partitioned_node_features, + id2idx=apply_to_all_tensor(partitioned_node_feature_ids, id2idx), + with_gpu=False, + ) + partition_output.partitioned_node_features = None + del ( + partitioned_node_features, + partitioned_node_feature_ids, + ) + gc.collect() + + if partition_output.partitioned_edge_features is not None: + self.init_edge_features( + edge_feature_data=partitioned_edge_features, + id2idx=apply_to_all_tensor(partitioned_edge_feature_ids, id2idx), + with_gpu=False, + ) + + partition_output.partitioned_edge_features = None + del ( + partitioned_edge_features, + partitioned_edge_feature_ids, + ) + gc.collect() + + self._positive_edge_label = partition_output.partitioned_positive_labels + self._negative_edge_label = partition_output.partitioned_negative_labels + + logger.info( + f"Rank {self._rank} finished building dataset class from partitioned graph in {time.time() - start_time:.2f} seconds. Waiting for other ranks to finish ..."
) + + def share_ipc( + self, + ) -> Tuple[ + int, + int, + Literal["in", "out"], + Optional[Union[Graph, Dict[EdgeType, Graph]]], + Optional[Union[Feature, Dict[NodeType, Feature]]], + Optional[Union[Feature, Dict[EdgeType, Feature]]], + Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]], + Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]], + Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]], + Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]], + Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]], + Optional[Union[int, Dict[NodeType, int]]], + Optional[Union[int, Dict[NodeType, int]]], + Optional[Union[int, Dict[NodeType, int]]], + ]: + """ + Serializes the member variables of the DistLinkPredictionDataset class + Returns: + int: Rank on current machine + int: World size across all machines + Literal["in", "out"]: Graph Edge Direction + Optional[Union[Graph, Dict[EdgeType, Graph]]]: Partitioned Graph Data + Optional[Union[Feature, Dict[NodeType, Feature]]]: Partitioned Node Feature Data + Optional[Union[Feature, Dict[EdgeType, Feature]]]: Partitioned Edge Feature Data + Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]]: Node Partition Book Tensor + Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]: Edge Partition Book Tensor + Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]: Positive Edge Label Tensor + Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]]: Negative Edge Label Tensor + Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]]: Node IDs on the current machine + Optional[Union[int, Dict[NodeType, int]]]: Number of training nodes on the current machine. Will be a dict if heterogeneous. + Optional[Union[int, Dict[NodeType, int]]]: Number of validation nodes on the current machine. Will be a dict if heterogeneous. + Optional[Union[int, Dict[NodeType, int]]]: Number of test nodes on the current machine. Will be a dict if heterogeneous. + """ + # TODO (mkolodner-sc): Investigate moving share_memory calls to the build() function + + share_memory(entity=self._node_partition_book) + share_memory(entity=self._edge_partition_book) + share_memory(entity=self._positive_edge_label) + share_memory(entity=self._negative_edge_label) + share_memory(entity=self._node_ids) + ipc_handle = ( + self._rank, + self._world_size, + self._edge_dir, + self._graph, + self._node_features, + self._edge_features, + self._node_partition_book, + self._edge_partition_book, + self._positive_edge_label, # Additional field unique to DistLinkPredictionDataset class + self._negative_edge_label, # Additional field unique to DistLinkPredictionDataset class + self._node_ids, # Additional field unique to DistLinkPredictionDataset class + self._num_train, # Additional field unique to DistLinkPredictionDataset class + self._num_val, # Additional field unique to DistLinkPredictionDataset class + self._num_test, # Additional field unique to DistLinkPredictionDataset class + ) + return ipc_handle + + +def _append_non_split_node_ids( + train_node_ids: torch.Tensor, + val_node_ids: torch.Tensor, + test_node_ids: torch.Tensor, + node_ids_on_machine: torch.Tensor, +) -> torch.Tensor: + """Given some node ids that are in splits, and the node ids on a machine, concats the node ids on the machine that were not in a split onto the splits. + + Ex: _append_non_split_node_ids([2], [3], [4], [0, 1, 2, 3, 4, 5, 6]) -> [2, 3, 4, 0, 1, 5, 6] + """ + # Do this as the splits may be empty, and without it we see errors like: + # RuntimeError: max(): Expected reduction dim to be specified for input.numel() == 0.
Specify the reduction dim with the 'dim' argument. + node_ids_to_get_max = [node_ids_on_machine] + if train_node_ids.numel(): + node_ids_to_get_max.append(train_node_ids) + if val_node_ids.numel(): + node_ids_to_get_max.append(val_node_ids) + if test_node_ids.numel(): + node_ids_to_get_max.append(test_node_ids) + max_node_id = int(max(n.max().item() for n in node_ids_to_get_max)) + 1 + split_counts = torch.bincount(train_node_ids, minlength=max_node_id) + split_counts.add_(torch.bincount(val_node_ids, minlength=max_node_id)) + split_counts.add_(torch.bincount(test_node_ids, minlength=max_node_id)) + # Count all instances of node ids, then subtract the counts of the node ids in the split from the ones on the machine. + # Since splits are not guaranteed to be unique, we check where the count is greater than zero. + node_id_indices_not_in_split = ( + torch.bincount(node_ids_on_machine, minlength=max_node_id).sub_(split_counts) + > 0 + ) + # Then convert the indices to the original node ids + node_ids_not_in_split = torch.nonzero(node_id_indices_not_in_split).squeeze(dim=1) + logger.info( + f"We found {node_ids_not_in_split.numel()} nodes that are not in the split." + ) + if node_ids_not_in_split.numel() == 0: + logger.info("Found no nodes that are not in the splits.") + return torch.cat([train_node_ids, val_node_ids, test_node_ids]) + else: + return torch.cat( + [train_node_ids, val_node_ids, test_node_ids, node_ids_not_in_split] + ) + + +## Pickling Registration +# The serialization function (share_ipc) first pushes all member variable tensors +# to the shared memory, and then packages all references to the tensors in one ipc +# handle and sends the handle to another process. The deserialization function +# (from_ipc_handle) calls the class constructor with the ipc_handle. Therefore, the +# order of variables in the ipc_handle needs to be the same as the constructor +# interface. + +# Since we add the self.positive_label and self.negative_label fields to the dataset class and remove several unused fields for the link prediction task +# and CPU-only sampling, we override the `share_ipc` function to handle our custom member variables.
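# [Editor's illustrative sketch, not part of the diff] With the reducer registered
# below, the dataset can cross process boundaries via torch.multiprocessing, which
# pickles it with ForkingPickler and thereby routes it through share_ipc() and
# from_ipc_handle():
#
#     import torch.multiprocessing as mp
#
#     def worker_fn(rank: int, dataset: DistLinkPredictionDataset) -> None:
#         ...  # `dataset` here was rebuilt from the shared-memory ipc handle
#
#     mp.spawn(worker_fn, args=(dataset,), nprocs=2)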
+ + +def _rebuild_dist_link_prediction_dataset( + ipc_handle: Tuple[ + int, + int, + Literal["in", "out"], + Optional[Union[Graph, Dict[EdgeType, Graph]]], + Optional[Union[Feature, Dict[NodeType, Feature]]], + Optional[Union[Feature, Dict[EdgeType, Feature]]], + Optional[Union[PartitionBook, Dict[NodeType, PartitionBook]]], + Optional[Union[PartitionBook, Dict[EdgeType, PartitionBook]]], + Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]], + Optional[Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]], + Optional[Union[torch.Tensor, Dict[NodeType, torch.Tensor]]], + Optional[Union[int, Dict[NodeType, int]]], + Optional[Union[int, Dict[NodeType, int]]], + Optional[Union[int, Dict[NodeType, int]]], + ] +): + dataset = DistLinkPredictionDataset.from_ipc_handle(ipc_handle) + return dataset + + +def _reduce_dist_link_prediction_dataset(dataset: DistLinkPredictionDataset): + ipc_handle = dataset.share_ipc() + return (_rebuild_dist_link_prediction_dataset, (ipc_handle,)) + + +ForkingPickler.register(DistLinkPredictionDataset, _reduce_dist_link_prediction_dataset) diff --git a/python/gigl/distributed/distributed_neighborloader.py b/python/gigl/distributed/distributed_neighborloader.py new file mode 100644 index 0000000..511f134 --- /dev/null +++ b/python/gigl/distributed/distributed_neighborloader.py @@ -0,0 +1,216 @@ +from collections import abc +from typing import Dict, List, Optional, Tuple, Union + +import graphlearn_torch +import torch +from torch_geometric.typing import EdgeType + +import gigl.distributed.utils +from gigl.common.logger import Logger +from gigl.distributed import DistributedContext +from gigl.distributed.constants import ( + DEFAULT_MASTER_INFERENCE_PORT, + DEFAULT_MASTER_SAMPLING_PORT, +) +from gigl.distributed.dist_link_prediction_dataset import DistLinkPredictionDataset +from gigl.src.common.types.graph_data import ( + NodeType, # TODO (mkolodner-sc): Change to use torch_geometric.typing +) + +logger = Logger() + +# When using CPU based inference/training, we default the number of cpu threads used for neighbor loading on top of the per-process parallelism. +DEFAULT_NUM_CPU_THREADS = 2 + + +class DistNeighborLoader(graphlearn_torch.distributed.DistNeighborLoader): + def __init__( + self, + dataset: DistLinkPredictionDataset, + num_neighbors: Union[List[int], Dict[EdgeType, List[int]]], + context: DistributedContext, + local_process_rank: int, # TODO: Move this to DistributedContext + local_process_world_size: int, # TODO: Move this to DistributedContext + input_nodes: Optional[ + Union[torch.Tensor, Tuple[NodeType, torch.Tensor]] + ] = None, + num_workers: int = 1, + batch_size: int = 1, + pin_memory_device: Optional[torch.device] = None, + worker_concurrency: int = 4, + channel_size: str = "4GB", + process_start_gap_seconds: int = 60, + num_cpu_threads: Optional[int] = None, + _main_inference_port: int = DEFAULT_MASTER_INFERENCE_PORT, + _main_sampling_port: int = DEFAULT_MASTER_SAMPLING_PORT, + ): + """ + Note: We try to adhere to pyg dataloader api as much as possible. + See the following for reference: + https://pytorch-geometric.readthedocs.io/en/2.5.2/_modules/torch_geometric/loader/node_loader.html#NodeLoader + https://pytorch-geometric.readthedocs.io/en/2.5.2/_modules/torch_geometric/distributed/dist_neighbor_loader.html#DistNeighborLoader + + Args: + dataset (DistLinkPredictionDataset): The dataset to sample from. + num_neighbors (List[int] or Dict[Tuple[str, str, str], List[int]]): + The number of neighbors to sample for each node in each iteration. + If an entry is set to `-1`, all neighbors will be included.
+ In heterogeneous graphs, may also take in a dictionary denoting + the number of neighbors to sample for each individual edge type. + context (DistributedContext): Distributed context information of the current process. + local_process_rank (int): The local rank of the current process within a node. + local_process_world_size (int): The total number of processes within a node. + input_nodes (torch.Tensor or Tuple[str, torch.Tensor]): The + indices of seed nodes to start sampling from. + It is of type `torch.LongTensor` for homogeneous graphs. + If set to `None` for homogeneous settings, all nodes will be considered. + In heterogeneous graphs, this flag must be passed in as a tuple that holds + the node type and node indices. (default: `None`) + num_workers (int): How many workers to use (subprocesses to spawn) for + distributed neighbor sampling of the current process. (default: ``1``). + batch_size (int, optional): how many samples per batch to load + (default: ``1``). + pin_memory_device (str, optional): The target device that the sampled + results should be copied to. If set to ``None``, the device is inferred via + ``gigl.distributed.utils.device.get_available_device``, which uses the + local_process_rank and torch.cuda.device_count() to assign the device. If cuda is not available, + the cpu device will be used. (default: ``None``). + worker_concurrency (int): The max sampling concurrency for each sampling + worker. Load testing has shown that setting worker_concurrency to 4 yields the best performance + for sampling, although you may wish to explore higher/lower settings when performance tuning. + (default: `4`). + channel_size (int or str): The shared-memory buffer size (bytes) allocated + for the channel. Can be modified for performance tuning; a good starting point is: ``num_workers * 64MB`` + (default: "4GB"). + process_start_gap_seconds (int): Delay between each process for initializing neighbor loader. At large scales, + it is recommended to set this value to be between 60 and 120 seconds -- otherwise multiple processes may + attempt to initialize dataloaders at overlapping times, which can cause CPU memory OOM. + num_cpu_threads (Optional[int]): Number of cpu threads PyTorch should use for CPU training/inference + neighbor loading; on top of the per process parallelism. + Defaults to `2` if set to `None` when using cpu training/inference. + _main_inference_port (int): WARNING: You don't need to configure this unless you run into port conflict issues. Slotted for refactor. + The port number to use for inference processes. + In future, the port will be automatically assigned based on availability. + Currently defaults to: gigl.distributed.constants.DEFAULT_MASTER_INFERENCE_PORT + _main_sampling_port (int): WARNING: You don't need to configure this unless you run into port conflict issues. Slotted for refactor. + The port number to use for sampling processes. + In future, the port will be automatically assigned based on availability. + Currently defaults to: gigl.distributed.constants.DEFAULT_MASTER_SAMPLING_PORT + """ + + if input_nodes is None: + if dataset.node_ids is None: + raise ValueError( + "Dataset must have node ids if input_nodes are not provided." + ) + if isinstance(dataset.node_ids, abc.Mapping): + raise ValueError( + f"input_nodes must be provided for heterogeneous datasets, received node_ids of type: {dataset.node_ids.keys()}" + ) + input_nodes = dataset.node_ids + + if isinstance(num_neighbors, abc.Mapping): + # TODO(kmonte): We should enable this.
We have two blockers: + # 1. We need to treat `EdgeType` as a proper tuple, not the GiGL `EdgeType`. + # 2. There are (likely) some GLT bugs around https://github.com/alibaba/graphlearn-for-pytorch/blob/26fe3d4e050b081bc51a79dc9547f244f5d314da/graphlearn_torch/python/distributed/dist_neighbor_sampler.py#L317-L318 + # Where if num_neighbors is a dict then we index into it improperly. + raise ValueError( + f"num_neighbors must be a list of integers, received: {num_neighbors}" + ) + curr_process_nodes = _shard_nodes_by_process( + input_nodes=input_nodes, + local_process_rank=local_process_rank, + local_process_world_size=local_process_world_size, + ) + device = ( + pin_memory_device + if pin_memory_device + else gigl.distributed.utils.get_available_device( + local_process_rank=local_process_rank + ) + ) + # Sets up processes and torch device for initializing the GLT DistNeighborLoader, setting up RPC and worker groups to minimize + # the memory overhead and CPU contention. + logger.info( + f"Initializing neighbor loader worker in process: {local_process_rank}/{local_process_world_size} using device: {device}" + ) + should_use_cpu_workers = device.type == "cpu" + if should_use_cpu_workers and num_cpu_threads is None: + logger.info( + "Using CPU workers, but found num_cpu_threads to be None. " + f"Will default num_cpu_threads to {DEFAULT_NUM_CPU_THREADS}." + ) + num_cpu_threads = DEFAULT_NUM_CPU_THREADS + gigl.distributed.utils.init_neighbor_loader_worker( + master_ip_address=context.main_worker_ip_address, + local_process_rank=local_process_rank, + local_process_world_size=local_process_world_size, + rank=context.global_rank, + world_size=context.global_world_size, + master_worker_port=_main_inference_port, + device=device, + should_use_cpu_workers=should_use_cpu_workers, + # Lever to explore tuning for CPU based inference + num_cpu_threads=num_cpu_threads, + process_start_gap_seconds=process_start_gap_seconds, + ) + logger.info( + f"Finished initializing neighbor loader worker: {local_process_rank}/{local_process_world_size}" + ) + + # Sets up worker options for the dataloader + worker_options = graphlearn_torch.distributed.MpDistSamplingWorkerOptions( + num_workers=num_workers, + worker_devices=[torch.device("cpu") for _ in range(num_workers)], + worker_concurrency=worker_concurrency, + # Each worker will spawn several sampling workers, and all sampling workers spawned by workers in one group + # need to be connected. Thus, we need master ip address and master port to + # initiate the connection. + # Note that different groups of workers are independent, and thus + # the sampling processes in different groups should be independent, and should + # use different master ports. + master_addr=context.main_worker_ip_address, + master_port=_main_sampling_port + local_process_rank, + # Load testing shows that when num_rpc_threads exceeds 16, performance + # will degrade.
+ num_rpc_threads=min(dataset.num_partitions, 16), + rpc_timeout=600, + channel_size=channel_size, + pin_memory=device.type == "cuda", + ) + + super().__init__( + data=dataset, + num_neighbors=num_neighbors, + input_nodes=curr_process_nodes, + batch_size=batch_size, + with_edge=True, + edge_dir=dataset.edge_dir, + collect_features=True, + to_device=device, + worker_options=worker_options, + ) + + +def _shard_nodes_by_process( + input_nodes: Union[torch.Tensor, Tuple[str, torch.Tensor]], + local_process_rank: int, + local_process_world_size: int, +) -> Union[torch.Tensor, Tuple[str, torch.Tensor]]: + def shard(nodes: torch.Tensor) -> torch.Tensor: + num_node_ids_per_process = nodes.size(0) // local_process_world_size + start_index = local_process_rank * num_node_ids_per_process + end_index = ( + nodes.size(0) + if local_process_rank == local_process_world_size - 1 + else start_index + num_node_ids_per_process + ) + nodes_for_current_process = nodes[start_index:end_index] + return nodes_for_current_process + + if isinstance(input_nodes, torch.Tensor): + return shard(input_nodes) + else: + node_type, node_ids = input_nodes + node_ids = shard(node_ids) + return (node_type, node_ids) diff --git a/python/gigl/distributed/utils/__init__.py b/python/gigl/distributed/utils/__init__.py index e69de29..7c3aca5 100644 --- a/python/gigl/distributed/utils/__init__.py +++ b/python/gigl/distributed/utils/__init__.py @@ -0,0 +1,5 @@ +from .device import get_available_device +from .init_neighbor_loader_worker import ( + get_process_group_name, + init_neighbor_loader_worker, +) diff --git a/python/gigl/distributed/utils/device.py b/python/gigl/distributed/utils/device.py new file mode 100644 index 0000000..fb8a03b --- /dev/null +++ b/python/gigl/distributed/utils/device.py @@ -0,0 +1,19 @@ +import torch + + +def get_available_device(local_process_rank: int) -> torch.device: + r"""Returns the available device for the current process. + + Args: + local_process_rank (int): The local rank of the current process within a node. + Returns: + torch.device: The device to use. + """ + device = torch.device( + "cpu" + if not torch.cuda.is_available() + # If the number of processes is larger than the number of available GPUs, + # we assign each process to one GPU in a round-robin manner. + else f"cuda:{local_process_rank % torch.cuda.device_count()}" + ) + return device diff --git a/python/gigl/distributed/utils/init_neighbor_loader_worker.py b/python/gigl/distributed/utils/init_neighbor_loader_worker.py new file mode 100644 index 0000000..b26482e --- /dev/null +++ b/python/gigl/distributed/utils/init_neighbor_loader_worker.py @@ -0,0 +1,175 @@ +import time +from functools import lru_cache +from typing import Optional + +import psutil +import torch +from graphlearn_torch.distributed import init_rpc, init_worker_group + +from gigl.common.logger import Logger + +logger = Logger() + + +def get_process_group_name(process_rank: int) -> str: + """ + Returns the name of the process group for the given process rank. + Args: + process_rank (int): The rank of the process. + Returns: + str: The name of the process group. + """ + return f"distributed-process-{process_rank}" + + +# torch.set_num_interop_threads() can only be called once, otherwise we see: +# RuntimeError: Error: cannot set number of interop threads after parallel work has started or set_num_interop_threads called +# Since we don't need to re-set up identical worker pools, etc., we can just "cache" this call. +# That way the "side-effects" of the call are only executed once.
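# [Editor's aside, not part of the diff] Because of the lru_cache below, only the
# first call per unique argument tuple executes the body; a repeated call with
# identical (hashable) arguments is a cached no-op, so the thread/RPC setup cannot
# run twice:
#
#     init_neighbor_loader_worker(master_ip_address="10.0.0.1", ...)  # runs setup
#     init_neighbor_loader_worker(master_ip_address="10.0.0.1", ...)  # cached, skipped
#
# (The address and the elided arguments are hypothetical.)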
+@lru_cache(maxsize=1) +def init_neighbor_loader_worker( + master_ip_address: str, + local_process_rank: int, + local_process_world_size: int, + rank: int, + world_size: int, + master_worker_port: int, + device: torch.device, + should_use_cpu_workers: bool = False, + num_cpu_threads: Optional[int] = None, + process_start_gap_seconds: float = 60.0, +) -> None: + """ + Sets up processes and torch device for initializing the GLT DistNeighborLoader, setting up RPC and worker groups to minimize + the memory overhead and CPU contention. + Args: + master_ip_address (str): Master IP Address to manage processes + local_process_rank (int): Process number on the current machine + local_process_world_size (int): Total number of processes on the current machine + rank (int): Rank of current machine + world_size (int): Total number of machines + master_worker_port (int): Master port to use for communicating between workers during training or inference + device (torch.device): The device to load the data onto - i.e. where your model is + should_use_cpu_workers (bool): Whether we should do CPU training or inference. + num_cpu_threads (Optional[int]): Number of cpu threads PyTorch should use for CPU training or inference. + Must be set if should_use_cpu_workers is True. + process_start_gap_seconds (float): Delay between each process for initializing neighbor loader. At large scales, it is recommended to set + this value to be between 60 and 120 seconds -- otherwise multiple processes may attempt to initialize dataloaders at overlapping times, + which can cause CPU memory OOM. + """ + + # When initiating data loader(s), there will be a spike of memory usage lasting for ~30s. + # The current hypothesis is that making connections across machines requires a lot of memory. + # If we start all data loaders in all processes simultaneously, the spike of memory + # usage will add up and cause CPU memory OOM. Hence, we initiate the data loaders group by group + # to smooth the memory usage. The definition of group is discussed below. + logger.info( + f"---Machine {rank} local process number {local_process_rank} preparing to sleep for {process_start_gap_seconds * local_process_rank} seconds" + ) + time.sleep(process_start_gap_seconds * local_process_rank) + logger.info(f"---Machine {rank} local process number {local_process_rank} started") + if not should_use_cpu_workers: + assert ( + torch.cuda.device_count() > 0 + ), f"Must have at least 1 GPU available for GPU training or inference, got {torch.cuda.device_count()}" + + if should_use_cpu_workers: + assert ( + num_cpu_threads is not None + ), "Must provide number of cpu threads when using cpu workers" + # Assign processes to disjoint physical cores. Since training or inference is computation + # bound instead of I/O bound, logical core segmentation is not enough, as two + # hyperthreads on the same physical core could still compete for resources. + + # Compute the range of physical cores the process should run on.
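# [Editor's worked example, not part of the diff] With 16 physical cores and
# local_process_world_size=4, local_process_rank=2 gets:
#     physical_cores_per_process = 16 // 4 = 4
#     start_physical_core = 2 * 4 = 8
#     end_physical_core = 8 + 4 = 12
# i.e. physical cores [8, 12); the last rank absorbs any remainder cores.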
+ total_physical_cores = psutil.cpu_count(logical=False) + physical_cores_per_process = total_physical_cores // local_process_world_size + start_physical_core = local_process_rank * physical_cores_per_process + end_physical_core = ( + total_physical_cores + if local_process_rank == local_process_world_size - 1 + else start_physical_core + physical_cores_per_process + ) + + # Essentially we could only specify the logical cores the process should run + # on, so we have to map physical cores to logical cores. For GCP machines, + # logical cores are assigned to physical cores in a round robin manner, i.e., + # if there are 4 physical cores, logical cores 0, 1, 2, 3, will be assigned + # to physical cores 0, 1, 2, 3. Logical core 4 will be assigned to physical + # core 0, logical core 5 will be assigned to physical core 1, etc. However, + # this mapping does not always hold. Some VMs assign logical cores 0 and 1 to + # physical core 0, and assign logical cores 2, 3 to physical core 1. We can + # check this by running the `lscpu -p` command in the terminal. + first_logical_core_range = list(range(start_physical_core, end_physical_core)) + second_logical_core_range = list( + range( + start_physical_core + total_physical_cores, + end_physical_core + total_physical_cores, + ) + ) + logical_cores = first_logical_core_range + second_logical_core_range + + # Set the logical cpu cores the current process should run on. Note + # that the sampling process spawned by the process will inherit + # this setting, meaning that the sampling process will run on the same group + # of logical cores. However, the sampling process is network bound so + # it may not heavily compete for resources with model training or inference. + p = psutil.Process() + p.cpu_affinity(logical_cores) + + torch.set_num_threads(num_cpu_threads) + torch.set_num_interop_threads(num_cpu_threads) + else: + # Setting the default CUDA device for the current process to be the + # given device. Without it, there will be a process created on the cuda:0 device, and + # another process created on the given device. Consequently, there will be + # more processes running on cuda:0 than other cuda devices. The processes on + # cuda:0 will compete for memory and could cause CUDA OOM. + torch.cuda.set_device(device) + torch.cuda.empty_cache() + logger.info( + f"Machine {rank} local rank {local_process_rank} uses device {torch.cuda.current_device()} by default" + ) + + # Group of workers. Each process is a worker. Each + # worker will initiate one model and at least one data loader. Each data loader + # will spawn several sampling processes (a.k.a. sampling workers). + # Instead of combining all workers into one group, we define N groups where + # N is the number of processes on each machine. Specifically, we have + # Group 0: (Machine 0, process 0), (Machine 1, process 0),..., (Machine M, process 0) + # Group 1: (Machine 0, process 1), (Machine 1, process 1),..., (Machine M, process 1) + # ... + # Group N-1: (Machine 0, process N-1), (Machine 1, process N-1),..., (Machine M, process N-1) + # We do this as we want to start different groups at different times to smooth + # the spike of memory usage as mentioned above.
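# [Editor's worked example, not part of the diff] With 2 machines, 2 local processes
# per machine, and master_worker_port=20000 (port value hypothetical):
#     group "distributed-process-0" = (machine 0, proc 0), (machine 1, proc 0) -> rpc port 20000
#     group "distributed-process-1" = (machine 0, proc 1), (machine 1, proc 1) -> rpc port 20001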
+
+    group_name = get_process_group_name(local_process_rank)
+    logger.info(
+        f"Init worker group with: world_size={world_size}, rank={rank}, group_name={group_name}"
+    )
+    init_worker_group(
+        world_size=world_size,
+        rank=rank,
+        group_name=group_name,
+    )
+
+    # Initialize the communication channel across all workers in one group, so
+    # that we can add a barrier and wait for all workers to finish before quitting.
+    # Note that all sampling workers across all processes in one group need to
+    # be connected for graph sampling. Thus, a worker needs to wait for others even
+    # if it has finished, as a quitting process will shut down the corresponding sampling
+    # workers and break the connection with other sampling workers.
+    # Note that different process groups are independent of each other. Therefore,
+    # they have to use different master ports.
+    logger.info(
+        f"Initializing rpc for group: world_size={world_size}, rank={rank}, group_name={group_name}"
+    )
+    init_rpc(
+        master_addr=master_ip_address,
+        master_port=master_worker_port + local_process_rank,
+        rpc_timeout=600,
+    )
+
+    logger.info(f"Group {group_name} with rpc is initiated")
diff --git a/python/gigl/distributed/utils/partition_book.py b/python/gigl/distributed/utils/partition_book.py
new file mode 100644
index 0000000..ba11f9f
--- /dev/null
+++ b/python/gigl/distributed/utils/partition_book.py
@@ -0,0 +1,35 @@
+from typing import Union
+
+import torch
+from graphlearn_torch.partition import PartitionBook, RangePartitionBook
+
+
+def _get_ids_from_range_partition_book(
+    range_partition_book: PartitionBook, rank: int
+) -> torch.Tensor:
+    """
+    This function is very similar to RangePartitionBook.id_filter(). However, we re-implement it here, since the usage pattern for that is a bit strange,
+    i.e. range_partition_book.id_filter(node_pb=range_partition_book, partition_idx=rank).
+    """
+    assert isinstance(range_partition_book, RangePartitionBook)
+    start_node_id = range_partition_book.partition_bounds[rank - 1] if rank > 0 else 0
+    end_node_id = range_partition_book.partition_bounds[rank]
+    return torch.arange(start_node_id, end_node_id, dtype=torch.int64)
+
+
+def get_ids_on_rank(
+    partition_book: Union[torch.Tensor, PartitionBook],
+    rank: int,
+) -> torch.Tensor:
+    """
+    Provided a tensor-based partition book or a range-based partition book and a rank, returns all the ids that are stored on that rank.
+    Args:
+        partition_book (Union[torch.Tensor, PartitionBook]): Tensor or range-based partition book
+        rank (int): Rank of current machine
+    """
+    if isinstance(partition_book, torch.Tensor):
+        return torch.nonzero(partition_book == rank).squeeze(dim=1)
+    else:
+        return _get_ids_from_range_partition_book(
+            range_partition_book=partition_book, rank=rank
+        )
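A small usage sketch for `get_ids_on_rank` (illustrative, not part of the patch). Only the tensor-based path is exercised; the range-based path is described in terms of `partition_bounds`, since the exact `RangePartitionBook` constructor is not shown here:

import torch

from gigl.distributed.utils.partition_book import get_ids_on_rank

# Tensor-based partition book for a toy 4-node graph on 2 ranks:
# entry i holds the rank that owns node id i.
tensor_pb = torch.tensor([0, 0, 1, 1])

print(get_ids_on_rank(partition_book=tensor_pb, rank=0))  # tensor([0, 1])
print(get_ids_on_rank(partition_book=tensor_pb, rank=1))  # tensor([2, 3])

# A RangePartitionBook with partition_bounds == tensor([2, 4]) would yield the
# same result via _get_ids_from_range_partition_book: rank r owns the
# contiguous id range [bounds[r - 1], bounds[r]).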
diff --git a/python/gigl/distributed/utils/dataset_input_metadata_translator.py b/python/gigl/distributed/utils/serialized_graph_metadata_translator.py
similarity index 67%
rename from python/gigl/distributed/utils/dataset_input_metadata_translator.py
rename to python/gigl/distributed/utils/serialized_graph_metadata_translator.py
index 986adba..f126aa5 100644
--- a/python/gigl/distributed/utils/dataset_input_metadata_translator.py
+++ b/python/gigl/distributed/utils/serialized_graph_metadata_translator.py
@@ -2,13 +2,14 @@
 
 from gigl.common import UriFactory
 from gigl.common.data.dataloaders import SerializedTFRecordInfo
-from gigl.distributed.dataset.dist_link_prediction_dataset import DatasetInputMetadata
+from gigl.common.data.load_torch_tensors import SerializedGraphMetadata
 from gigl.src.common.types.graph_data import EdgeType, NodeType
 from gigl.src.common.types.pb_wrappers.graph_metadata import GraphMetadataPbWrapper
 from gigl.src.common.types.pb_wrappers.preprocessed_metadata import (
     PreprocessedMetadataPbWrapper,
 )
 from gigl.src.data_preprocessor.lib.types import FeatureSpecDict
+from gigl.types.distributed import to_homogeneous
 from snapchat.research.gbml.preprocessed_metadata_pb2 import PreprocessedMetadata
 
 
@@ -18,6 +19,7 @@ def _build_serialized_tfrecord_entity_info(
     ],
     feature_spec_dict: FeatureSpecDict,
     entity_key: Union[str, Tuple[str, str]],
+    tfrecord_uri_pattern: str,
 ) -> SerializedTFRecordInfo:
     """
     Populates a SerializedTFRecordInfo field from provided arguments for either a node or edge entity of a single node/edge type.
@@ -27,6 +29,7 @@
     ]): Preprocessed metadata pb for either NodeMetadataOutput or EdgeMetadataInfo
         feature_spec_dict (FeatureSpecDict): Feature spec to register to SerializedTFRecordInfo
         entity_key (Union[str, Tuple[str, str]]): Entity key to register to SerializedTFRecordInfo, is a str if Node entity or Tuple[str, str] if Edge entity
+        tfrecord_uri_pattern (str): Regex pattern for loading serialized tf records
     Returns:
         SerializedTFRecordInfo: Stored metadata for current entity
     """
@@ -34,24 +37,27 @@
         tfrecord_uri_prefix=UriFactory.create_uri(
             preprocessed_metadata.tfrecord_uri_prefix
         ),
-        feature_keys=preprocessed_metadata.feature_keys,
+        feature_keys=list(preprocessed_metadata.feature_keys),
         feature_spec=feature_spec_dict,
         feature_dim=preprocessed_metadata.feature_dim,
         entity_key=entity_key,
+        tfrecord_uri_pattern=tfrecord_uri_pattern,
     )
 
 
-def convert_pb_to_dataset_input_metadata(
+def convert_pb_to_serialized_graph_metadata(
     preprocessed_metadata_pb_wrapper: PreprocessedMetadataPbWrapper,
     graph_metadata_pb_wrapper: GraphMetadataPbWrapper,
-) -> DatasetInputMetadata:
+    tfrecord_uri_pattern: str = ".*tfrecord(.gz)?$",
+) -> SerializedGraphMetadata:
     """
-    Populates a DatasetInputMetadata field from PreprocessedMetadataPbWrapper and GraphMetadataPbWrapper, containing information for loading tensors for all entities and node/edge types.
+ Populates a SerializedGraphMetadata field from PreprocessedMetadataPbWrapper and GraphMetadataPbWrapper, containing information for loading tensors for all entities and node/edge types. Args: - preprocessed_metadata_pb_wrapper (PreprocessedMetadataPbWrapper): Preprocessed Metadata Pb Wrapper to translate into DatasetInputMetadata + preprocessed_metadata_pb_wrapper (PreprocessedMetadataPbWrapper): Preprocessed Metadata Pb Wrapper to translate into SerializedGraphMetadata graph_metadata_pb_wrapper (GraphMetadataPbWrapper): Graph Metadata Pb Wrapper to translate into Dataset Metadata + tfrecord_uri_pattern (str): Regex pattern for loading serialized tf records Returns: - DatasetInputMetadata: Dataset Metadata for all entity and node/edge types. + SerializedGraphMetadata: Dataset Metadata for all entity and node/edge types. """ node_entity_info: Dict[NodeType, SerializedTFRecordInfo] = {} @@ -70,17 +76,20 @@ def convert_pb_to_dataset_input_metadata( condensed_node_type ] ) + node_feature_spec_dict = ( preprocessed_metadata_pb_wrapper.condensed_node_type_to_feature_schema_map[ condensed_node_type ].feature_spec ) + node_key = node_metadata.node_id_key node_entity_info[node_type] = _build_serialized_tfrecord_entity_info( preprocessed_metadata=node_metadata, feature_spec_dict=node_feature_spec_dict, entity_key=node_key, + tfrecord_uri_pattern=tfrecord_uri_pattern, ) for edge_type in graph_metadata_pb_wrapper.edge_types: @@ -93,11 +102,13 @@ def convert_pb_to_dataset_input_metadata( condensed_edge_type ] ) + edge_feature_spec_dict = ( preprocessed_metadata_pb_wrapper.condensed_edge_type_to_feature_schema_map[ condensed_edge_type ].feature_spec ) + edge_key = ( edge_metadata.src_node_id_key, edge_metadata.dst_node_id_key, @@ -107,6 +118,7 @@ def convert_pb_to_dataset_input_metadata( preprocessed_metadata=edge_metadata.main_edge_info, feature_spec_dict=edge_feature_spec_dict, entity_key=edge_key, + tfrecord_uri_pattern=tfrecord_uri_pattern, ) if preprocessed_metadata_pb_wrapper.has_pos_edge_features( @@ -116,12 +128,13 @@ def convert_pb_to_dataset_input_metadata( condensed_edge_type ].feature_spec - positive_label_entity_info[edge_type] = ( - _build_serialized_tfrecord_entity_info( - preprocessed_metadata=edge_metadata.positive_edge_info, - feature_spec_dict=pos_edge_feature_spec_dict, - entity_key=edge_key, - ) + positive_label_entity_info[ + edge_type + ] = _build_serialized_tfrecord_entity_info( + preprocessed_metadata=edge_metadata.positive_edge_info, + feature_spec_dict=pos_edge_feature_spec_dict, + entity_key=edge_key, + tfrecord_uri_pattern=tfrecord_uri_pattern, ) else: positive_label_entity_info[edge_type] = None @@ -133,42 +146,39 @@ def convert_pb_to_dataset_input_metadata( condensed_edge_type ].feature_spec - negative_label_entity_info[edge_type] = ( - _build_serialized_tfrecord_entity_info( - preprocessed_metadata=edge_metadata.negative_edge_info, - feature_spec_dict=hard_neg_edge_feature_spec_dict, - entity_key=edge_key, - ) + negative_label_entity_info[ + edge_type + ] = _build_serialized_tfrecord_entity_info( + preprocessed_metadata=edge_metadata.negative_edge_info, + feature_spec_dict=hard_neg_edge_feature_spec_dict, + entity_key=edge_key, + tfrecord_uri_pattern=tfrecord_uri_pattern, ) else: negative_label_entity_info[edge_type] = None if not graph_metadata_pb_wrapper.is_heterogeneous: # If our input is homogeneous, we remove the node/edge type component of the metadata fields. 
-        return DatasetInputMetadata(
-            node_entity_info=list(node_entity_info.values())[0],
-            edge_entity_info=list(edge_entity_info.values())[0],
-            positive_label_entity_info=list(positive_label_entity_info.values())[0],
-            negative_label_entity_info=list(negative_label_entity_info.values())[0],
+        return SerializedGraphMetadata(
+            node_entity_info=to_homogeneous(node_entity_info),
+            edge_entity_info=to_homogeneous(edge_entity_info),
+            positive_label_entity_info=to_homogeneous(positive_label_entity_info),
+            negative_label_entity_info=to_homogeneous(negative_label_entity_info),
         )
     else:
-        return DatasetInputMetadata(
+        return SerializedGraphMetadata(
             node_entity_info=node_entity_info,
             edge_entity_info=edge_entity_info,
-            positive_label_entity_info=(
-                positive_label_entity_info
-                if not all(
-                    entity_info is None
-                    for entity_info in positive_label_entity_info.values()
-                )
-                else None
-            ),
-            negative_label_entity_info=(
-                negative_label_entity_info
-                if not all(
-                    entity_info is None
-                    for entity_info in negative_label_entity_info.values()
-                )
-                else None
-            ),
+            positive_label_entity_info=positive_label_entity_info
+            if not all(
+                entity_info is None
+                for entity_info in positive_label_entity_info.values()
+            )
+            else None,
+            negative_label_entity_info=negative_label_entity_info
+            if not all(
+                entity_info is None
+                for entity_info in negative_label_entity_info.values()
+            )
+            else None,
         )
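`to_homogeneous` is imported above from `gigl.types.distributed` but not defined in this patch; a minimal sketch of the single-type collapse behavior the homogeneous return branch relies on (replacing the old `list(d.values())[0]` idiom), under that assumption:

from typing import Dict, TypeVar

_T = TypeVar("_T")


def to_homogeneous_sketch(mapping: Dict[str, _T]) -> _T:
    # The homogeneous case has exactly one node/edge type, so the dict must
    # contain exactly one entry; return its value, dropping the type key.
    assert len(mapping) == 1, f"Expected a single type, got {list(mapping)}"
    return next(iter(mapping.values()))

# e.g. to_homogeneous_sketch({"user": node_info}) returns node_info directly.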
diff --git a/python/gigl/env/dep_constants.py b/python/gigl/env/dep_constants.py
index 4253efa..f6e76a1 100644
--- a/python/gigl/env/dep_constants.py
+++ b/python/gigl/env/dep_constants.py
@@ -6,22 +6,19 @@
 from gigl.common import LocalUri
 from gigl.src.common.constants.components import GiGLComponents
 
-# TODO: (Open Source) Make these publically accesible
-GIGL_SRC_IMAGE_CUDA = (
-    "gcr.io/external-snap-ci-github-gigl/gigl_src_images/gigl_src_cuda:0.0.6"
-)
-GIGL_SRC_IMAGE_CPU = (
-    "gcr.io/external-snap-ci-github-gigl/gigl_src_images/gigl_src_cpu:0.0.6"
-)
-GIGL_DATAFLOW_IMAGE = (
-    "gcr.io/external-snap-ci-github-gigl/gigl_src_images/gigl_src_dataflow:0.0.6"
-)
-
+# TODO: (Open Source) Make these publicly accessible
+# We will first need to release new images to PUBLIC GCR - these are temp holds
+# Note: nothing breaks as long as images are provided by the user; in the default case,
+# where the user does not provide images, we will fail here.
+GIGL_SRC_IMAGE_CUDA = "TODO - make this public"
+GIGL_SRC_IMAGE_CPU = "TODO - make this public"
+GIGL_DATAFLOW_IMAGE = "TODO - make this public"
 
 _SPARK_35_DIR_NAME = "scala_spark35"
 _SPARK_DIR_NAME = "scala"
 
 GIGL_PUBLIC_BUCKET_NAME = "public-gigl"
+GIGL_PUBLIC_DATASET_NAME = "external-snap-ci-github-gigl.public_gigl"
 GIGL_TEST_BUCKET_NAME = "gigl-cicd-temp"
diff --git a/python/gigl/orchestration/kubeflow/components/config_populator/component.yaml b/python/gigl/orchestration/kubeflow/components/config_populator/component.yaml
index cd37fd9..2b9bd94 100644
--- a/python/gigl/orchestration/kubeflow/components/config_populator/component.yaml
+++ b/python/gigl/orchestration/kubeflow/components/config_populator/component.yaml
@@ -9,7 +9,7 @@
 outputs:
 
 implementation:
   container:
-    image: gcr.io/external-snap-ci-github-gigl/gbml:latest # Dummy value, always overwritten by arguments passed to gnn.py
+    image: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/src-cpu:latest # Dummy value, always overwritten by arguments passed to gnn.py
     command: [
       python, -m, gigl.src.config_populator.config_populator,
       --job_name, {inputValue: job_name},
diff --git a/python/gigl/orchestration/kubeflow/components/config_validator/component.yaml b/python/gigl/orchestration/kubeflow/components/config_validator/component.yaml
index 48d0ccf..2dd9743 100644
--- a/python/gigl/orchestration/kubeflow/components/config_validator/component.yaml
+++ b/python/gigl/orchestration/kubeflow/components/config_validator/component.yaml
@@ -10,7 +10,7 @@
 outputs:
 
 implementation:
   container:
-    image: gcr.io/external-snap-ci-github-gigl/gbml:latest # Dummy value, always overwritten by arguments passed to gnn.py
+    image: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/src-cpu:latest # Dummy value, always overwritten by arguments passed to gnn.py
     command: [
       python, -m, gigl.src.validation_check.config_validator,
       --job_name, {inputValue: job_name},
diff --git a/python/gigl/orchestration/kubeflow/components/data_preprocessor/component.yaml b/python/gigl/orchestration/kubeflow/components/data_preprocessor/component.yaml
index c9cde28..d0039b6 100644
--- a/python/gigl/orchestration/kubeflow/components/data_preprocessor/component.yaml
+++ b/python/gigl/orchestration/kubeflow/components/data_preprocessor/component.yaml
@@ -9,7 +9,7 @@
 outputs:
 
 implementation:
   container:
-    image: gcr.io/external-snap-ci-github-gigl/gbml:latest # Dummy value, always overwritten by arguments passed to gnn.py
+    image: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/src-cpu:latest # Dummy value, always overwritten by arguments passed to gnn.py
     command: [
       python, -m, gigl.src.data_preprocessor.data_preprocessor,
       --job_name, {inputValue: job_name},
diff --git a/python/gigl/orchestration/kubeflow/components/inferencer/component.yaml b/python/gigl/orchestration/kubeflow/components/inferencer/component.yaml
index e49038b..9cb0815 100644
--- a/python/gigl/orchestration/kubeflow/components/inferencer/component.yaml
+++ b/python/gigl/orchestration/kubeflow/components/inferencer/component.yaml
@@ -11,7 +11,7 @@
 outputs:
 
 implementation:
   container:
-    image: gcr.io/external-snap-ci-github-gigl/gbml:latest # Dummy value, always overwritten by arguments passed to gnn.py
+    image: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/src-cpu:latest # Dummy value, always overwritten by arguments passed to gnn.py
     command: [
       python, -m, gigl.src.inference.inferencer,
       --job_name, {inputValue: job_name},
diff --git a/python/gigl/orchestration/kubeflow/components/post_processor/component.yaml b/python/gigl/orchestration/kubeflow/components/post_processor/component.yaml
index 22cd972..ef6516a 100644
--- a/python/gigl/orchestration/kubeflow/components/post_processor/component.yaml
+++ b/python/gigl/orchestration/kubeflow/components/post_processor/component.yaml
@@ -8,7 +8,7 @@
 outputs:
 
 implementation:
   container:
-    image: gcr.io/external-snap-ci-github-gigl/gbml:latest # Dummy value, always overwritten by arguments passed to gnn.py
+    image: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/src-cpu:latest # Dummy value, always overwritten by arguments passed to gnn.py
     command: [
       python, -m, gigl.src.post_process.post_processor,
       --job_name, {inputValue: job_name},
diff --git a/python/gigl/orchestration/kubeflow/components/split_generator/component.yaml b/python/gigl/orchestration/kubeflow/components/split_generator/component.yaml
index 3ab1d61..e84c4e4 100644
--- a/python/gigl/orchestration/kubeflow/components/split_generator/component.yaml
+++ b/python/gigl/orchestration/kubeflow/components/split_generator/component.yaml
@@ -8,7 +8,7 @@
 outputs:
 
 implementation:
   container:
-    image: gcr.io/external-snap-ci-github-gigl/gbml:latest # Dummy value, always overwritten by arguments passed to gnn.py
+    image: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/src-cpu:latest # Dummy value, always overwritten by arguments passed to gnn.py
     command: [
      python, -m, gigl.src.split_generator.split_generator,
       --job_name, {inputValue: job_name},
diff --git a/python/gigl/orchestration/kubeflow/components/subgraph_sampler/component.yaml b/python/gigl/orchestration/kubeflow/components/subgraph_sampler/component.yaml
index ae87b1b..e0ecc50 100644
--- a/python/gigl/orchestration/kubeflow/components/subgraph_sampler/component.yaml
+++ b/python/gigl/orchestration/kubeflow/components/subgraph_sampler/component.yaml
@@ -8,18 +8,18 @@
 inputs:
 - {name: task_config_uri, type: String, description: 'Frozen GBML config uri'}
 - {name: resource_config_uri, type: String, description: 'Runtine argument for resource and env specifications of each component'}
 - {name: custom_worker_image_uri, type: String, description: "Docker image to use for the worker harness in dataflow "}
-- {name: additional_spark35_local_jar_file_paths, type: String, default: "", description: "Additional local jar file paths which should be uploaded to GCS and then added to a Spark3.5 cluster"}
+- {name: additional_spark35_jar_file_uris, type: String, default: "", description: "Additional jar file URIs which should be added to a Spark 3.5 cluster"}
 outputs:
 
 implementation:
   container:
-    image: gcr.io/external-snap-ci-github-gigl/gbml:latest # Dummy value, always overwritten by arguments passed to gnn.py
+    image: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/src-cpu:latest # Dummy value, always overwritten by arguments passed to gnn.py
     command: [
       python, -m, gigl.src.subgraph_sampler.subgraph_sampler,
       --job_name, {inputValue: job_name},
       --task_config_uri, {inputValue: task_config_uri},
       --resource_config_uri, {inputValue: resource_config_uri},
       --custom_worker_image_uri, {inputValue: custom_worker_image_uri},
-      --additional_spark35_local_jar_file_paths, {inputValue: additional_spark35_local_jar_file_paths}
+      --additional_spark35_jar_file_uris, {inputValue: additional_spark35_jar_file_uris}
     ]
diff --git a/python/gigl/orchestration/kubeflow/components/trainer/component.yaml b/python/gigl/orchestration/kubeflow/components/trainer/component.yaml
index 5a8e969..a1780cd 100644
--- a/python/gigl/orchestration/kubeflow/components/trainer/component.yaml
+++ b/python/gigl/orchestration/kubeflow/components/trainer/component.yaml
@@ -1,5 +1,5 @@
 name: Trainer
-description: Executes training job based on selected configuration in resource_config 
+description: Executes training job based on selected configuration in resource_config
 inputs:
 - {name: config_uri, type: String, description: 'URI to Task Config'}
 - {name: job_name, type: String, description: 'Unique name to identify the job'}
@@ -9,7 +9,7 @@
 
 implementation:
   container:
-    image: gcr.io/external-snap-ci-github-gigl/gbml:latest # Dummy value
+    image: us-central1-docker.pkg.dev/external-snap-ci-github-gigl/gigl-base-images/src-cpu:latest # Dummy value
     command: [
       python, -m, gigl.src.training.trainer,
       --task_config_uri, {inputValue: config_uri},
diff --git a/python/gigl/orchestration/kubeflow/kfp_orchestrator.py b/python/gigl/orchestration/kubeflow/kfp_orchestrator.py
index 380d02f..0af7f78 100644
--- a/python/gigl/orchestration/kubeflow/kfp_orchestrator.py
+++ b/python/gigl/orchestration/kubeflow/kfp_orchestrator.py
@@ -1,25 +1,25 @@
-import os
-from dataclasses import dataclass
+from __future__ import annotations
+
 from pathlib import Path
-from typing import Optional
+from typing import Optional, Union
 
-from dotenv import load_dotenv
+from google.cloud import aiplatform
 from kfp.compiler import Compiler
 
 import gigl.src.common.constants.local_fs as local_fs_constants
 from gigl.common import LocalUri, Uri
 from gigl.common.logger import Logger
-from gigl.common.services.kfp import KFPService
+from gigl.common.services.vertex_ai import VertexAIService
 from gigl.common.types.resource_config import CommonPipelineComponentConfigs
-from gigl.env.dep_constants import (
-    GIGL_DATAFLOW_IMAGE,
-    GIGL_SRC_IMAGE_CPU,
-    GIGL_SRC_IMAGE_CUDA,
-)
+from gigl.env.pipelines_config import get_resource_config
 from gigl.orchestration.kubeflow.kfp_pipeline import generate_pipeline
 from gigl.src.common.constants.components import GiGLComponents
 from gigl.src.common.types import AppliedTaskIdentifier
+from gigl.src.common.utils.file_loader import FileLoader
 from gigl.src.common.utils.time import current_formatted_datetime
+from gigl.src.validation_check.libs.name_checks import (
+    check_if_kfp_pipeline_job_name_valid,
+)
 
 logger = Logger()
 
@@ -28,39 +28,18 @@
     f"gigl-pipeline-version-at-{current_formatted_datetime()}"
 )
 
-GIGL_PIPELINE_BUDLE_PATH = LocalUri.join(
+DEFAULT_KFP_COMPILED_PIPELINE_DEST_PATH = LocalUri.join(
     local_fs_constants.get_project_root_directory(),
     "build",
-    f"gigl_pipeline_gnn.tar.gz",
+    f"gigl_pipeline_gnn.yaml",
 )
 
 DEFAULT_START_AT_COMPONENT = "config_populator"
 
 
-@dataclass
-class KfpEnvMetadata:
-    kfp_host: str
-    k8_sa: str
-    experiment_id: str
-    pipeline_id: str
-
-    def __repr__(self) -> str:
-        return (
-            f"KfpEnvMetadata("
-            f"kfp_host={self.kfp_host}, "
-            f"k8_sa={self.k8_sa}, "
-            f"experiment_id={self.experiment_id}, "
-            f"pipeline_id={self.pipeline_id})"
-            f")"
-        )
-
-
 class KfpOrchestrator:
     """
     Orchestration of Kubeflow Pipelines for GiGL.
 
-    Args:
-        kfp_metadata (Optional[KfpEnvMetadata]): KFP environment metadata. If not provided, it will be loaded from the environment.
-        env_path (Optional[str]): Path to the environment file containing KFP metadata. Default checks in the current directory.
     Methods:
         compile: Compiles the Kubeflow pipeline.
         run: Runs the Kubeflow pipeline.
@@ -68,49 +47,33 @@ class KfpOrchestrator:
         wait_for_completion: Waits for the pipeline run to complete.
     """
 
-    def __init__(
-        self,
-        kfp_metadata: Optional[KfpEnvMetadata] = None,
-        env_path: Optional[str] = None,
-    ):
-        if kfp_metadata:
-            self.kfp_metadata = kfp_metadata
-        else:
-            self.kfp_metadata = self._load_kfp_metadata(env_path=env_path)
-        self.kfp_service = KFPService(
-            kfp_host=self.kfp_metadata.kfp_host,
-            k8_sa=self.kfp_metadata.k8_sa,
-        )
-
-    @staticmethod
-    def _load_kfp_metadata(env_path: Optional[str] = None) -> KfpEnvMetadata:
-        load_dotenv(dotenv_path=env_path)
-
-        return KfpEnvMetadata(
-            kfp_host=os.getenv("KFP_HOST", "default_host"),
-            k8_sa=os.getenv("K8_SA", "default_sa"),
-            experiment_id=os.getenv("EXPERIMENT_ID", "default_experiment_id"),
-            pipeline_id=os.getenv("PIPELINE_ID", "default_pipeline_id"),
-        )
-
     @classmethod
     def compile(
         cls,
         cuda_container_image: str,
         cpu_container_image: str,
         dataflow_container_image: str,
+        dst_compiled_pipeline_path: Uri = DEFAULT_KFP_COMPILED_PIPELINE_DEST_PATH,
         additional_job_args: Optional[dict[GiGLComponents, dict[str, str]]] = None,
-    ) -> LocalUri:
+        tag: Optional[str] = None,
+    ) -> Uri:
        """
         Compiles the GiGL Kubeflow pipeline.
         Args:
             cuda_container_image (str): Container image for CUDA (see: containers/Dockerfile.cuda).
             cpu_container_image (str): Container image for CPU.
             dataflow_container_image (str): Container image for Dataflow.
-            additional_job_args: Optional additional arguements to be passed into components, by component.
+            dst_compiled_pipeline_path (Uri): Destination path for where to store the compiled pipeline yaml.
+            additional_job_args: Optional additional arguments to be passed into components, by component.
+            tag: Optional tag which, if provided, will be used to tag the pipeline description.
""" - pipeline_bundle_path: LocalUri = GIGL_PIPELINE_BUDLE_PATH - Path(pipeline_bundle_path.uri).parent.mkdir(parents=True, exist_ok=True) + local_pipeline_bundle_path: LocalUri = ( + dst_compiled_pipeline_path + if isinstance(dst_compiled_pipeline_path, LocalUri) + else DEFAULT_KFP_COMPILED_PIPELINE_DEST_PATH + ) + Path(local_pipeline_bundle_path.uri).parent.mkdir(parents=True, exist_ok=True) + logger.info(f"Compiling pipeline to {local_pipeline_bundle_path.uri}") common_pipeline_component_configs = CommonPipelineComponentConfigs( cuda_container_image=cuda_container_image, @@ -122,13 +85,23 @@ def compile( Compiler().compile( generate_pipeline( common_pipeline_component_configs=common_pipeline_component_configs, + tag=tag, ), - pipeline_bundle_path.uri, + local_pipeline_bundle_path.uri, ) - logger.info(f"Compiled Kubeflow pipeline to {pipeline_bundle_path.uri}") + logger.info(f"Compiled Kubeflow pipeline to {local_pipeline_bundle_path.uri}") - return pipeline_bundle_path + logger.info(f"Uploading compiled pipeline to {dst_compiled_pipeline_path.uri}") + if local_pipeline_bundle_path != dst_compiled_pipeline_path: + logger.info(f"Will upload pipeline to {dst_compiled_pipeline_path.uri}") + file_loader = FileLoader() + file_loader.load_file( + file_uri_src=local_pipeline_bundle_path, + file_uri_dst=dst_compiled_pipeline_path, + ) + + return dst_compiled_pipeline_path def run( self, @@ -137,19 +110,31 @@ def run( resource_config_uri: Uri, start_at: str = DEFAULT_START_AT_COMPONENT, stop_after: Optional[str] = None, - cuda_container_image: str = GIGL_SRC_IMAGE_CUDA, - cpu_container_image: str = GIGL_SRC_IMAGE_CPU, - dataflow_container_image: str = GIGL_DATAFLOW_IMAGE, - compile: bool = True, - additional_job_args: Optional[dict[GiGLComponents, dict[str, str]]] = None, - ) -> str: - if compile: - pipeline_budle_path = self.compile( - cuda_container_image=cuda_container_image, - cpu_container_image=cpu_container_image, - dataflow_container_image=dataflow_container_image, - additional_job_args=additional_job_args, - ) + compiled_pipeline_path: Uri = DEFAULT_KFP_COMPILED_PIPELINE_DEST_PATH, + ) -> aiplatform.PipelineJob: + """ + Runs the GiGL Kubeflow pipeline. + Args: + applied_task_identifier (AppliedTaskIdentifier): Identifier for the task. + task_config_uri (Uri): URI for the task config. + resource_config_uri (Uri): URI for the resource config. + start_at (str): Component to start at. + stop_after (str): Component to stop after. + compiled_pipeline_path (Uri): Path to the compiled pipeline. + If compile is False, this should be provided and is directly used to run the pipeline and skip compilation. + If compile is True, this flag is optional and if provided, is used as the destination path for where to + store the compiled pipeline yaml. + additional_job_args: Optional additional arguements to be passed into components, by component. + + Returns: + aiplatform.PipelineJob: The job that was created. + """ + check_if_kfp_pipeline_job_name_valid(str(applied_task_identifier)) + file_loader = FileLoader() + assert file_loader.does_uri_exist( + compiled_pipeline_path + ), f"Compiled pipeline path {compiled_pipeline_path} does not exist." 
+        logger.info(f"Skipping pipeline compilation; will use {compiled_pipeline_path}")
 
         run_keyword_args = {
             "job_name": applied_task_identifier,
@@ -161,26 +146,22 @@
             run_keyword_args["stop_after"] = stop_after
 
         logger.info(f"Running pipeline with args: {run_keyword_args}")
-        run_id = self.kfp_service.run_pipeline(
-            pipeline_bundle_path=str(pipeline_budle_path),
-            experiment_id=self.kfp_metadata.experiment_id,
-            run_name=applied_task_identifier,
-            run_keyword_args=run_keyword_args,
+        resource_config = get_resource_config(resource_config_uri=resource_config_uri)
+        vertex_ai_service = VertexAIService(
+            project=resource_config.project,
+            location=resource_config.region,
+            service_account=resource_config.service_account_email,
+            staging_bucket=resource_config.temp_assets_regional_bucket_path.uri,
         )
-
-        return run_id
-
-    def upload(self, pipeline_version_name: str = DEFAULT_PIPELINE_VERSION_NAME) -> str:
-        logger.info(
-            f"Uploading pipeline version: {pipeline_version_name} to pipeline id: {self.kfp_metadata.pipeline_id}"
-        )
-        upload_url = self.kfp_service.upload_pipeline_version(
-            pipeline_bundle_path=str(GIGL_PIPELINE_BUDLE_PATH),
-            pipeline_id=self.kfp_metadata.pipeline_id,
-            pipeline_version_name=pipeline_version_name,
+        run = vertex_ai_service.run_pipeline(
+            display_name=str(applied_task_identifier),
+            template_path=compiled_pipeline_path,
+            run_keyword_args=run_keyword_args,
+            job_id=str(applied_task_identifier).replace("_", "-"),
         )
+        return run
-
-        return upload_url
-
-    def wait_for_completion(self, run_id: str):
-        self.kfp_service.wait_for_run_completion(run_id=run_id)
+    def wait_for_completion(self, run: Union[aiplatform.PipelineJob, str]):
+        resource_name = run if isinstance(run, str) else run.resource_name
+        VertexAIService.wait_for_run_completion(resource_name)
+        logger.info(f"Pipeline run {resource_name} completed successfully.")
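An illustrative end-to-end usage sketch of the reworked orchestrator (not part of the patch; the image names and GCS URIs are placeholders):

from gigl.common import UriFactory
from gigl.orchestration.kubeflow.kfp_orchestrator import KfpOrchestrator
from gigl.src.common.types import AppliedTaskIdentifier

# Compile once, then run against the compiled artifact.
compiled_path = KfpOrchestrator.compile(
    cuda_container_image="<your-cuda-image>",
    cpu_container_image="<your-cpu-image>",
    dataflow_container_image="<your-dataflow-image>",
)

orchestrator = KfpOrchestrator()
run = orchestrator.run(
    applied_task_identifier=AppliedTaskIdentifier("my-gigl-job"),
    task_config_uri=UriFactory.create_uri("gs://<bucket>/task_config.yaml"),
    resource_config_uri=UriFactory.create_uri("gs://<bucket>/resource_config.yaml"),
    compiled_pipeline_path=compiled_path,
)
orchestrator.wait_for_completion(run=run)  # blocks until the Vertex AI job finishes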
diff --git a/python/gigl/orchestration/kubeflow/kfp_pipeline.py b/python/gigl/orchestration/kubeflow/kfp_pipeline.py
index 20e90a1..d63e5b5 100644
--- a/python/gigl/orchestration/kubeflow/kfp_pipeline.py
+++ b/python/gigl/orchestration/kubeflow/kfp_pipeline.py
@@ -1,11 +1,9 @@
 import os
-from typing import Optional
+from typing import Dict, Final, List, Optional
 
 import kfp
-import kfp.containers
-import kfp.gcp
-from kfp.dsl import PipelineParam
-from kfp.dsl._container_op import ContainerOp
+import kfp.dsl.pipeline_channel
+from kfp.dsl import PipelineTask
 
 import gigl.src.common.constants.local_fs as local_fs_constants
 from gigl.common import LocalUri
@@ -14,11 +12,11 @@
 from gigl.orchestration.kubeflow.utils.glt_backend import (
     check_glt_backend_eligibility_component,
 )
-from gigl.orchestration.kubeflow.utils.log_metrics import log_eval_metrics_to_ui
+from gigl.orchestration.kubeflow.utils.log_metrics import log_metrics_to_ui
 from gigl.orchestration.kubeflow.utils.resource import add_task_resource_requirements
 from gigl.src.common.constants.components import GiGLComponents
 
-COMPONENTS_BASE_PATH = os.path.join(
+_COMPONENTS_BASE_PATH: Final[str] = os.path.join(
     local_fs_constants.get_gigl_root_directory(),
     "orchestration",
     "kubeflow",
@@ -26,7 +24,7 @@
 
 logger = Logger()
 
-SPECED_COMPONENTS = [
+SPECED_COMPONENTS: Final[List[str]] = [
     GiGLComponents.ConfigValidator.value,
     GiGLComponents.ConfigPopulator.value,
     GiGLComponents.SubgraphSampler.value,
@@ -37,162 +35,157 @@
     GiGLComponents.Trainer.value,
 ]
 
-speced_component_root: LocalUri = LocalUri.join(COMPONENTS_BASE_PATH, "components")
-speced_component_op_dict = {
-    component: kfp.components.load_component_from_file(
-        LocalUri.join(speced_component_root, component, "component.yaml").uri
+_speced_component_root: Final[LocalUri] = LocalUri.join(
+    _COMPONENTS_BASE_PATH, "components"
+)
+_speced_component_op_dict: Final[Dict[GiGLComponents, kfp.components.YamlComponent]] = {
+    GiGLComponents(component): kfp.components.load_component_from_file(
+        LocalUri.join(_speced_component_root, component, "component.yaml").uri
     )
     for component in SPECED_COMPONENTS
 }
 
 
-def generate_component_task(
-    component: str,
+def _generate_component_task(
+    component: GiGLComponents,
     job_name: str,
-    uri: str,
+    task_config_uri: str,
     resource_config_uri: str,
     common_pipeline_component_configs: CommonPipelineComponentConfigs,
     start_at: Optional[str] = None,
     stop_after: Optional[str] = None,
-) -> ContainerOp:
-    component_task_op: ContainerOp
-
-    if component == GiGLComponents.ConfigPopulator.value:
-        component_task_op = speced_component_op_dict[component](
+) -> PipelineTask:
+    component_task: PipelineTask
+    if component == GiGLComponents.ConfigPopulator:
+        component_task = _speced_component_op_dict[component](
             job_name=job_name,
-            template_uri=uri,
+            template_uri=task_config_uri,
             resource_config_uri=resource_config_uri,
+            **common_pipeline_component_configs.additional_job_args.get(component, {}),
         )
-    elif component == GiGLComponents.ConfigValidator.value:
-        component_task_op = speced_component_op_dict[component](
+
+    elif component == GiGLComponents.ConfigValidator:
+        component_task = _speced_component_op_dict[component](
            job_name=job_name,
-            task_config_uri=uri,
+            task_config_uri=task_config_uri,
             start_at=start_at,
             resource_config_uri=resource_config_uri,
             stop_after=stop_after,
+            **common_pipeline_component_configs.additional_job_args.get(component, {}),
         )
-    elif component == GiGLComponents.Trainer.value:
-        component_task_op = speced_component_op_dict[component](
+    elif component == GiGLComponents.SubgraphSampler:
+        component_task = _speced_component_op_dict[component](
             job_name=job_name,
-            config_uri=uri,
+            task_config_uri=task_config_uri,
+            resource_config_uri=resource_config_uri,
+            custom_worker_image_uri=common_pipeline_component_configs.dataflow_container_image,
+            **common_pipeline_component_configs.additional_job_args.get(component, {}),
+        )
+    elif component == GiGLComponents.Trainer:
+        component_task = _speced_component_op_dict[component](
+            job_name=job_name,
+            config_uri=task_config_uri,
             resource_config_uri=resource_config_uri,
             cpu_docker_uri=common_pipeline_component_configs.cpu_container_image,
             cuda_docker_uri=common_pipeline_component_configs.cuda_container_image,
+            **common_pipeline_component_configs.additional_job_args.get(component, {}),
         )
-    elif component == GiGLComponents.DataPreprocessor.value:
-        component_task_op = speced_component_op_dict[component](
+    elif component == GiGLComponents.DataPreprocessor:
+        component_task = _speced_component_op_dict[component](
             job_name=job_name,
-            task_config_uri=uri,
+            task_config_uri=task_config_uri,
             resource_config_uri=resource_config_uri,
             custom_worker_image_uri=common_pipeline_component_configs.dataflow_container_image,
+            **common_pipeline_component_configs.additional_job_args.get(component, {}),
        )
-    elif component == GiGLComponents.Inferencer.value:
-        component_task_op = speced_component_op_dict[component](
+    elif component == GiGLComponents.Inferencer:
+        component_task = _speced_component_op_dict[component](
             job_name=job_name,
-            task_config_uri=uri,
+            task_config_uri=task_config_uri,
             resource_config_uri=resource_config_uri,
             custom_worker_image_uri=common_pipeline_component_configs.dataflow_container_image,
             cpu_docker_uri=common_pipeline_component_configs.cpu_container_image,
             cuda_docker_uri=common_pipeline_component_configs.cuda_container_image,
+            **common_pipeline_component_configs.additional_job_args.get(component, {}),
         )
     else:
-        component_task_op = speced_component_op_dict[component](
+        component_task = _speced_component_op_dict[component](
             job_name=job_name,
-            task_config_uri=uri,
+            task_config_uri=task_config_uri,
             resource_config_uri=resource_config_uri,
+            **common_pipeline_component_configs.additional_job_args.get(component, {}),
         )
 
     add_task_resource_requirements(
-        task=component_task_op,
+        task=component_task,
         common_pipeline_component_configs=common_pipeline_component_configs,
     )
 
-    return component_task_op
+    return component_task
 
 
 def generate_pipeline(
     common_pipeline_component_configs: CommonPipelineComponentConfigs,
+    tag: Optional[str] = None,
 ):
     """
     Generates a KFP pipeline definition for GiGL.
     Args:
         common_pipeline_component_configs (CommonPipelineComponentConfigs): Shared configuration between components.
+        tag (Optional[str]): Optional tag which, if provided, will be used to tag the pipeline description.
     Returns:
         An @kfp.dsl.pipeline decorated function to generated a pipeline.
     """
-    if (
-        common_pipeline_component_configs.additional_job_args
-        and GiGLComponents.SubgraphSampler
-        not in common_pipeline_component_configs.additional_job_args
-    ):
-        raise ValueError(
-            f"Only additional args for Subgraph Sampler are supported. Received {common_pipeline_component_configs.additional_job_args}"
-        )
 
     @kfp.dsl.pipeline(
         name="GiGL_Pipeline",
-        description="GiGL Pipeline",
+        description="GiGL Pipeline" if not tag else f"GiGL Pipeline @ {tag}",
     )
     def pipeline(
-        job_name,
-        template_or_frozen_config_uri,
-        resource_config_uri,
-        start_at=GiGLComponents.ConfigPopulator.value,
-        stop_after=None,
+        job_name: str,
+        template_or_frozen_config_uri: str,
+        resource_config_uri: str,
+        start_at: str = GiGLComponents.ConfigPopulator.value,
+        stop_after: Optional[str] = None,
     ):
-        validation_check_task = generate_component_task(
-            component=GiGLComponents.ConfigValidator.value,
+        validation_check_task = _generate_component_task(
+            component=GiGLComponents.ConfigValidator,
             job_name=job_name,
-            uri=template_or_frozen_config_uri,
+            task_config_uri=template_or_frozen_config_uri,
             start_at=start_at,
             stop_after=stop_after,
             resource_config_uri=resource_config_uri,
             common_pipeline_component_configs=common_pipeline_component_configs,
         )
-
-        # TODO (mkolodner-sc): Update method for specifying glt_backend once long-term alignment is reached
-        check_glt_backend_eligibility_component_generator = (
-            kfp.components.func_to_container_op(
-                check_glt_backend_eligibility_component,
-                base_image=common_pipeline_component_configs.cpu_container_image,
-            )
-        )
-        check_glt_backend_eligibility_container_op: ContainerOp = (
-            check_glt_backend_eligibility_component_generator(
-                task_config_uri=template_or_frozen_config_uri
-            )
-        )
-        check_glt_backend_eligibility_container_op.set_display_name(
-            name="Check whether to use GLT Backend"
-        )
-        should_use_glt_runtime_param: PipelineParam = (
-            check_glt_backend_eligibility_container_op.output
+        should_use_glt = check_glt_backend_eligibility_component(
+            task_config_uri=template_or_frozen_config_uri,
+            base_image=common_pipeline_component_configs.cpu_container_image,
         )
 
         with kfp.dsl.Condition(start_at == GiGLComponents.ConfigPopulator.value):
-            config_populator_task = create_config_populator_task_op(
+ config_populator_task = _create_config_populator_task_op( job_name=job_name, task_config_uri=template_or_frozen_config_uri, resource_config_uri=resource_config_uri, common_pipeline_component_configs=common_pipeline_component_configs, - should_use_glt_runtime_param=should_use_glt_runtime_param, + should_use_glt_runtime_param=should_use_glt, stop_after=stop_after, ) config_populator_task.after(validation_check_task) with kfp.dsl.Condition(start_at == GiGLComponents.DataPreprocessor.value): - data_preprocessor_task = create_data_preprocessor_task_op( + data_preprocessor_task = _create_data_preprocessor_task_op( job_name=job_name, task_config_uri=template_or_frozen_config_uri, resource_config_uri=resource_config_uri, common_pipeline_component_configs=common_pipeline_component_configs, stop_after=stop_after, - should_use_glt_runtime_param=should_use_glt_runtime_param, + should_use_glt_runtime_param=should_use_glt, ) data_preprocessor_task.after(validation_check_task) with kfp.dsl.Condition(start_at == GiGLComponents.SubgraphSampler.value): - subgraph_sampler_task = create_subgraph_sampler_task_op( + subgraph_sampler_task = _create_subgraph_sampler_task_op( job_name=job_name, task_config_uri=template_or_frozen_config_uri, resource_config_uri=resource_config_uri, @@ -202,7 +195,7 @@ def pipeline( subgraph_sampler_task.after(validation_check_task) with kfp.dsl.Condition(start_at == GiGLComponents.SplitGenerator.value): - split_generator_task = create_split_generator_task_op( + split_generator_task = _create_split_generator_task_op( job_name=job_name, task_config_uri=template_or_frozen_config_uri, resource_config_uri=resource_config_uri, @@ -212,7 +205,7 @@ def pipeline( split_generator_task.after(validation_check_task) with kfp.dsl.Condition(start_at == GiGLComponents.Trainer.value): - trainer_task = create_trainer_task_op( + trainer_task = _create_trainer_task_op( job_name=job_name, task_config_uri=template_or_frozen_config_uri, resource_config_uri=resource_config_uri, @@ -222,7 +215,7 @@ def pipeline( trainer_task.after(validation_check_task) with kfp.dsl.Condition(start_at == GiGLComponents.Inferencer.value): - inferencer_task = create_inferencer_task_op( + inferencer_task = _create_inferencer_task_op( job_name=job_name, task_config_uri=template_or_frozen_config_uri, resource_config_uri=resource_config_uri, @@ -232,30 +225,29 @@ def pipeline( inferencer_task.after(validation_check_task) with kfp.dsl.Condition(start_at == GiGLComponents.PostProcessor.value): - post_processor_task = create_post_processor_task_op( + post_processor_task = _create_post_processor_task_op( job_name=job_name, task_config_uri=template_or_frozen_config_uri, resource_config_uri=resource_config_uri, common_pipeline_component_configs=common_pipeline_component_configs, - stop_after=stop_after, ) post_processor_task.after(validation_check_task) return pipeline -def create_config_populator_task_op( +def _create_config_populator_task_op( job_name: str, task_config_uri: str, resource_config_uri: str, common_pipeline_component_configs: CommonPipelineComponentConfigs, - should_use_glt_runtime_param: PipelineParam, + should_use_glt_runtime_param: kfp.dsl.pipeline_channel.PipelineChannel, stop_after: Optional[str] = None, -) -> ContainerOp: - config_populator_task = generate_component_task( - component=GiGLComponents.ConfigPopulator.value, +) -> PipelineTask: + config_populator_task = _generate_component_task( + component=GiGLComponents.ConfigPopulator, job_name=job_name, - uri=task_config_uri, + task_config_uri=task_config_uri, 
resource_config_uri=resource_config_uri, common_pipeline_component_configs=common_pipeline_component_configs, stop_after=stop_after, @@ -263,7 +255,7 @@ def create_config_populator_task_op( frozen_gbml_config_uri = config_populator_task.outputs["frozen_gbml_config_uri"] with kfp.dsl.Condition(stop_after != GiGLComponents.ConfigPopulator.value): - data_preprocessor_task = create_data_preprocessor_task_op( + data_preprocessor_task = _create_data_preprocessor_task_op( job_name=job_name, task_config_uri=frozen_gbml_config_uri, resource_config_uri=resource_config_uri, @@ -275,25 +267,29 @@ def create_config_populator_task_op( return config_populator_task -def create_data_preprocessor_task_op( +def _create_data_preprocessor_task_op( job_name: str, task_config_uri: str, resource_config_uri: str, common_pipeline_component_configs: CommonPipelineComponentConfigs, - should_use_glt_runtime_param: PipelineParam, + should_use_glt_runtime_param: kfp.dsl.pipeline_channel.PipelineChannel, stop_after: Optional[str] = None, -) -> ContainerOp: - data_preprocessor_task = generate_component_task( - component=GiGLComponents.DataPreprocessor.value, +) -> PipelineTask: + data_preprocessor_task = _generate_component_task( + component=GiGLComponents.DataPreprocessor, job_name=job_name, - uri=task_config_uri, + task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, common_pipeline_component_configs=common_pipeline_component_configs, ) + add_task_resource_requirements( + task=data_preprocessor_task, + common_pipeline_component_configs=common_pipeline_component_configs, + ) with kfp.dsl.Condition(stop_after != GiGLComponents.DataPreprocessor.value): with kfp.dsl.Condition(should_use_glt_runtime_param == False): - subgraph_sampler_task = create_subgraph_sampler_task_op( + subgraph_sampler_task = _create_subgraph_sampler_task_op( job_name=job_name, task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, @@ -304,7 +300,7 @@ def create_data_preprocessor_task_op( # If we are using the GLT runtime, we skip the subgraph sampler and split generator # and go straight to the GLT trainer with kfp.dsl.Condition(should_use_glt_runtime_param == True): - glt_trainer_task = create_trainer_task_op( + glt_trainer_task = _create_trainer_task_op( job_name=job_name, task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, @@ -316,32 +312,23 @@ def create_data_preprocessor_task_op( return data_preprocessor_task -def create_subgraph_sampler_task_op( +def _create_subgraph_sampler_task_op( job_name: str, task_config_uri: str, resource_config_uri: str, common_pipeline_component_configs: CommonPipelineComponentConfigs, stop_after: Optional[str] = None, -) -> ContainerOp: - subgraph_sampler_task = speced_component_op_dict["subgraph_sampler"]( +) -> PipelineTask: + subgraph_sampler_task = _generate_component_task( + component=GiGLComponents.SubgraphSampler, job_name=job_name, task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, - custom_worker_image_uri=common_pipeline_component_configs.dataflow_container_image, - **( - common_pipeline_component_configs.additional_job_args.get( - GiGLComponents.SubgraphSampler - ) - or {} - ), - ) - add_task_resource_requirements( - task=subgraph_sampler_task, common_pipeline_component_configs=common_pipeline_component_configs, ) with kfp.dsl.Condition(stop_after != GiGLComponents.SubgraphSampler.value): - split_generator_task = create_split_generator_task_op( + split_generator_task = _create_split_generator_task_op( job_name=job_name, 
task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, @@ -353,26 +340,24 @@ def create_subgraph_sampler_task_op( return subgraph_sampler_task -def create_split_generator_task_op( +def _create_split_generator_task_op( job_name: str, task_config_uri: str, resource_config_uri: str, common_pipeline_component_configs: CommonPipelineComponentConfigs, stop_after: Optional[str] = None, -) -> ContainerOp: - split_generator_task: ContainerOp - split_generator_task = speced_component_op_dict["split_generator"]( +) -> PipelineTask: + split_generator_task: PipelineTask + split_generator_task = _generate_component_task( + component=GiGLComponents.SplitGenerator, job_name=job_name, task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, - ) - add_task_resource_requirements( - task=split_generator_task, common_pipeline_component_configs=common_pipeline_component_configs, ) with kfp.dsl.Condition(stop_after != GiGLComponents.SplitGenerator.value): - trainer_task = create_trainer_task_op( + trainer_task = _create_trainer_task_op( job_name=job_name, task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, @@ -384,23 +369,27 @@ def create_split_generator_task_op( return split_generator_task -def create_inferencer_task_op( +def _create_inferencer_task_op( job_name: str, task_config_uri: str, resource_config_uri: str, common_pipeline_component_configs: CommonPipelineComponentConfigs, stop_after: Optional[str] = None, -) -> ContainerOp: - inferencer_task = generate_component_task( - component=GiGLComponents.Inferencer.value, +) -> PipelineTask: + inferencer_task = _generate_component_task( + component=GiGLComponents.Inferencer, job_name=job_name, - uri=task_config_uri, + task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, common_pipeline_component_configs=common_pipeline_component_configs, ) + add_task_resource_requirements( + task=inferencer_task, + common_pipeline_component_configs=common_pipeline_component_configs, + ) with kfp.dsl.Condition(stop_after != GiGLComponents.Inferencer.value): - post_processor_task = create_post_processor_task_op( + post_processor_task = _create_post_processor_task_op( job_name=job_name, task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, @@ -411,33 +400,31 @@ def create_inferencer_task_op( return inferencer_task -def create_trainer_task_op( +def _create_trainer_task_op( job_name: str, task_config_uri: str, resource_config_uri: str, common_pipeline_component_configs: CommonPipelineComponentConfigs, stop_after: Optional[str] = None, -) -> ContainerOp: - trainer_task = generate_component_task( - component=GiGLComponents.Trainer.value, +) -> PipelineTask: + trainer_task = _generate_component_task( + component=GiGLComponents.Trainer, job_name=job_name, - uri=task_config_uri, + task_config_uri=task_config_uri, resource_config_uri=resource_config_uri, common_pipeline_component_configs=common_pipeline_component_configs, ) - log_metrics_op = kfp.components.func_to_container_op( - log_eval_metrics_to_ui, + log_metrics_component = log_metrics_to_ui( + task_config_uri=task_config_uri, + component_name=GiGLComponents.Trainer.value, base_image=common_pipeline_component_configs.cpu_container_image, ) - log_metrics_task_op: ContainerOp = log_metrics_op( - task_config_uri=task_config_uri, component=GiGLComponents.Trainer.value - ) - log_metrics_task_op.set_display_name(name="Log Trainer Eval Metrics") - log_metrics_task_op.after(trainer_task) + log_metrics_component.set_display_name(name="Log 
Trainer Eval Metrics")
+    log_metrics_component.after(trainer_task)
 
     with kfp.dsl.Condition(stop_after != GiGLComponents.Trainer.value):
-        inference_task = create_inferencer_task_op(
+        inference_task = _create_inferencer_task_op(
             job_name=job_name,
             task_config_uri=task_config_uri,
             resource_config_uri=resource_config_uri,
@@ -448,29 +435,24 @@
     return trainer_task
 
 
-def create_post_processor_task_op(
+def _create_post_processor_task_op(
     job_name: str,
     task_config_uri: str,
     resource_config_uri: str,
     common_pipeline_component_configs: CommonPipelineComponentConfigs,
-    stop_after: Optional[str] = None,
-) -> ContainerOp:
-    post_processor_task = generate_component_task(
-        component=GiGLComponents.PostProcessor.value,
+) -> PipelineTask:
+    post_processor_task = _generate_component_task(
+        component=GiGLComponents.PostProcessor,
         job_name=job_name,
-        uri=task_config_uri,
+        task_config_uri=task_config_uri,
         resource_config_uri=resource_config_uri,
         common_pipeline_component_configs=common_pipeline_component_configs,
     )
 
-    # Log post processor eval metrics
-    log_metrics_op = kfp.components.func_to_container_op(
-        log_eval_metrics_to_ui,
-        base_image=common_pipeline_component_configs.cpu_container_image,
-    )
-    log_metrics_task_op: ContainerOp = log_metrics_op(
+    log_metrics_component = log_metrics_to_ui(
         task_config_uri=task_config_uri,
-        component=GiGLComponents.PostProcessor.value,
+        component_name=GiGLComponents.PostProcessor.value,
+        base_image=common_pipeline_component_configs.cpu_container_image,
     )
-    log_metrics_task_op.set_display_name(name="Log PostProcessor Eval Metrics")
-    log_metrics_task_op.after(post_processor_task)
+    log_metrics_component.set_display_name(name="Log PostProcessor Eval Metrics")
+    log_metrics_component.after(post_processor_task)
 
     return post_processor_task
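The hunks above migrate from KFP v1's `ContainerOp` to KFP v2's `PipelineTask`. A minimal, self-contained sketch (illustrative, not part of the patch) of the `start_at` gating pattern that `pipeline()` uses throughout, assuming the KFP v2 SDK:

from kfp import dsl


@dsl.component(base_image="python:3.9")
def hello(name: str) -> str:
    print(f"hello {name}")
    return name


@dsl.pipeline(name="gating-example")
def pipeline(start_at: str = "a"):
    # kfp v2 evaluates the condition at runtime, mirroring how the GiGL
    # pipeline picks its entrypoint component from the `start_at` parameter.
    with dsl.Condition(start_at == "a"):
        hello(name="component-a")
    with dsl.Condition(start_at == "b"):
        hello(name="component-b")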
diff --git a/python/gigl/orchestration/kubeflow/runner.py b/python/gigl/orchestration/kubeflow/runner.py
new file mode 100644
index 0000000..ccb29ce
--- /dev/null
+++ b/python/gigl/orchestration/kubeflow/runner.py
@@ -0,0 +1,303 @@
+"""
+This script is used to run a Kubeflow pipeline on VAI.
+You have options to RUN a pipeline, COMPILE a pipeline, or RUN a pipeline without compiling it,
+i.e. you have a precompiled pipeline somewhere.
+
+RUNNING A PIPELINE:
+    python gigl.orchestration.kubeflow.runner --action=run ...args
+    The following arguments are required:
+        --task_config_uri: GCS URI to template_or_frozen_config_uri.
+        --resource_config_uri: GCS URI to resource_config_uri.
+        --container_image_cuda: GiGL source code image compiled for use with cuda. See containers/Dockerfile.src
+        --container_image_cpu: GiGL source code image compiled for use with cpu. See containers/Dockerfile.src
+        --container_image_dataflow: GiGL source code image compiled for use with dataflow. See containers/Dockerfile.dataflow.src
+    The following arguments are optional:
+        --job_name: The name to give to the KFP job. Default is "gigl_run_at_<current_formatted_datetime>"
+        --start_at: The component to start the pipeline at. Default is config_populator. See gigl.src.common.constants.components.GiGLComponents
+        --stop_after: The component to stop the pipeline at. Default is None.
+        --pipeline_tag: Optional tag which, if provided, will be used to tag the pipeline description.
+        --compiled_pipeline_path: The path where the compiled pipeline should be stored.
+        --wait: Wait for the pipeline run to finish.
+        --additional_job_args: Additional job arguments for the pipeline components, by component.
+            The value has to be of form: "<component>.<arg>=<value>", where <component> is one of the
+            string representations of the components specified in gigl.src.common.constants.components.GiGLComponents.
+            This argument can be repeated.
+            Example:
+                --additional_job_args=subgraph_sampler.additional_spark35_jar_file_uris='gs://path/to/jar'
+                --additional_job_args=split_generator.some_other_arg='value'
+            This passes additional_spark35_jar_file_uris="gs://path/to/jar" to subgraph_sampler at compile time and
+            some_other_arg="value" to split_generator at compile time.
+
+    You can alternatively run_no_compile if you have a precompiled pipeline somewhere.
+    python gigl.orchestration.kubeflow.runner --action=run_no_compile ...args
+    The following arguments are required:
+        --task_config_uri
+        --resource_config_uri
+        --compiled_pipeline_path: The path to a pre-compiled pipeline; can be a GCS URI (gs://...), or a local path
+    The following arguments are optional:
+        --job_name
+        --start_at
+        --stop_after
+        --pipeline_tag
+        --wait
+
+COMPILING A PIPELINE:
+    A strict subset of running a pipeline:
+    python gigl.orchestration.kubeflow.runner --action=compile ...args
+    The following arguments are required:
+        --container_image_cuda
+        --container_image_cpu
+        --container_image_dataflow
+    The following arguments are optional:
+        --compiled_pipeline_path: The path where the compiled pipeline should be stored.
+        --pipeline_tag: Optional tag which, if provided, will be used to tag the pipeline description.
+        --additional_job_args: Additional job arguments for the pipeline components, by component.
+            The value has to be of form: "<component>.<arg>=<value>", where <component> is one of the
+            string representations of the components specified in gigl.src.common.constants.components.GiGLComponents.
+            This argument can be repeated.
+            Example:
+                --additional_job_args=subgraph_sampler.additional_spark35_jar_file_uris='gs://path/to/jar'
+                --additional_job_args=split_generator.some_other_arg='value'
+            This passes additional_spark35_jar_file_uris="gs://path/to/jar" to subgraph_sampler at compile time and
+            some_other_arg="value" to split_generator at compile time.
+""" +from __future__ import annotations + +import argparse +from collections import defaultdict +from enum import Enum +from typing import List + +from gigl.common import UriFactory +from gigl.common.logger import Logger +from gigl.orchestration.kubeflow.kfp_orchestrator import ( + DEFAULT_KFP_COMPILED_PIPELINE_DEST_PATH, + KfpOrchestrator, +) +from gigl.orchestration.kubeflow.kfp_pipeline import SPECED_COMPONENTS +from gigl.src.common.constants.components import GiGLComponents +from gigl.src.common.types import AppliedTaskIdentifier +from gigl.src.common.utils.time import current_formatted_datetime + +DEFAULT_JOB_NAME = f"gigl_run_at_{current_formatted_datetime()}" +DEFAULT_START_AT = GiGLComponents.ConfigPopulator.value + + +class Action(Enum): + RUN = "run" + COMPILE = "compile" + RUN_NO_COMPILE = "run_no_compile" + + @staticmethod + def from_string(s: str) -> Action: + try: + return Action(s) + except KeyError: + raise ValueError() + + +_REQUIRED_RUN_FLAGS = frozenset( + [ + "task_config_uri", + "resource_config_uri", + "container_image_cuda", + "container_image_cpu", + "container_image_dataflow", + ] +) +_REQUIRED_RUN_NO_COMPILE_FLAGS = frozenset( + [ + "task_config_uri", + "resource_config_uri", + "compiled_pipeline_path", + ] +) +_REQUIRED_COMPILE_FLAGS = frozenset( + [ + "container_image_cuda", + "container_image_cpu", + "container_image_dataflow", + ] +) + +logger = Logger() + + +def _parse_additional_job_args( + additional_job_args: List[str], +) -> dict[GiGLComponents, dict[str, str]]: + """ + Parse the additional job arguments for the pipeline components, by component. + Args: + additional_job_args List[str]: Each element is of form: ".=" + Where is one of the string representations of component specified in + gigl.src.common.constants.components.GiGLComponents + Example: + ["subgraph_sampler.additional_spark35_jar_file_uris=gs://path/to/jar", "split_generator.some_other_arg=value"]. + + Returns dict[GiGLComponents, dict[str, str]]: The parsed additional job arguments. + Example for the example above: { + GiGLComponents.SubgraphSampler: { + "additional_spark35_jar_file_uris"="gs://path/to/jar", + }, + GiGLComponents.SplitGenerator: { + "some_other_arg": "value", + }, + } + """ + result: dict[GiGLComponents, dict[str, str]] = defaultdict(dict) + for job_arg in additional_job_args: + component_dot_arg, value = job_arg.split("=", 1) + component_str, arg = component_dot_arg.split(".", 1) # Handle nested keys + component = GiGLComponents(component_str) + # Build the nested dictionary dynamically + result[component][arg] = value + + logger.info(f"Parsed additional job args: {result}") + return dict(result) # Ensure the default dict is converted to a regular dict + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Create the KF pipeline for GNN preprocessing/training/inference" + ) + parser.add_argument( + "--container_image_cuda", + help="The docker image name and tag to use for cuda pipeline components ", + ) + parser.add_argument( + "--container_image_cpu", + help="The docker image name and tag to use for cpu pipeline components ", + ) + parser.add_argument( + "--container_image_dataflow", + help="The docker image name and tag to use for the worker harness in dataflow ", + ) + parser.add_argument( + "--job_name", + help="Runtime argument for running the pipeline. The name to give to the KFP job.", + default=DEFAULT_JOB_NAME, + ) + parser.add_argument( + "--start_at", + help="Runtime argument for running the pipeline. 
        choices=SPECED_COMPONENTS,
        default=DEFAULT_START_AT,
    )
    parser.add_argument(
        "--stop_after",
        help="Runtime argument for running the pipeline. Specify the component where to stop the pipeline.",
        choices=SPECED_COMPONENTS,
        default=None,
    )
    parser.add_argument(
        "--task_config_uri",
        help="Runtime argument for running the pipeline. GCS URI to template_or_frozen_config_uri.",
    )
    parser.add_argument(
        "--resource_config_uri",
        help="Runtime argument for resource and env specifications of each component",
    )
    parser.add_argument(
        "--action",
        type=Action.from_string,
        choices=list(Action),
        required=True,
    )
    parser.add_argument(
        "--wait",
        help="Wait for the pipeline run to finish",
        action="store_true",
    )
    parser.add_argument(
        "--pipeline_tag", "-t", help="Tag for the pipeline definition", default=None
    )
    parser.add_argument(
        "--compiled_pipeline_path",
        help="A custom URI pointing to where the compiled pipeline should be saved. "
        + "If you are running an existing pipeline that you are not compiling, this is the path to that compiled pipeline.",
        default=DEFAULT_KFP_COMPILED_PIPELINE_DEST_PATH.uri,
    )
    parser.add_argument(
        "--additional_job_args",
        action="append",  # Allow multiple occurrences of this argument
        default=[],
        help="""Additional pipeline job arguments by component, each of form: "<component>.<key>=<value>"
        Example: --additional_job_args=subgraph_sampler.additional_spark35_jar_file_uris='gs://path/to/jar'
            --additional_job_args=split_generator.some_other_arg='value'
        This passes additional_spark35_jar_file_uris="gs://path/to/jar" to subgraph_sampler at compile time and
        some_other_arg="value" to split_generator at compile time.
        """,
    )

    args = parser.parse_args()
    logger.info(f"Beginning runner.py with args: {args}")

    parsed_additional_job_args = _parse_additional_job_args(args.additional_job_args)

    # Assert correctness of args
    required_flags: frozenset[str]
    if args.action == Action.RUN:
        required_flags = _REQUIRED_RUN_FLAGS
    elif args.action == Action.RUN_NO_COMPILE:
        required_flags = _REQUIRED_RUN_NO_COMPILE_FLAGS
    elif args.action == Action.COMPILE:
        required_flags = _REQUIRED_COMPILE_FLAGS

    missing_flags = []
    for flag in required_flags:
        # argparse defines every declared flag on `args`, so `hasattr` would always
        # be True here; check for unset (None) values instead.
        if getattr(args, flag) is None:
            missing_flags.append(flag)
    if missing_flags:
        raise ValueError(
            f"Missing the following flags for a {args.action} command: {missing_flags}. 
" + + f"All required flags are: {list(required_flags)}" + ) + + compiled_pipeline_path = UriFactory.create_uri(args.compiled_pipeline_path) + if args.action in (Action.RUN, Action.RUN_NO_COMPILE): + orchestrator = KfpOrchestrator() + + task_config_uri = UriFactory.create_uri(args.task_config_uri) + resource_config_uri = UriFactory.create_uri(args.resource_config_uri) + applied_task_identifier = AppliedTaskIdentifier(args.job_name) + + if args.action == Action.RUN: + path = orchestrator.compile( + cuda_container_image=args.container_image_cuda, + cpu_container_image=args.container_image_cpu, + dataflow_container_image=args.container_image_dataflow, + dst_compiled_pipeline_path=compiled_pipeline_path, + additional_job_args=parsed_additional_job_args, + tag=args.pipeline_tag, + ) + assert ( + path == compiled_pipeline_path + ), f"Compiled pipeline path {path} does not match provided path {compiled_pipeline_path}" + + run = orchestrator.run( + applied_task_identifier=applied_task_identifier, + task_config_uri=task_config_uri, + resource_config_uri=resource_config_uri, + start_at=args.start_at, + stop_after=args.stop_after, + compiled_pipeline_path=compiled_pipeline_path, + ) + + if args.wait: + orchestrator.wait_for_completion(run=run) + + elif args.action == Action.COMPILE: + pipeline_bundle_path = KfpOrchestrator.compile( + cuda_container_image=args.container_image_cuda, + cpu_container_image=args.container_image_cpu, + dataflow_container_image=args.container_image_dataflow, + dst_compiled_pipeline_path=compiled_pipeline_path, + additional_job_args=parsed_additional_job_args, + tag=args.pipeline_tag, + ) + logger.info( + f"Pipeline finished compiling, exported to: {pipeline_bundle_path.uri}" + ) + else: + raise ValueError(f"Unknown action: {args.action}") diff --git a/python/gigl/orchestration/kubeflow/utils/glt_backend.py b/python/gigl/orchestration/kubeflow/utils/glt_backend.py index cf15a4f..889fb0d 100644 --- a/python/gigl/orchestration/kubeflow/utils/glt_backend.py +++ b/python/gigl/orchestration/kubeflow/utils/glt_backend.py @@ -1,4 +1,17 @@ +from kfp import dsl + + def check_glt_backend_eligibility_component( + task_config_uri: str, base_image: str +) -> bool: + comp = dsl.component( + func=_check_glt_backend_eligibility_component, base_image=base_image + ) + comp.description = "Check whether to use GLT Backend" + return comp(task_config_uri=task_config_uri).output + + +def _check_glt_backend_eligibility_component( task_config_uri: str, ) -> bool: """ diff --git a/python/gigl/orchestration/kubeflow/utils/log_metrics.py b/python/gigl/orchestration/kubeflow/utils/log_metrics.py index 398fe3a..124803d 100644 --- a/python/gigl/orchestration/kubeflow/utils/log_metrics.py +++ b/python/gigl/orchestration/kubeflow/utils/log_metrics.py @@ -1,25 +1,38 @@ -from typing import NamedTuple +import kfp -def log_eval_metrics_to_ui( +def log_metrics_to_ui( task_config_uri: str, - component: str, -) -> NamedTuple( # type: ignore - "Outputs", - [ - ("mlpipeline_metrics", "Metrics"), - ], -): - """Returns model evaluation metrics produced by trainer, such - that they are parsable by the Kubeflow Pipelines UI. + component_name: str, + base_image: str, +) -> kfp.components.BaseComponent: + """Publishes metrics for components to the Vertex AI Pipeline UI. Args: - task_config_uri (str,): _description_ - component (str,): _description_ + task_config_uri (str): URI to the task config. + component (str): Name of the component to log metrics for. 
+ base_image: The Docker image to be used as the base image for the component. + Returns: - _type_: _description_ + kfp.components.BaseComponent: The component to log metrics. + """ + kfp_component = kfp.dsl.component(_log_eval_metrics_to_ui, base_image=base_image) + return kfp_component(task_config_uri=task_config_uri, component=component_name) + + +def _log_eval_metrics_to_ui( + task_config_uri: str, + component: str, + metrics: kfp.dsl.Output[kfp.dsl.Metrics], +) -> None: + """Publishes metrics for components to the Vertex AI Pipeline UI. + Args: + task_config_uri (str): URI to the task config. + component (str): Name of the component to log metrics for. + metrics (Output[Metrics]): Metrics object to log metrics. Populated by the KFP SDK. """ # This is required to resolve below packages when containerized by KFP. + import json import os import sys @@ -64,8 +77,10 @@ def log_eval_metrics_to_ui( logger.warning( f"Error loading metrics file: {e}, evaluation could have been skipped" ) - return [{"metrics": []}] + return logger.info(f"Got metrics_str: {metrics_str}") - - return [metrics_str] + j = json.loads(metrics_str) + if "metrics" in j: + for metric in j["metrics"]: + metrics.log_metric(metric["name"], metric["numberValue"]) diff --git a/python/gigl/orchestration/kubeflow/utils/resource.py b/python/gigl/orchestration/kubeflow/utils/resource.py index d4cf294..c6826c1 100644 --- a/python/gigl/orchestration/kubeflow/utils/resource.py +++ b/python/gigl/orchestration/kubeflow/utils/resource.py @@ -1,10 +1,10 @@ -from kfp.dsl._container_op import ContainerOp +from kfp.dsl import PipelineTask from gigl.common.types.resource_config import CommonPipelineComponentConfigs def add_task_resource_requirements( - task: ContainerOp, + task: PipelineTask, common_pipeline_component_configs: CommonPipelineComponentConfigs, ): """ @@ -20,6 +20,6 @@ def add_task_resource_requirements( DEFAULT_CPU_REQUEST = "4" DEFAULT_MEMORY_REQUEST = "1Gi" # default to cpu image, overwrite later as needed - task.container.image = common_pipeline_component_configs.cpu_container_image - task.container.set_cpu_request(DEFAULT_CPU_REQUEST) - task.container.set_memory_request(DEFAULT_MEMORY_REQUEST) + task.container_spec.image = common_pipeline_component_configs.cpu_container_image + task.set_cpu_request(DEFAULT_CPU_REQUEST) + task.set_memory_request(DEFAULT_MEMORY_REQUEST) diff --git a/python/gigl/src/common/constants/test_assets.py b/python/gigl/src/common/constants/test_assets.py deleted file mode 100644 index 4214e73..0000000 --- a/python/gigl/src/common/constants/test_assets.py +++ /dev/null @@ -1,139 +0,0 @@ -import gigl.env.dep_constants as dep_constants -from gigl.common import GcsUri, LocalUri -from gigl.src.common.constants.local_fs import get_gigl_root_directory -from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType -from gigl.src.common.utils.bq import BqUtils - -TEST_DATA_GCS_BUCKET = GcsUri(f"gs://{dep_constants.GIGL_TEST_BUCKET_NAME}/") -EXAMPLE_TASK_ASSETS_GCS_PATH = GcsUri.join(TEST_DATA_GCS_BUCKET, "mocked_assets") -EXAMPLE_TASK_ASSETS_BQ_PATH = "external-snap-ci-github-gigl.gbml_mocked_assets" -MOCKED_DATASET_ARTIFACT_METADATA_LOCAL_PATH = LocalUri.join( - get_gigl_root_directory(), - "src", - "mocking", - "lib", - "mocked_dataset_artifact_metadata.json", -) - - -def update_gcs_uri_with_test_assets_and_version(uri_str: str, version: str) -> str: - """ - Replaces the bucket and path of a GCS URI with the test assets bucket and path. 
- - Example: - input gs://some_bucket_name//data_preprocess/preprocessed_metadata.yaml - output gs://{TEST_DATA_GCS_BUCKET}/mocked_assets///data_preprocess/preprocessed_metadata.yaml - """ - - uri_tokens = uri_str.split("/") - replaced_uri = ( - f"{EXAMPLE_TASK_ASSETS_GCS_PATH}/{version}/{'/'.join(uri_tokens[3:])}" - ) - return replaced_uri - - -def update_bq_table_with_test_assets_and_version(bq_table: str, version: str) -> str: - table_name = bq_table.split(".")[-1] - replaced_table_name = f"{table_name}_{version}" - replaced_bq_table = f"{EXAMPLE_TASK_ASSETS_BQ_PATH}.{replaced_table_name}" - return replaced_bq_table - - -# BQ table paths for node / edge data -def get_example_task_nodes_bq_table_path( - task_name: str, version: str, node_type: NodeType -) -> str: - table_path = BqUtils.join_path( - EXAMPLE_TASK_ASSETS_BQ_PATH, f"{task_name}_{str(node_type)}_nodes_{version}" - ) - return table_path - - -def get_example_task_edges_bq_table_path( - task_name: str, - version: str, - edge_type: EdgeType, - edge_usage_type: EdgeUsageType, -) -> str: - table_path = BqUtils.join_path( - EXAMPLE_TASK_ASSETS_BQ_PATH, - f"{task_name}_{str(edge_type)}_edges_{str(edge_usage_type)}_{version}", - ) - return table_path - - -def get_example_task_static_assets_gcs_dir(task_name: str, version: str) -> GcsUri: - return GcsUri.join(EXAMPLE_TASK_ASSETS_GCS_PATH, f"{version}/", f"{task_name}/") - - -# Preprocessed tfrecord paths for node / edge data - - -def get_example_task_preprocess_gcs_prefix(task_name: str, version: str) -> GcsUri: - return GcsUri.join( - get_example_task_static_assets_gcs_dir(task_name=task_name, version=version), - "data_preprocess", - ) - - -def get_example_task_frozen_gbml_config_gcs_path( - task_name: str, version: str -) -> GcsUri: - return GcsUri.join( - get_example_task_static_assets_gcs_dir(task_name=task_name, version=version), - "frozen_gbml_config.yaml", - ) - - -def get_example_task_node_features_gcs_dir( - task_name: str, version: str, node_type: NodeType -) -> GcsUri: - return GcsUri.join( - get_example_task_preprocess_gcs_prefix(task_name=task_name, version=version), - "node_features_dir", - node_type, - "features/", - ) - - -def get_example_task_node_features_schema_gcs_path( - task_name: str, version: str, node_type: NodeType -) -> GcsUri: - return GcsUri.join( - get_example_task_preprocess_gcs_prefix(task_name=task_name, version=version), - "node_features_dir", - node_type, - "schema.pbtxt", - ) - - -def get_example_task_edge_features_gcs_dir( - task_name: str, - version: str, - edge_type: EdgeType, - edge_usage_type: EdgeUsageType, -) -> GcsUri: - parent_uri = GcsUri.join( - get_example_task_preprocess_gcs_prefix(task_name=task_name, version=version), - "edge_features_dir", - str(edge_type), - ) - return GcsUri.join(parent_uri, f"{str(edge_usage_type)}_edges", "features/") - - -def get_example_task_edge_features_schema_gcs_path( - task_name: str, - version: str, - edge_type: EdgeType, - edge_usage_type: EdgeUsageType, -) -> GcsUri: - parent_uri = GcsUri.join( - get_example_task_preprocess_gcs_prefix(task_name=task_name, version=version), - "edge_features_dir", - str(edge_type), - ) - return GcsUri.join( - parent_uri, - f"{str(edge_usage_type)}_edges", - "schema.pbtxt", - ) diff --git a/python/gigl/src/common/graph_builder/pyg_graph_data.py b/python/gigl/src/common/graph_builder/pyg_graph_data.py index 149dc18..e080f70 100644 --- a/python/gigl/src/common/graph_builder/pyg_graph_data.py +++ b/python/gigl/src/common/graph_builder/pyg_graph_data.py @@ -21,9 +21,9 @@ def 
__init__(self, **kwargs) -> None: super().__init__( **kwargs, ) - self.__global_node_to_subgraph_node_mapping: FrozenDict[Node, Node] = ( - FrozenDict({}) - ) + self.__global_node_to_subgraph_node_mapping: FrozenDict[ + Node, Node + ] = FrozenDict({}) self.__subgraph_node_to_global_node_mapping: Optional[ FrozenDict[Node, Node] ] = None diff --git a/python/gigl/src/common/modeling_task_specs/__init__.py b/python/gigl/src/common/modeling_task_specs/__init__.py index 468a3eb..e69de29 100644 --- a/python/gigl/src/common/modeling_task_specs/__init__.py +++ b/python/gigl/src/common/modeling_task_specs/__init__.py @@ -1,10 +0,0 @@ -# autoflake: skip_file -""" -Task specs for training / inferring on models. -- All trainer implementations should subclass gigl.src.training.v1.lib.base_trainer.BaseTrainer -- All inferencer implementations should subclass from gigl.src.inference.v1.lib.base_inferencer.BaseInferencer -""" - -from gigl.src.common.modeling_task_specs.node_anchor_based_link_prediction_modeling_task_spec import NodeAnchorBasedLinkPredictionModelingTaskSpec -from gigl.src.common.modeling_task_specs.node_classification_modeling_task_spec import NodeClassificationModelingTaskSpec -from gigl.src.common.modeling_task_specs.graphsage_template_modeling_spec import GraphSageTemplateTrainerSpec diff --git a/python/gigl/src/common/modeling_task_specs/node_anchor_based_link_prediction_modeling_task_spec.py b/python/gigl/src/common/modeling_task_specs/node_anchor_based_link_prediction_modeling_task_spec.py index 813ca6a..ebc9578 100644 --- a/python/gigl/src/common/modeling_task_specs/node_anchor_based_link_prediction_modeling_task_spec.py +++ b/python/gigl/src/common/modeling_task_specs/node_anchor_based_link_prediction_modeling_task_spec.py @@ -70,8 +70,8 @@ def __init__(self, **kwargs) -> None: super().__init__() # Model Arguments - # Supported internal homogeneous models can be found in gigl.src.common.models.pyg.homogeneous.py - # Supported internal heterogeneous models can be found in gigl.src.common.models.pyg.heterogeneous.py + # Supported homogeneous models can be found in gigl.src.common.models.pyg.homogeneous + # Supported heterogeneous models can be found in gigl.src.common.models.pyg.heterogeneous gnn_model_class_path = str( kwargs.get( "gnn_model_class_path", @@ -235,12 +235,12 @@ def init_model( preprocessed_metadata_pb_wrapper: PreprocessedMetadataPbWrapper = ( gbml_config_pb_wrapper.preprocessed_metadata_pb_wrapper ) - condensed_node_type_to_feat_dim_map: Dict[CondensedNodeType, int] = ( - preprocessed_metadata_pb_wrapper.condensed_node_type_to_feature_dim_map - ) - condensed_edge_type_to_feat_dim_map: Dict[CondensedEdgeType, int] = ( - preprocessed_metadata_pb_wrapper.condensed_edge_type_to_feature_dim_map - ) + condensed_node_type_to_feat_dim_map: Dict[ + CondensedNodeType, int + ] = preprocessed_metadata_pb_wrapper.condensed_node_type_to_feature_dim_map + condensed_edge_type_to_feat_dim_map: Dict[ + CondensedEdgeType, int + ] = preprocessed_metadata_pb_wrapper.condensed_edge_type_to_feature_dim_map encoder_model: nn.Module if gbml_config_pb_wrapper.graph_metadata_pb_wrapper.is_heterogeneous: node_type_to_feat_dim_map: Dict[NodeType, int] = { diff --git a/python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py b/python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py index b58ab8d..06ca19a 100644 --- a/python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py +++ 
b/python/gigl/src/common/modeling_task_specs/node_classification_modeling_task_spec.py @@ -126,9 +126,9 @@ def setup_for_training(self): lr=self.__optim_lr, weight_decay=self.__optim_weight_decay, ) - self._train_loss_fn: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = ( - lambda input, target: F.cross_entropy(input=input, target=target) - ) + self._train_loss_fn: Callable[ + [torch.Tensor, torch.Tensor], torch.Tensor + ] = lambda input, target: F.cross_entropy(input=input, target=target) self.model.train() def _train( diff --git a/python/gigl/src/common/modeling_task_specs/utils/infer.py b/python/gigl/src/common/modeling_task_specs/utils/infer.py index 3fbb0ee..1095397 100644 --- a/python/gigl/src/common/modeling_task_specs/utils/infer.py +++ b/python/gigl/src/common/modeling_task_specs/utils/infer.py @@ -119,9 +119,9 @@ def infer_task_inputs( _pos_embeddings: Dict[CondensedEdgeType, List[torch.FloatTensor]] = defaultdict( list ) - _hard_neg_embeddings: Dict[CondensedEdgeType, List[torch.FloatTensor]] = ( - defaultdict(list) - ) + _hard_neg_embeddings: Dict[ + CondensedEdgeType, List[torch.FloatTensor] + ] = defaultdict(list) _positive_ids: Dict[CondensedEdgeType, List[torch.LongTensor]] = defaultdict(list) _hard_neg_ids: Dict[CondensedEdgeType, List[torch.LongTensor]] = defaultdict(list) @@ -174,9 +174,9 @@ def infer_task_inputs( device=device, ) - main_batch_node_id_mapping: Dict[CondensedNodeType, Dict[NodeId, NodeId]] = ( - main_batch.condensed_node_type_to_subgraph_id_to_global_node_id - ) + main_batch_node_id_mapping: Dict[ + CondensedNodeType, Dict[NodeId, NodeId] + ] = main_batch.condensed_node_type_to_subgraph_id_to_global_node_id random_negative_batch_node_id_mapping: Dict[ CondensedNodeType, Dict[NodeId, NodeId] ] = random_neg_batch.condensed_node_type_to_subgraph_id_to_global_node_id @@ -430,15 +430,15 @@ def infer_task_inputs( else torch.tensor([]) ) - batch_combined_scores[condensed_supervision_edge_type] = ( - BatchCombinedScores( - repeated_candidate_scores=repeated_candidate_scores, - positive_ids=global_positive_ids, # type: ignore - hard_neg_ids=global_hard_neg_ids, # type: ignore - random_neg_ids=global_random_neg_ids, # type: ignore - repeated_query_ids=repeated_global_query_ids, # type: ignore - num_unique_query_ids=main_batch_root_node_indices.shape[0], - ) + batch_combined_scores[ + condensed_supervision_edge_type + ] = BatchCombinedScores( + repeated_candidate_scores=repeated_candidate_scores, + positive_ids=global_positive_ids, # type: ignore + hard_neg_ids=global_hard_neg_ids, # type: ignore + random_neg_ids=global_random_neg_ids, # type: ignore + repeated_query_ids=repeated_global_query_ids, # type: ignore + num_unique_query_ids=main_batch_root_node_indices.shape[0], ) # Populate all computed embeddings for task input diff --git a/python/gigl/src/common/models/layers/count_min_sketch.py b/python/gigl/src/common/models/layers/count_min_sketch.py index be6a5a7..8ab3ea3 100644 --- a/python/gigl/src/common/models/layers/count_min_sketch.py +++ b/python/gigl/src/common/models/layers/count_min_sketch.py @@ -1,4 +1,3 @@ -import random from typing import Any, List import numpy as np @@ -41,12 +40,9 @@ def hash_i(x: Any, i: int) -> int: """ Return the hash value of the item for the i-th hash function """ - random.seed(i) - # mask is deterministic for the same i - mask = random.getrandbits(32) - input_bytes = (str(x) + str(i)).encode("utf-8") - # Note that python built-in hash function is not deterministic across different runs - return hash(input_bytes) ^ mask + 
# Note that python built-in hash function is not deterministic across different processes for many types + # So we should be careful to only use the CMS in the same process + return hash((x, i)) return [hash_i(item, i) for i in range(self.__depth)] diff --git a/python/gigl/src/common/models/pyg/heterogeneous.py b/python/gigl/src/common/models/pyg/heterogeneous.py index 922cafd..042163b 100644 --- a/python/gigl/src/common/models/pyg/heterogeneous.py +++ b/python/gigl/src/common/models/pyg/heterogeneous.py @@ -1,4 +1,4 @@ -from typing import Dict, List +from typing import Dict, List, Optional import torch import torch_geometric.data @@ -9,6 +9,7 @@ from gigl.src.common.models.layers.normalization import l2_normalize_embeddings from gigl.src.common.models.pyg.nn.conv.hgt_conv import HGTConv from gigl.src.common.models.pyg.nn.conv.simplehgn_conv import SimpleHGNConv +from gigl.src.common.models.pyg.nn.models.feature_embedding import FeatureEmbeddingLayer from gigl.src.common.models.utils.torch import to_hetero_feat from gigl.src.common.types.graph_data import EdgeType, NodeType @@ -37,6 +38,9 @@ def __init__( num_layers: int = 2, num_heads: int = 2, should_l2_normalize_embedding_layer_output: bool = False, + feature_embedding_layers: Optional[ + Dict[NodeType, FeatureEmbeddingLayer] + ] = None, **kwargs, ): super().__init__() @@ -62,6 +66,8 @@ def __init__( should_l2_normalize_embedding_layer_output ) + self.feature_embedding_layers = feature_embedding_layers + def forward( self, data: torch_geometric.data.hetero_data.HeteroData, @@ -76,23 +82,40 @@ def forward( Returns: Dict[NodeType, torch.Tensor]: Dictionary with node types as keys and output tensors as values. """ - x_dict = { + node_type_to_features_dict = data.x_dict + + if self.feature_embedding_layers: + node_type_to_features_dict = { + node_type: self.feature_embedding_layers[node_type](x) + if node_type in self.feature_embedding_layers + else x + for node_type, x in node_type_to_features_dict.items() + } + + node_type_to_features_dict = { node_type: self.lin_dict[node_type](x).relu_() - for node_type, x in data.x_dict.items() + for node_type, x in node_type_to_features_dict.items() } + for conv in self.convs: - x_dict = conv(x_dict, data.edge_index_dict) + node_type_to_features_dict = conv( + node_type_to_features_dict, data.edge_index_dict + ) + node_typed_embeddings: Dict[NodeType, torch.Tensor] = {} + for node_type in output_node_types: node_typed_embeddings[node_type] = ( - self.lin(x_dict[node_type]) - if node_type in x_dict + self.lin(node_type_to_features_dict[node_type]) + if node_type in node_type_to_features_dict else torch.FloatTensor([]).to(device=device) ) + if self.should_l2_normalize_embedding_layer_output: node_typed_embeddings = l2_normalize_embeddings( # type: ignore node_typed_embeddings=node_typed_embeddings ) + return node_typed_embeddings diff --git a/python/gigl/src/common/translators/training_samples_protos_translator.py b/python/gigl/src/common/translators/training_samples_protos_translator.py index e8118ec..5a6e87e 100644 --- a/python/gigl/src/common/translators/training_samples_protos_translator.py +++ b/python/gigl/src/common/translators/training_samples_protos_translator.py @@ -121,11 +121,11 @@ def training_samples_from_NodeAnchorBasedLinkPredictionSamplePb( # this or filter those out in Split Generator. 
for pos_edge_pb in sample.pos_edges: - pos_edge: Tuple[Edge, Optional[torch.Tensor]] = ( - GbmlProtosTranslator.edge_from_EdgePb( - graph_metadata_pb_wrapper=graph_metadata_pb_wrapper, - edge_pb=pos_edge_pb, - ) + pos_edge: Tuple[ + Edge, Optional[torch.Tensor] + ] = GbmlProtosTranslator.edge_from_EdgePb( + graph_metadata_pb_wrapper=graph_metadata_pb_wrapper, + edge_pb=pos_edge_pb, ) node_id = pos_edge[0].dst_node.id condensed_edge_type = ( @@ -146,11 +146,11 @@ def training_samples_from_NodeAnchorBasedLinkPredictionSamplePb( ) for hard_neg_edge_pb in sample.hard_neg_edges: - hard_neg_edge: Tuple[Edge, Optional[torch.Tensor]] = ( - GbmlProtosTranslator.edge_from_EdgePb( - graph_metadata_pb_wrapper=graph_metadata_pb_wrapper, - edge_pb=hard_neg_edge_pb, - ) + hard_neg_edge: Tuple[ + Edge, Optional[torch.Tensor] + ] = GbmlProtosTranslator.edge_from_EdgePb( + graph_metadata_pb_wrapper=graph_metadata_pb_wrapper, + edge_pb=hard_neg_edge_pb, ) node_id = hard_neg_edge[0].dst_node.id condensed_edge_type = ( @@ -172,43 +172,43 @@ def training_samples_from_NodeAnchorBasedLinkPredictionSamplePb( ) for condensed_edge_type in graph_metadata_pb_wrapper.condensed_edge_types: - condensed_edge_type_to_supervision_edge_data[condensed_edge_type] = ( - NodeAnchorBasedLinkPredictionSample.SampleSupervisionEdgeData( - pos_nodes=condensed_supervision_edge_type_to_pos_nodes[ - condensed_edge_type - ], - hard_neg_nodes=condensed_supervision_edge_type_to_hard_neg_nodes[ - condensed_edge_type - ], - pos_edge_features=( - torch.stack( # type: ignore - condensed_supervision_edge_type_to_pos_edge_feats[ # type: ignore - condensed_edge_type - ] - ) - if len( - condensed_supervision_edge_type_to_pos_edge_feats[ - condensed_edge_type - ] - ) - > 0 - else None - ), - hard_neg_edge_features=( - torch.stack( # type: ignore - condensed_supervision_edge_type_to_hard_neg_edge_feats[ # type: ignore - condensed_edge_type - ] - ) - if len( - condensed_supervision_edge_type_to_hard_neg_edge_feats[ - condensed_edge_type - ] - ) - > 0 - else None - ), - ) + condensed_edge_type_to_supervision_edge_data[ + condensed_edge_type + ] = NodeAnchorBasedLinkPredictionSample.SampleSupervisionEdgeData( + pos_nodes=condensed_supervision_edge_type_to_pos_nodes[ + condensed_edge_type + ], + hard_neg_nodes=condensed_supervision_edge_type_to_hard_neg_nodes[ + condensed_edge_type + ], + pos_edge_features=( + torch.stack( # type: ignore + condensed_supervision_edge_type_to_pos_edge_feats[ # type: ignore + condensed_edge_type + ] + ) + if len( + condensed_supervision_edge_type_to_pos_edge_feats[ + condensed_edge_type + ] + ) + > 0 + else None + ), + hard_neg_edge_features=( + torch.stack( # type: ignore + condensed_supervision_edge_type_to_hard_neg_edge_feats[ # type: ignore + condensed_edge_type + ] + ) + if len( + condensed_supervision_edge_type_to_hard_neg_edge_feats[ + condensed_edge_type + ] + ) + > 0 + else None + ), ) training_samples.append( diff --git a/python/gigl/src/common/types/model.py b/python/gigl/src/common/types/model.py index 0ce99cc..3bcab98 100644 --- a/python/gigl/src/common/types/model.py +++ b/python/gigl/src/common/types/model.py @@ -9,16 +9,19 @@ @runtime_checkable class BaseModelOperationsProtocol(Protocol): @property - def model(self) -> torch.nn.Module: ... + def model(self) -> torch.nn.Module: + ... @model.setter - def model(self, model: torch.nn.Module) -> None: ... + def model(self, model: torch.nn.Module) -> None: + ... 
def init_model( self, gbml_config_pb_wrapper: GbmlConfigPbWrapper, state_dict: Optional[OrderedDict[str, torch.Tensor]] = None, - ) -> torch.nn.Module: ... + ) -> torch.nn.Module: + ... class GraphBackend(str, Enum): @@ -31,4 +34,5 @@ class GnnModel(Protocol): """ @property - def graph_backend(self) -> GraphBackend: ... + def graph_backend(self) -> GraphBackend: + ... diff --git a/python/gigl/src/common/types/pb_wrappers/dataset_metadata_utils.py b/python/gigl/src/common/types/pb_wrappers/dataset_metadata_utils.py index dfb84a7..2668556 100644 --- a/python/gigl/src/common/types/pb_wrappers/dataset_metadata_utils.py +++ b/python/gigl/src/common/types/pb_wrappers/dataset_metadata_utils.py @@ -133,12 +133,12 @@ def _load_dataloaders_from_config( if data_loader_type in self.dataloaders: dataloaders[data_loader_type] = self.dataloaders[data_loader_type] continue - self.dataloaders[data_loader_type] = ( - SupervisedNodeClassificationBatch.get_default_data_loader( - gbml_config_pb_wrapper=gbml_config_pb_wrapper, - graph_builder=graph_builder, - config=config, - ) + self.dataloaders[ + data_loader_type + ] = SupervisedNodeClassificationBatch.get_default_data_loader( + gbml_config_pb_wrapper=gbml_config_pb_wrapper, + graph_builder=graph_builder, + config=config, ) dataloaders[data_loader_type] = self.dataloaders[data_loader_type] return dataloaders @@ -316,21 +316,21 @@ def _load_dataloaders_from_config( continue # If we hae a list of uris, we are getting a main batch dataloader if isinstance(config.uris, list): - self.dataloaders[data_loader_type] = ( - NodeAnchorBasedLinkPredictionBatch.get_default_data_loader( - gbml_config_pb_wrapper=gbml_config_pb_wrapper, - graph_builder=graph_builder, - config=config, - ) + self.dataloaders[ + data_loader_type + ] = NodeAnchorBasedLinkPredictionBatch.get_default_data_loader( + gbml_config_pb_wrapper=gbml_config_pb_wrapper, + graph_builder=graph_builder, + config=config, ) # If we have a dictionary of uris, we are getting a rooted node neighborhood dataloader else: - self.dataloaders[data_loader_type] = ( - RootedNodeNeighborhoodBatch.get_default_data_loader( - gbml_config_pb_wrapper=gbml_config_pb_wrapper, - graph_builder=graph_builder, - config=config, - ) + self.dataloaders[ + data_loader_type + ] = RootedNodeNeighborhoodBatch.get_default_data_loader( + gbml_config_pb_wrapper=gbml_config_pb_wrapper, + graph_builder=graph_builder, + config=config, ) dataloaders[data_loader_type] = self.dataloaders[data_loader_type] return dataloaders diff --git a/python/gigl/src/common/types/pb_wrappers/gbml_config.py b/python/gigl/src/common/types/pb_wrappers/gbml_config.py index 7473cd9..688510c 100644 --- a/python/gigl/src/common/types/pb_wrappers/gbml_config.py +++ b/python/gigl/src/common/types/pb_wrappers/gbml_config.py @@ -428,3 +428,21 @@ def should_use_experimental_glt_backend(self) -> bool: ) ) ) + + @property + def should_populate_predictions_path(self) -> bool: + """ + Allows access to should_populate_predictions_path under GbmlConfig + + This flag is a temporary workaround to populate the extra embeddings for the same entity type + + Returns: + bool: Whether to populate predictions path in the InferenceOutput for each entity type + """ + return bool( + strtobool( + dict(self.gbml_config_pb.feature_flags).get( + "should_populate_predictions_path", "False" + ) + ) + ) diff --git a/python/gigl/src/common/types/pb_wrappers/graph_metadata.py b/python/gigl/src/common/types/pb_wrappers/graph_metadata.py index 66bfb52..43bea9f 100644 --- 
a/python/gigl/src/common/types/pb_wrappers/graph_metadata.py +++ b/python/gigl/src/common/types/pb_wrappers/graph_metadata.py @@ -96,6 +96,50 @@ def condensed_edge_type_to_condensed_node_types( return self.__condensed_edge_type_to_condensed_node_types + @property + def homogeneous_node_type(self) -> NodeType: + """ + Returns the singular node type for a homogeneous graph. This property should only be called if the graph is known to be homogeneous. + """ + if len(self.node_types) != 1: + raise ValueError( + f"Found node types {self.node_types}, expected one node type for homogeneous use cases" + ) + return self.node_types[0] + + @property + def homogeneous_condensed_node_type(self) -> CondensedNodeType: + """ + Returns the singular condensed node type for a homogeneous graph. This property should only be called if the graph is known to be homogeneous. + """ + if len(self.condensed_node_types) != 1: + raise ValueError( + f"Found condensed node types {self.condensed_node_types}, expected one condensed node type." + ) + return self.condensed_node_types[0] + + @property + def homogeneous_edge_type(self) -> EdgeType: + """ + Returns the singular edge type for a homogeneous graph. This property should only be called if the graph is known to be homogeneous. + """ + if len(self.edge_types) != 1: + raise ValueError( + f"Found edge types {self.edge_types}, expected one edge type for homogeneous use cases" + ) + return self.edge_types[0] + + @property + def homogeneous_condensed_edge_type(self) -> CondensedEdgeType: + """ + Returns the singular condensed edge type for a homogeneous graph. This property should only be called if the graph is known to be homogeneous. + """ + if len(self.condensed_edge_types) != 1: + raise ValueError( + f"Found condensed edge types {self.condensed_edge_types}, expected one condensed edge type for homogeneous use cases" + ) + return self.condensed_edge_types[0] + @property # type: ignore @lru_cache(maxsize=1) def condensed_node_type_to_node_type_map(self) -> Dict[CondensedNodeType, NodeType]: diff --git a/python/gigl/src/common/types/pb_wrappers/preprocessed_metadata.py b/python/gigl/src/common/types/pb_wrappers/preprocessed_metadata.py index 2d1a053..f4cd0e7 100644 --- a/python/gigl/src/common/types/pb_wrappers/preprocessed_metadata.py +++ b/python/gigl/src/common/types/pb_wrappers/preprocessed_metadata.py @@ -43,9 +43,9 @@ class PreprocessedMetadataPbWrapper: CondensedEdgeType, FeatureSchema ] = field(init=False) - _condensed_edge_type_to_pos_edge_feature_dim_map: Dict[CondensedEdgeType, int] = ( - field(init=False) - ) + _condensed_edge_type_to_pos_edge_feature_dim_map: Dict[ + CondensedEdgeType, int + ] = field(init=False) _condensed_edge_type_to_pos_edge_feature_schema_map: Dict[ CondensedEdgeType, FeatureSchema ] = field(init=False) diff --git a/python/gigl/src/common/types/pb_wrappers/subgraph_sampling_strategy.py b/python/gigl/src/common/types/pb_wrappers/subgraph_sampling_strategy.py index 82adbf3..997eb01 100644 --- a/python/gigl/src/common/types/pb_wrappers/subgraph_sampling_strategy.py +++ b/python/gigl/src/common/types/pb_wrappers/subgraph_sampling_strategy.py @@ -42,9 +42,9 @@ def __post_init__(self): error_type=SubgraphSamplingValidationErrorType.REPEATED_OP_NAME, ) - op_name_to_sampling_op_pb_wrapper[sampling_op_pb.op_name] = ( - cur_sampling_op_pb_wrapper - ) + op_name_to_sampling_op_pb_wrapper[ + sampling_op_pb.op_name + ] = cur_sampling_op_pb_wrapper is_root_sampling_op_node = ( len(cur_sampling_op_pb_wrapper.input_op_names) == 0 @@ -116,9 +116,9 @@ def 
__post_init__(self):
                    message=f"Found repeated root node type {root_node_type} when constructing message passing paths, please ensure each MessagePassingPath root node type is unique.",
                    error_type=SubgraphSamplingValidationErrorType.REPEATED_ROOT_NODE_TYPE,
                )
-            root_node_type_to_message_passing_path_pb_wrapper[root_node_type] = (
-                message_passing_path_pb_wrapper
-            )
+            root_node_type_to_message_passing_path_pb_wrapper[
+                root_node_type
+            ] = message_passing_path_pb_wrapper

        self.__root_node_type_to_message_passing_path_pb_wrapper = (
            root_node_type_to_message_passing_path_pb_wrapper
@@ -195,9 +195,9 @@ def validate_dags(
        task_metadata_pb_wrapper = TaskMetadataPbWrapper(
            task_metadata_pb=task_metadata_pb
        )
-        expected_root_node_types: Set[NodeType] = (
-            task_metadata_pb_wrapper.get_task_root_node_types()
-        )
+        expected_root_node_types: Set[
+            NodeType
+        ] = task_metadata_pb_wrapper.get_task_root_node_types()
        graph_edge_types = graph_metadata_pb_wrapper.edge_types
        graph_node_types = graph_metadata_pb_wrapper.node_types
diff --git a/python/gigl/src/common/utils/bq.py b/python/gigl/src/common/utils/bq.py
index a094ab4..2029269 100644
--- a/python/gigl/src/common/utils/bq.py
+++ b/python/gigl/src/common/utils/bq.py
@@ -113,6 +113,13 @@ def count_number_of_rows_in_bq_table(
        n_rows = row["ct"]
        return n_rows

+    def count_number_of_columns_in_bq_table(
+        self,
+        bq_table: str,
+    ) -> int:
+        schema = self.fetch_bq_table_schema(bq_table=bq_table)
+        return len(schema.keys())
+
    def run_query(
        self,
        query,
@@ -174,6 +181,25 @@ def join_path(path: str, *paths) -> str:
        assert joined_path.count(".") <= 2, f"Invalid BQ path: {joined_path}"
        return BqUtils.format_bq_path(joined_path)

+    @staticmethod
+    def parse_bq_table_path(bq_table_path: str) -> Tuple[str, str, str]:
+        """
+        Parses a joined BQ table path into its project, dataset, and table names
+        Args:
+            bq_table_path (str): Joined BQ table path of format `project.dataset.table`
+        Returns:
+            bq_project_id (str): Parsed BQ Project ID
+            bq_dataset_id (str): Parsed Dataset ID
+            bq_table_name (str): Parsed Table Name
+        """
+        split_bq_table_path = BqUtils.format_bq_path(bq_table_path).split(".")
+        assert (
+            len(split_bq_table_path) == 3
+        ), "bq_table_path should be in the format project.dataset.table"
+        bq_project_id, bq_dataset_id, bq_table_name = split_bq_table_path
+
+        return bq_project_id, bq_dataset_id, bq_table_name
+
    def update_bq_dataset_retention(
        self,
        bq_dataset_path: str,
@@ -407,3 +433,40 @@ def check_columns_exist_in_table(
            raise ValueError(f"Fields {missing_fields} missing from table {bq_table}.")
        else:
            logger.info(f"All requisite fields found in table {bq_table}")
+
+    def export_to_gcs(
+        self,
+        bq_table_path: str,
+        destination_gcs_uri: GcsUri,
+        destination_format: str = "NEWLINE_DELIMITED_JSON",
+    ) -> None:
+        """
+        Export a BigQuery table to Google Cloud Storage.
+
+        Args:
+            bq_table_path (str): The full BigQuery table path to export.
+            destination_gcs_uri (GcsUri): The destination GCS URI where the table will be exported.
+                If the GCS URI has a * in it, the table will be exported to multiple shards.
+            destination_format (str, optional): The format of the exported data. Defaults to 'NEWLINE_DELIMITED_JSON'.
+                'CSV', 'AVRO', 'PARQUET' also available.
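+
+        Example (illustrative; assumes an existing BqUtils instance `bq_utils`, and the
+        project, dataset, and bucket names below are placeholders):
+            bq_utils.export_to_gcs(
+                bq_table_path="my-project.my_dataset.my_table",
+                destination_gcs_uri=GcsUri("gs://my-bucket/export/shard-*.json"),
+            )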
+ """ + try: + job_config = bigquery.job.ExtractJobConfig() + job_config.destination_format = destination_format + + extract_job = self.__bq_client.extract_table( + source=bigquery.TableReference.from_string(bq_table_path), + destination_uris=destination_gcs_uri.uri, + job_config=job_config, + ) + + logger.info( + f"Exporting `{bq_table_path}` to {destination_gcs_uri} with format '{destination_format}'..." + ) + extract_job.result() # Waits for job to complete. + logger.info( + f"Exported `{bq_table_path}` to {destination_gcs_uri} successfully." + ) + except Exception as e: + logger.exception(f"Failed to export table to GCS.") + raise e diff --git a/python/gigl/src/common/utils/dataflow.py b/python/gigl/src/common/utils/dataflow.py index 232566a..9406f9d 100644 --- a/python/gigl/src/common/utils/dataflow.py +++ b/python/gigl/src/common/utils/dataflow.py @@ -107,6 +107,21 @@ def init_beam_pipeline_options( google_cloud_options.region = ( google_cloud_options.region or get_resource_config().region ) + + # For context see: https://cloud.google.com/dataflow/docs/reference/service-options#python + # This is different than how `num_workers` is leveraged by dataflow in the default `PipelineOptions` exposed by beam. + # i.e. simply setting `num_workers` in `PipelineOptions`, the dataflow service still may downscale to 1 worker. + # vs. setting `min_num_workers` in `dataflow_service_options` explicitly will ensure that the service will not downscale below + # that number. + if kwargs.get("num_workers"): + num_workers = kwargs.get("num_workers") + logger.info( + f"Setting `min_num_workers` for Dataflow explicitly to {num_workers}" + ) + dataflow_service_options = google_cloud_options.dataflow_service_options or [] + dataflow_service_options.append(f"min_num_workers={num_workers}") + google_cloud_options.dataflow_service_options = dataflow_service_options + google_cloud_options.service_account_email = ( google_cloud_options.service_account_email or (get_resource_config().service_account_email) diff --git a/python/gigl/src/common/utils/file_loader.py b/python/gigl/src/common/utils/file_loader.py index cf7013b..e762437 100644 --- a/python/gigl/src/common/utils/file_loader.py +++ b/python/gigl/src/common/utils/file_loader.py @@ -1,4 +1,5 @@ import tempfile +from collections.abc import Mapping from tempfile import _TemporaryFileWrapper as TemporaryFileWrapper # type: ignore from typing import Dict, List, Optional, Sequence, Tuple, Type, Union, cast @@ -28,7 +29,7 @@ def __init__(self, project: Optional[str] = None): @staticmethod def __get_uri_map_schema( - uri_map: Dict[Uri, Uri] + uri_map: Mapping[Uri, Uri] ) -> Tuple[Optional[Type[Uri]], Optional[Type[Uri]]]: uniform_src_type: Optional[Type[Uri]] = None uniform_dst_type: Optional[Type[Uri]] = None @@ -102,7 +103,7 @@ def load_directory(self, dir_uri_src: Uri, dir_uri_dst: Uri): def load_files( self, - source_to_dest_file_uri_map: Dict[Uri, Uri], + source_to_dest_file_uri_map: Mapping[Uri, Uri], should_create_symlinks_if_possible: bool = True, ) -> None: uri_map_schema = self.__get_uri_map_schema(uri_map=source_to_dest_file_uri_map) @@ -216,7 +217,7 @@ def count_assets(self, uri_prefix: Uri, suffix: Optional[str] = None) -> int: def does_uri_exist(self, uri: Union[str, Uri]) -> bool: """"" Check if a URI exists - + Args: uri (Union[str, Uri]): uri to check Returns: diff --git a/python/gigl/src/common/utils/spark_job_manager.py b/python/gigl/src/common/utils/spark_job_manager.py index 163a477..84cdb28 100644 --- 
a/python/gigl/src/common/utils/spark_job_manager.py +++ b/python/gigl/src/common/utils/spark_job_manager.py @@ -15,7 +15,7 @@ ) from google.protobuf.duration_pb2 import Duration -from gigl.common import Uri, GcsUri +from gigl.common import GcsUri, Uri from gigl.common.logger import Logger from gigl.common.services.dataproc import DataprocService @@ -54,6 +54,9 @@ def create_dataproc_cluster( metadata = {} if cluster_init_data.init_script_uri is not None: + logger.info( + f"Adding node init action to run following executable on every node: {cluster_init_data.init_script_uri}" + ) init_action = NodeInitializationAction( executable_file=cluster_init_data.init_script_uri, execution_timeout=Duration(seconds=300), # 5 mins diff --git a/python/gigl/src/config_populator/config_populator.py b/python/gigl/src/config_populator/config_populator.py index 4b47064..962b768 100644 --- a/python/gigl/src/config_populator/config_populator.py +++ b/python/gigl/src/config_populator/config_populator.py @@ -16,6 +16,7 @@ from gigl.src.common.types import AppliedTaskIdentifier from gigl.src.common.types.dataset_split import DatasetSplit from gigl.src.common.types.graph_data import EdgeType, NodeType, Relation +from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper from gigl.src.common.types.pb_wrappers.task_metadata import TaskMetadataPbWrapper from gigl.src.common.types.task_metadata import TaskMetadataType from gigl.src.common.utils.metrics_service_provider import ( @@ -331,6 +332,9 @@ def __populate_inference_metadata_pb( str, inference_metadata_pb2.InferenceOutput ] = {} inferencer_node_types = self.task_metadata_pb_wrapper.get_task_root_node_types() + template_gbml_config_pb_wrapper = GbmlConfigPbWrapper( + gbml_config_pb=self.template_gbml_config + ) for node_type in inferencer_node_types: embeddings_path = bq_constants.get_embeddings_table( applied_task_identifier=self.applied_task_identifier, @@ -341,7 +345,10 @@ def __populate_inference_metadata_pb( if ( self.task_metadata_pb_wrapper.task_metadata_type == TaskMetadataType.NODE_BASED_TASK + or template_gbml_config_pb_wrapper.should_populate_predictions_path ): + # TODO: currently, we are overloading the predictions path to store extra embeddings. + # consider extending InferenceOutput's definition for this purpose. 
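+            # To opt in, the template config would carry a feature flag along these
+            # lines (illustrative snippet; exact key casing follows the proto field name):
+            #   feature_flags:
+            #     should_populate_predictions_path: "True"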
predictions_path = bq_constants.get_predictions_table( applied_task_identifier=self.applied_task_identifier, node_type=node_type, diff --git a/python/gigl/src/data_preprocessor/data_preprocessor.py b/python/gigl/src/data_preprocessor/data_preprocessor.py index 50e64e8..584eeb2 100644 --- a/python/gigl/src/data_preprocessor/data_preprocessor.py +++ b/python/gigl/src/data_preprocessor/data_preprocessor.py @@ -4,8 +4,9 @@ import threading from collections import defaultdict from itertools import chain, repeat -from typing import Dict, Iterable, List, NamedTuple, Optional, Tuple, Union +from typing import Callable, Dict, Iterable, List, NamedTuple, Optional, Tuple, Union +import tensorflow as tf import tensorflow_data_validation as tfdv import tensorflow_transform as tft from apache_beam.runners.dataflow.dataflow_runner import DataflowPipelineResult @@ -48,6 +49,10 @@ EnumeratorEdgeTypeMetadata, EnumeratorNodeTypeMetadata, ) +from gigl.src.data_preprocessor.lib.ingest.bigquery import ( + BigqueryEdgeDataReference, + BigqueryNodeDataReference, +) from gigl.src.data_preprocessor.lib.ingest.reference import ( DataReference, EdgeDataReference, @@ -57,8 +62,10 @@ TransformedFeaturesInfo, ) from gigl.src.data_preprocessor.lib.types import ( + DEFAULT_TF_INT_DTYPE, EdgeDataPreprocessingSpec, EdgeOutputIdentifier, + FeatureSpecDict, NodeDataPreprocessingSpec, NodeOutputIdentifier, ) @@ -375,9 +382,9 @@ def __build_data_reference_str(references: Iterable[DataReference]) -> str: for data_ref_and_prep_spec, feature_type in zip( data_ref_and_prep_specs, feature_types ): - data_ref: Union[NodeDataReference, EdgeDataReference] = ( - data_ref_and_prep_spec[0] - ) + data_ref: Union[ + NodeDataReference, EdgeDataReference + ] = data_ref_and_prep_spec[0] prep_spec: Union[ NodeDataPreprocessingSpec, EdgeDataPreprocessingSpec ] = data_ref_and_prep_spec[1] @@ -456,11 +463,9 @@ def generate_preprocessed_metadata_pb( f"Adding to preprocessed metadata pb: [{node_data_ref}: {node_transformed_features_info}]" ) - condensed_node_type: CondensedNodeType = ( - self.gbml_config_pb_wrapper.graph_metadata_pb_wrapper.node_type_to_condensed_node_type_map[ - node_type - ] - ) + condensed_node_type: CondensedNodeType = self.gbml_config_pb_wrapper.graph_metadata_pb_wrapper.node_type_to_condensed_node_type_map[ + node_type + ] node_identifier_output = node_transformed_features_info.identifier_output assert isinstance( node_identifier_output, NodeOutputIdentifier @@ -516,25 +521,25 @@ def generate_preprocessed_metadata_pb( edge_type, edge_transformed_features_info_map, ) in preprocessed_metadata_references_map.items(): - positive_transformed_features_info: Optional[TransformedFeaturesInfo] = ( - edge_transformed_features_info_map.get(EdgeUsageType.POSITIVE, None) - ) - negative_transformed_features_info: Optional[TransformedFeaturesInfo] = ( - edge_transformed_features_info_map.get(EdgeUsageType.NEGATIVE, None) - ) - main_transformed_features_info: Optional[TransformedFeaturesInfo] = ( - edge_transformed_features_info_map.get(EdgeUsageType.MAIN, None) - ) + positive_transformed_features_info: Optional[ + TransformedFeaturesInfo + ] = edge_transformed_features_info_map.get(EdgeUsageType.POSITIVE, None) + negative_transformed_features_info: Optional[ + TransformedFeaturesInfo + ] = edge_transformed_features_info_map.get(EdgeUsageType.NEGATIVE, None) + main_transformed_features_info: Optional[ + TransformedFeaturesInfo + ] = edge_transformed_features_info_map.get(EdgeUsageType.MAIN, None) assert ( main_transformed_features_info is not 
None ), f"Main edge data must be present for edge type {edge_type}." - positive_enumerated_edge_metadata: Optional[EnumeratorEdgeTypeMetadata] = ( - None - ) - negative_enumerated_edge_metadata: Optional[EnumeratorEdgeTypeMetadata] = ( - None - ) + positive_enumerated_edge_metadata: Optional[ + EnumeratorEdgeTypeMetadata + ] = None + negative_enumerated_edge_metadata: Optional[ + EnumeratorEdgeTypeMetadata + ] = None main_enumerated_edge_metadata: Optional[EnumeratorEdgeTypeMetadata] = None if positive_transformed_features_info: positive_enumerated_edge_metadata = enumerator_edge_type_metadata_map[ @@ -548,11 +553,9 @@ def generate_preprocessed_metadata_pb( edge_type ][EdgeUsageType.MAIN] - condensed_edge_type: CondensedEdgeType = ( - self.gbml_config_pb_wrapper.graph_metadata_pb_wrapper.edge_type_to_condensed_edge_type_map[ - edge_type - ] - ) + condensed_edge_type: CondensedEdgeType = self.gbml_config_pb_wrapper.graph_metadata_pb_wrapper.edge_type_to_condensed_edge_type_map[ + edge_type + ] assert isinstance( main_transformed_features_info.identifier_output, EdgeOutputIdentifier ), f"Identifier output should be of class {EdgeOutputIdentifier.__name__}." @@ -637,6 +640,120 @@ def __validate_data_references_map_to_graph_metadata(self) -> None: f"Edge type {edge_data_ref.edge_type} from {edge_data_ref} not found in graph metadata." ) + def __patch_preprocessing_specs( + self, + node_data_reference_to_preprocessing_spec: Dict[ + NodeDataReference, NodeDataPreprocessingSpec + ], + edge_data_reference_to_preprocessing_spec: Dict[ + EdgeDataReference, EdgeDataPreprocessingSpec + ], + enumerator_node_type_metadata: List[EnumeratorNodeTypeMetadata], + enumerator_edge_type_metadata: List[EnumeratorEdgeTypeMetadata], + ) -> Tuple[ + Dict[NodeDataReference, NodeDataPreprocessingSpec], + Dict[EdgeDataReference, EdgeDataPreprocessingSpec], + ]: + """ + Patches the preprocessing specs for enumerated node and edge data references. + This is necessary because the enumerated node and edge data references have different identifiers than the original + node and edge data references. We need to update the preprocessing specs to use the enumerated identifiers. + + Args: + enumerator_node_type_metadata: List of enumerated node type metadata. + enumerator_edge_type_metadata: List of enumerated edge type metadata. + Returns: + Tuple of dictionaries mapping enumerated node and edge data references to their preprocessing specs. + """ + + # First, we patch the node data references. + enumerated_node_refs_to_preprocessing_specs: Dict[ + NodeDataReference, NodeDataPreprocessingSpec + ] = {} + + def feature_spec_fn( + feature_spec: FeatureSpecDict, + ) -> Callable[[], FeatureSpecDict]: + # We do this in order to bind the value of feature_spec to the returned function. + # This is a common pattern in Python to create a closure. + def inner() -> FeatureSpecDict: + return feature_spec + + return inner + + for enumerated_node_metadata in enumerator_node_type_metadata: + input_node_preprocessing_spec = node_data_reference_to_preprocessing_spec[ + enumerated_node_metadata.input_node_data_reference + ] + + feature_spec = input_node_preprocessing_spec.feature_spec_fn() + assert ( + input_node_preprocessing_spec.identifier_output in feature_spec + ), f"identifier_output: {input_node_preprocessing_spec.identifier_output} must be in feature_spec: {feature_spec}" + + # We expect the user to give us the actual feature spec for the node id; i.e. it might be string. 
+            # By the end of this function, we will have enumerated the node id to an integer; thus we update
+            # the feature spec accordingly.
+            feature_spec[
+                input_node_preprocessing_spec.identifier_output
+            ] = tf.io.FixedLenFeature(shape=[], dtype=DEFAULT_TF_INT_DTYPE)
+
+            enumerated_node_data_preprocessing_spec = NodeDataPreprocessingSpec(
+                feature_spec_fn=feature_spec_fn(feature_spec),
+                preprocessing_fn=input_node_preprocessing_spec.preprocessing_fn,
+                identifier_output=input_node_preprocessing_spec.identifier_output,
+                pretrained_tft_model_uri=input_node_preprocessing_spec.pretrained_tft_model_uri,
+                features_outputs=input_node_preprocessing_spec.features_outputs,
+                labels_outputs=input_node_preprocessing_spec.labels_outputs,
+            )
+            enumerated_node_refs_to_preprocessing_specs[
+                enumerated_node_metadata.enumerated_node_data_reference
+            ] = enumerated_node_data_preprocessing_spec
+
+        # Now we do the same for edges.
+        enumerated_edge_refs_to_preprocessing_specs: Dict[
+            EdgeDataReference, EdgeDataPreprocessingSpec
+        ] = {}
+        for enumerated_edge_metadata in enumerator_edge_type_metadata:
+            input_edge_preprocessing_spec = edge_data_reference_to_preprocessing_spec[
+                enumerated_edge_metadata.input_edge_data_reference
+            ]
+
+            feature_spec = input_edge_preprocessing_spec.feature_spec_fn()
+            assert (
+                input_edge_preprocessing_spec.identifier_output.src_node in feature_spec
+            ), f"identifier_output: {input_edge_preprocessing_spec.identifier_output.src_node} must be in feature_spec: {feature_spec}"
+            assert (
+                input_edge_preprocessing_spec.identifier_output.dst_node in feature_spec
+            ), f"identifier_output: {input_edge_preprocessing_spec.identifier_output.dst_node} must be in feature_spec: {feature_spec}"
+
+            # We expect the user to give us the actual feature spec for the node id; i.e. it might be string.
+            # By the end of this function, we will have enumerated the node id to an integer; thus we update
+            # the feature spec accordingly.
+            feature_spec[
+                input_edge_preprocessing_spec.identifier_output.src_node
+            ] = tf.io.FixedLenFeature(shape=[], dtype=DEFAULT_TF_INT_DTYPE)
+            feature_spec[
+                input_edge_preprocessing_spec.identifier_output.dst_node
+            ] = tf.io.FixedLenFeature(shape=[], dtype=DEFAULT_TF_INT_DTYPE)
+
+            enumerated_edge_data_preprocessing_spec = EdgeDataPreprocessingSpec(
+                feature_spec_fn=feature_spec_fn(feature_spec),
+                preprocessing_fn=input_edge_preprocessing_spec.preprocessing_fn,
+                identifier_output=input_edge_preprocessing_spec.identifier_output,
+                pretrained_tft_model_uri=input_edge_preprocessing_spec.pretrained_tft_model_uri,
+                features_outputs=input_edge_preprocessing_spec.features_outputs,
+                labels_outputs=input_edge_preprocessing_spec.labels_outputs,
+            )
+            enumerated_edge_refs_to_preprocessing_specs[
+                enumerated_edge_metadata.enumerated_edge_data_reference
+            ] = enumerated_edge_data_preprocessing_spec
+
+        return (
+            enumerated_node_refs_to_preprocessing_specs,
+            enumerated_edge_refs_to_preprocessing_specs,
+        )
+
    def __run(
        self,
        applied_task_identifier: AppliedTaskIdentifier,
@@ -661,32 +778,71 @@ def __run(
        bq_gcp_project = get_resource_config().project
        logger.info(f"Using implicit GCP project {bq_gcp_project} for BigQuery.")

+        # Update the node and edge data references to include identifiers. In current configuration setup,
+        # these identifiers are piped in from the DataPreprocessorConfig.
+ node_refs_to_specs: Dict[NodeDataReference, NodeDataPreprocessingSpec] = {} + for ( + node_data_reference, + node_data_preprocessing_spec, + ) in self.data_preprocessor_config.get_nodes_preprocessing_spec().items(): + assert isinstance( + node_data_reference, BigqueryNodeDataReference + ), f"Only {BigqueryNodeDataReference.__name__} references are currently supported." + node_data_ref_with_identifier = BigqueryNodeDataReference( + reference_uri=node_data_reference.reference_uri, + node_type=node_data_reference.node_type, + identifier=node_data_preprocessing_spec.identifier_output, + ) + node_refs_to_specs[ + node_data_ref_with_identifier + ] = node_data_preprocessing_spec + + edge_refs_to_specs: Dict[EdgeDataReference, EdgeDataPreprocessingSpec] = {} + for ( + edge_data_reference, + edge_data_preprocessing_spec, + ) in self.data_preprocessor_config.get_edges_preprocessing_spec().items(): + assert isinstance( + edge_data_reference, BigqueryEdgeDataReference + ), f"Only {BigqueryEdgeDataReference.__name__} references are currently supported." + edge_data_ref_with_identifier = BigqueryEdgeDataReference( + reference_uri=edge_data_reference.reference_uri, + edge_type=edge_data_reference.edge_type, + edge_usage_type=edge_data_reference.edge_usage_type, + src_identifier=edge_data_preprocessing_spec.identifier_output.src_node, + dst_identifier=edge_data_preprocessing_spec.identifier_output.dst_node, + ) + edge_refs_to_specs[ + edge_data_ref_with_identifier + ] = edge_data_preprocessing_spec + # Enumerate all graph data. enumerator = Enumerator() enumerator_results: Tuple[ List[EnumeratorNodeTypeMetadata], List[EnumeratorEdgeTypeMetadata] ] = enumerator.run( applied_task_identifier=self.applied_task_identifier, - node_preprocessing_specs=self.data_preprocessor_config.get_nodes_preprocessing_spec(), - edge_preprocessing_specs=self.data_preprocessor_config.get_edges_preprocessing_spec(), + node_data_references=list(node_refs_to_specs.keys()), + edge_data_references=list(edge_refs_to_specs.keys()), gcp_project=bq_gcp_project, ) + ( enumerator_node_type_metadata, enumerator_edge_type_metadata, ) = enumerator_results - enumerated_node_refs_to_preprocessing_specs: Dict[ - NodeDataReference, NodeDataPreprocessingSpec - ] = { - metadata.enumerated_node_data_reference: metadata.enumerated_node_data_preprocessing_spec - for metadata in enumerator_node_type_metadata - } - enumerated_edge_refs_to_preprocessing_specs: Dict[ - EdgeDataReference, EdgeDataPreprocessingSpec - ] = { - metadata.enumerated_edge_data_reference: metadata.enumerated_edge_data_preprocessing_spec - for metadata in enumerator_edge_type_metadata - } + + # Now that we've enumerated all the node and edge data, we need to update + # the preprocessing specs to use the enumerated node and edge data references. 
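+        # For illustration (hypothetical spec): an identifier entry the user declared as, e.g.,
+        #   {"node_id": tf.io.FixedLenFeature(shape=[], dtype=tf.string), ...}
+        # comes back from __patch_preprocessing_specs as
+        #   {"node_id": tf.io.FixedLenFeature(shape=[], dtype=DEFAULT_TF_INT_DTYPE), ...}
+        # since identifiers are integers once enumeration has run.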
+        (
+            enumerated_node_refs_to_preprocessing_specs,
+            enumerated_edge_refs_to_preprocessing_specs,
+        ) = self.__patch_preprocessing_specs(
+            node_data_reference_to_preprocessing_spec=node_refs_to_specs,
+            edge_data_reference_to_preprocessing_spec=edge_refs_to_specs,
+            enumerator_node_type_metadata=enumerator_node_type_metadata,
+            enumerator_edge_type_metadata=enumerator_edge_type_metadata,
+        )

        # Validating Enumerated Edge Tables that were generated
        # We perform this check on the enumerated table, meaning that for nodes that exist in the
@@ -701,11 +857,15 @@ def __run(
            component=GiGLComponents.DataPreprocessor
        )
        for enumerated_edge_metadata in enumerator_edge_type_metadata:
-            edge_preprocessing_spec = (
-                enumerated_edge_metadata.enumerated_edge_data_preprocessing_spec
+            src_node_column_name = (
+                enumerated_edge_metadata.enumerated_edge_data_reference.src_identifier
+            )
+            dst_node_column_name = (
+                enumerated_edge_metadata.enumerated_edge_data_reference.dst_identifier
            )
-            src_node_column_name = edge_preprocessing_spec.identifier_output.src_node
-            dst_node_column_name = edge_preprocessing_spec.identifier_output.dst_node
+            assert (src_node_column_name is not None) and (
+                dst_node_column_name is not None
+            ), f"Missing src/dst identifiers in enumerated edge data reference: {enumerated_edge_metadata.enumerated_edge_data_reference}"
            edge_table = (
                enumerated_edge_metadata.enumerated_edge_data_reference.reference_uri
            )
@@ -720,20 +880,18 @@ def __run(
            if has_dangling_edges:
                raise ValueError(
                    f"""
-                    ERROR: The enumerated edge table {edge_table} has dangling edges. Meaning that at least one
-                    edge exists where either src_node ({src_node_column_name}) and/or
+                    ERROR: The enumerated edge table {edge_table} has dangling edges, meaning that at least one
+                    edge exists where either src_node ({src_node_column_name}) and/or
                    dst_node ({dst_node_column_name}) is null. This is usually because of input data having
-                    edges containig nodes which are not present in the input node data. Please look into the
+                    edges containing nodes which are not present in the input node data. Please look into the
                    input data and fix the issue.
                    """
                )

        # Run Dataflow jobs to transform data references as per DataPreprocessorConfig.
- preprocessed_metadata_references: PreprocessedMetadataReferences = ( - self.__preprocess_all_data_references( - node_ref_to_preprocessing_spec=enumerated_node_refs_to_preprocessing_specs, - edge_ref_to_preprocessing_spec=enumerated_edge_refs_to_preprocessing_specs, - ) + preprocessed_metadata_references: PreprocessedMetadataReferences = self.__preprocess_all_data_references( + node_ref_to_preprocessing_spec=enumerated_node_refs_to_preprocessing_specs, + edge_ref_to_preprocessing_spec=enumerated_edge_refs_to_preprocessing_specs, ) logger.info("All preprocessed NODE results:\n") @@ -839,7 +997,6 @@ def run( help="Docker image to use for the worker harness in dataflow", required=False, ) - args = parser.parse_args() ati = AppliedTaskIdentifier(args.job_name) diff --git a/python/gigl/src/data_preprocessor/lib/enumerate/queries.py b/python/gigl/src/data_preprocessor/lib/enumerate/queries.py index 7a25d75..4dca9f2 100644 --- a/python/gigl/src/data_preprocessor/lib/enumerate/queries.py +++ b/python/gigl/src/data_preprocessor/lib/enumerate/queries.py @@ -3,7 +3,7 @@ UNIQUE_NODE_ENUMERATION_QUERY = """ WITH - unique_nodes AS ( + unique_nodes AS ( SELECT DISTINCT {bq_source_table_node_id_col_name} as {original_node_id_field} FROM `{bq_source_table_name}` ) SELECT @@ -49,23 +49,22 @@ WITH unmapped_graph AS ( - SELECT DISTINCT {src_node_id_col}, {dst_node_id_col} FROM `{bq_graph}` + SELECT {src_node_id_col}, {dst_node_id_col} FROM `{bq_graph}` ) -SELECT +SELECT ( - SELECT {enumerated_int_id_field} - FROM `{src_enumerated_node_ids}` + SELECT {enumerated_int_id_field} + FROM `{src_enumerated_node_ids}` WHERE {original_node_id_field} = unmapped_graph.{src_node_id_col} ) as {src_node_id_col}, ( - SELECT {enumerated_int_id_field} - FROM `{dst_enumerated_node_ids}` + SELECT {enumerated_int_id_field} + FROM `{dst_enumerated_node_ids}` WHERE {original_node_id_field} = unmapped_graph.{dst_node_id_col} ) as {dst_node_id_col}, FROM unmapped_graph """ -# TODO: (svij-sc) This query should have DISTINCT clause like NO_EDGE_FEATURES_GRAPH_EDGELIST_ENUMERATION_QUERY EDGE_FEATURES_GRAPH_EDGELIST_ENUMERATION_QUERY = """ WITH unmapped_graph AS @@ -77,14 +76,14 @@ FROM `{bq_graph}` ) -SELECT +SELECT ( - SELECT {enumerated_int_id_field} - FROM `{src_enumerated_node_ids}` + SELECT {enumerated_int_id_field} + FROM `{src_enumerated_node_ids}` WHERE {original_node_id_field} = unmapped_graph.{src_node_id_col} ) as {src_node_id_col}, ( - SELECT {enumerated_int_id_field} + SELECT {enumerated_int_id_field} FROM `{dst_enumerated_node_ids}` WHERE {original_node_id_field} = unmapped_graph.{dst_node_id_col} ) as {dst_node_id_col}, diff --git a/python/gigl/src/data_preprocessor/lib/enumerate/utils.py b/python/gigl/src/data_preprocessor/lib/enumerate/utils.py index eff97a3..7f8a709 100644 --- a/python/gigl/src/data_preprocessor/lib/enumerate/utils.py +++ b/python/gigl/src/data_preprocessor/lib/enumerate/utils.py @@ -2,10 +2,9 @@ import sys import traceback from dataclasses import dataclass -from typing import Dict, List, Tuple +from typing import Dict, List, Sequence, Tuple import google.cloud.bigquery as bigquery -import tensorflow as tf from gigl.common.env_config import get_available_cpus from gigl.common.logger import Logger @@ -24,12 +23,6 @@ EdgeDataReference, NodeDataReference, ) -from gigl.src.data_preprocessor.lib.types import ( - DEFAULT_TF_INT_DTYPE, - EdgeDataPreprocessingSpec, - FeatureSpecDict, - NodeDataPreprocessingSpec, -) logger = Logger() @@ -82,33 +75,30 @@ def get_resource_labels() -> Dict[str, str]: @dataclass 
class EnumeratorNodeTypeMetadata: input_node_data_reference: NodeDataReference - input_node_data_preprocessing_spec: NodeDataPreprocessingSpec enumerated_node_data_reference: BigqueryNodeDataReference - enumerated_node_data_preprocessing_spec: NodeDataPreprocessingSpec bq_unique_node_ids_enumerated_table_name: str + num_nodes: int def __repr__(self) -> str: return f"""EnumeratorNodeTypeMetadata( input_node_data_reference={self.input_node_data_reference}, - input_node_data_preprocessing_spec={self.input_node_data_preprocessing_spec}, enumerated_node_data_reference={self.enumerated_node_data_reference}, - enumerated_node_data_preprocessing_spec={self.enumerated_node_data_preprocessing_spec}, bq_unique_node_ids_enumerated_table_name={self.bq_unique_node_ids_enumerated_table_name}) + bq_unique_node_ids_enumerated_table_name={self.bq_unique_node_ids_enumerated_table_name}, + num_nodes={self.num_nodes}) """ @dataclass class EnumeratorEdgeTypeMetadata: input_edge_data_reference: EdgeDataReference - input_edge_data_preprocessing_spec: EdgeDataPreprocessingSpec enumerated_edge_data_reference: BigqueryEdgeDataReference - enumerated_edge_data_preprocessing_spec: EdgeDataPreprocessingSpec + num_edges: int def __repr__(self) -> str: return f"""EnumeratorEdgeTypeMetadata( input_edge_data_reference={self.input_edge_data_reference}, - input_edge_data_preprocessing_spec={self.input_edge_data_preprocessing_spec}, enumerated_edge_data_reference={self.enumerated_edge_data_reference}, - enumerated_edge_data_preprocessing_spec={self.enumerated_edge_data_preprocessing_spec}) + num_edges={self.num_edges}) """ @@ -121,7 +111,7 @@ def __generate_enumerated_node_id_table_from_src_node_feature_table( bq_source_table_name: str, bq_source_table_node_id_col_name: str, node_type: NodeType, - ) -> str: + ) -> Tuple[str, int]: num_nodes_in_source_table = self.__bq_utils.count_number_of_rows_in_bq_table( bq_table=bq_source_table_name, labels=get_resource_labels() ) @@ -158,12 +148,13 @@ def __generate_enumerated_node_id_table_from_src_node_feature_table( # If they are not, it suggests the input table has multiple rows for same node id. assert num_nodes_in_source_table == num_enumerated_nodes, ( f"Number of input nodes not equal to number of enumerated nodes: ({num_nodes_in_source_table} != {num_enumerated_nodes}). " - f"Check the input table in case of duplicates." + f"This suggests the input table {bq_source_table_name} has multiple rows for the same node, which have been uniquified in " + f"the enumerated node id table {bq_enumerated_node_id_map_table_name}." ) logger.info( f"[Node Type: {node_type}] Finished generating enumerated ids for {num_enumerated_nodes} nodes; mapping written to {bq_enumerated_node_id_map_table_name}."
) - return bq_enumerated_node_id_map_table_name + return bq_enumerated_node_id_map_table_name, num_enumerated_nodes def __generate_enumerated_node_feat_table_using_node_id_map_table( self, @@ -208,95 +199,61 @@ def __generate_enumerated_node_feat_table_using_node_id_map_table( def __enumerate_node_reference( self, node_data_ref: NodeDataReference, - node_data_preprocessing_spec: NodeDataPreprocessingSpec, ) -> EnumeratorNodeTypeMetadata: - feature_spec: FeatureSpecDict = node_data_preprocessing_spec.feature_spec_fn() - assert ( - node_data_preprocessing_spec.identifier_output in feature_spec - ), f"identifier_output: {node_data_preprocessing_spec.identifier_output} must be in feature_spec: {feature_spec}" - - logger.info(f"Read the following feature spec: {feature_spec}") - if not isinstance(node_data_ref, BigqueryNodeDataReference): raise NotImplementedError( f"Enumeration currently only supported for {BigqueryNodeDataReference.__name__}" ) - # raw_identifier_tf_feature_type: tf.DType = raw_feature_spec[ - # node_data_preprocessing_spec.identifier_output - # ].dtype # Will be used in the future; see coment below - # TODO: (svij-sc) Support this use case by dumping data to BQ using a beam pipeline - # Will follow up in PR - - # We expect the user to give us the actual feature spec for the node id; i.e. it might be string. - # By the end of this function, we will finish enumerated the node id to an integer; thus we update - # the feature spec respectively. - feature_spec[node_data_preprocessing_spec.identifier_output] = ( - tf.io.FixedLenFeature(shape=[], dtype=DEFAULT_TF_INT_DTYPE) - ) bq_source_table_name: str = BqUtils.format_bq_path( bq_path=node_data_ref.reference_uri, ) logger.info( - f"[Node Type: {node_data_ref.node_type}]: starting to enumerate node ids from source node table {bq_source_table_name}. The generated table will have the following feature spec: {feature_spec}" - ) - bq_source_table_node_id_col_name: str = str( - node_data_preprocessing_spec.identifier_output - ) - node_type: NodeType = node_data_ref.node_type - bq_unique_node_ids_enumerated_table_name: str = ( - self.__generate_enumerated_node_id_table_from_src_node_feature_table( - bq_source_table_name=bq_source_table_name, - bq_source_table_node_id_col_name=bq_source_table_node_id_col_name, - node_type=node_type, - ) - ) - bq_destination_enumerated_node_features_table_name: str = ( - self.__generate_enumerated_node_feat_table_using_node_id_map_table( - bq_source_table_name=bq_source_table_name, - bq_source_table_node_id_col_name=bq_source_table_node_id_col_name, - bq_enumerated_node_id_map_table_name=bq_unique_node_ids_enumerated_table_name, - node_type=node_type, - ) + f"[Node Type: {node_data_ref.node_type}]: starting to enumerate node ids from source node table {bq_source_table_name}." ) + assert ( + node_data_ref.identifier is not None + ), f"Missing identifier for node data reference: {node_data_ref}. 
" - enumerated_node_data_preprocessing_spec = NodeDataPreprocessingSpec( - feature_spec_fn=lambda: feature_spec, - preprocessing_fn=node_data_preprocessing_spec.preprocessing_fn, - identifier_output=node_data_preprocessing_spec.identifier_output, - pretrained_tft_model_uri=node_data_preprocessing_spec.pretrained_tft_model_uri, - features_outputs=node_data_preprocessing_spec.features_outputs, - labels_outputs=node_data_preprocessing_spec.labels_outputs, + ( + bq_unique_node_ids_enumerated_table_name, + num_enumerated_nodes, + ) = self.__generate_enumerated_node_id_table_from_src_node_feature_table( + bq_source_table_name=bq_source_table_name, + bq_source_table_node_id_col_name=node_data_ref.identifier, + node_type=node_data_ref.node_type, + ) + bq_destination_enumerated_node_features_table_name: str = self.__generate_enumerated_node_feat_table_using_node_id_map_table( + bq_source_table_name=bq_source_table_name, + bq_source_table_node_id_col_name=node_data_ref.identifier, + bq_enumerated_node_id_map_table_name=bq_unique_node_ids_enumerated_table_name, + node_type=node_data_ref.node_type, ) return EnumeratorNodeTypeMetadata( input_node_data_reference=node_data_ref, - input_node_data_preprocessing_spec=node_data_preprocessing_spec, enumerated_node_data_reference=BigqueryNodeDataReference( reference_uri=bq_destination_enumerated_node_features_table_name, - node_type=node_type, + node_type=node_data_ref.node_type, + identifier=node_data_ref.identifier, ), - enumerated_node_data_preprocessing_spec=enumerated_node_data_preprocessing_spec, bq_unique_node_ids_enumerated_table_name=bq_unique_node_ids_enumerated_table_name, + num_nodes=num_enumerated_nodes, ) def __enumerate_all_node_references( self, - node_preprocessing_specs: Dict[NodeDataReference, NodeDataPreprocessingSpec], + node_data_references: Sequence[NodeDataReference], ) -> List[EnumeratorNodeTypeMetadata]: results: List[EnumeratorNodeTypeMetadata] = [] with concurrent.futures.ThreadPoolExecutor( max_workers=get_available_cpus() ) as executor: futures: List[concurrent.futures.Future] = list() - for ( - node_data_ref, - node_data_preprocessing_spec, - ) in node_preprocessing_specs.items(): + for node_data_ref in node_data_references: future = executor.submit( self.__enumerate_node_reference, node_data_ref=node_data_ref, - node_data_preprocessing_spec=node_data_preprocessing_spec, ) futures.append(future) @@ -308,7 +265,7 @@ def __enumerate_all_node_references( def __enumerate_all_edge_references( self, - edge_preprocessing_specs: Dict[EdgeDataReference, EdgeDataPreprocessingSpec], + edge_data_references: Sequence[EdgeDataReference], map_enumerator_node_type_metadata: Dict[NodeType, EnumeratorNodeTypeMetadata], ) -> List[EnumeratorEdgeTypeMetadata]: results: List[EnumeratorEdgeTypeMetadata] = [] @@ -316,14 +273,10 @@ def __enumerate_all_edge_references( max_workers=get_available_cpus() ) as executor: futures: List[concurrent.futures.Future] = list() - for ( - edge_data_ref, - edge_preprocessing_spec, - ) in edge_preprocessing_specs.items(): + for edge_data_ref in edge_data_references: future = executor.submit( self.__enumerate_edge_reference, edge_data_ref=edge_data_ref, - edge_preprocessing_spec=edge_preprocessing_spec, map_enumerator_node_type_metadata=map_enumerator_node_type_metadata, ) futures.append(future) @@ -343,8 +296,7 @@ def __generate_enumerated_edge_feat_table_using_node_id_map_tables( bq_source_table_dst_node_id_col_name: str, bq_enumerated_src_node_id_map_table_name: str, bq_enumerated_dst_node_id_map_table_name: str, - 
has_edge_features: bool, - ) -> str: + ) -> Tuple[str, int]: dst_enumerated_edge_features_table_name: str = ( get_enumerated_edge_features_bq_table_name( applied_task_identifier=self.__applied_task_identifier, @@ -352,6 +304,18 @@ def __generate_enumerated_edge_feat_table_using_node_id_map_tables( edge_usage_type=edge_usage_type, ) ) + + num_edges_in_source_table = self.__bq_utils.count_number_of_rows_in_bq_table( + bq_table=bq_source_table_name, labels=get_resource_labels() + ) + + has_edge_features = ( + self.__bq_utils.count_number_of_columns_in_bq_table( + bq_table=bq_source_table_name, + ) + > 2 + ) + graph_edges_enumeration_query = ( enumeration_queries.EDGE_FEATURES_GRAPH_EDGELIST_ENUMERATION_QUERY if has_edge_features @@ -374,23 +338,30 @@ def __generate_enumerated_edge_feat_table_using_node_id_map_tables( destination=dst_enumerated_edge_features_table_name, write_disposition=bigquery.job.WriteDisposition.WRITE_TRUNCATE, ) - return dst_enumerated_edge_features_table_name + + num_edges_in_enumerated_table = ( + self.__bq_utils.count_number_of_rows_in_bq_table( + bq_table=dst_enumerated_edge_features_table_name, + labels=get_resource_labels(), + ) + ) + + # Make sure the number of input edges and output edges are equivalent. + # If they are not, it suggests there were edges which referenced src or dst nodes + # that were not in the source or dest node tables. + assert num_edges_in_source_table == num_edges_in_enumerated_table, ( + f"Number of input edges not equal to number of enumerated edges: ({num_edges_in_source_table} != {num_edges_in_enumerated_table}). " + f"This suggests there were edges in {bq_source_table_name} which referenced src nodes not found in {bq_enumerated_src_node_id_map_table_name} " + f"or dst nodes not found in {bq_enumerated_dst_node_id_map_table_name}." + ) + + return dst_enumerated_edge_features_table_name, num_edges_in_enumerated_table def __enumerate_edge_reference( self, edge_data_ref: EdgeDataReference, - edge_preprocessing_spec: EdgeDataPreprocessingSpec, map_enumerator_node_type_metadata: Dict[NodeType, EnumeratorNodeTypeMetadata], ) -> EnumeratorEdgeTypeMetadata: - feature_spec: FeatureSpecDict = edge_preprocessing_spec.feature_spec_fn() - assert ( - edge_preprocessing_spec.identifier_output.src_node in feature_spec - ), f"identifier_output: {edge_preprocessing_spec.identifier_output.src_node} must be in feature_spec: {feature_spec}" - assert ( - edge_preprocessing_spec.identifier_output.dst_node in feature_spec - ), f"identifier_output: {edge_preprocessing_spec.identifier_output.dst_node} must be in feature_spec: {feature_spec}" - logger.info(f"Read the following feature spec: {feature_spec}") - if not isinstance(edge_data_ref, BigqueryEdgeDataReference): raise NotImplementedError( f"Enumeration currently only supported for {BigqueryEdgeDataReference.__name__}" @@ -398,40 +369,20 @@ def __enumerate_edge_reference( # TODO: (svij-sc) Support this use case by dumping data to BQ using a beam pipeline # Will follow up in PR - # We expect the user to give us the actual feature spec for the node id; i.e. it might be string. - # By the end of this function, we will finish enumerated the node ids for edges to integers; - # thus we update the feature spec respectively. 
- feature_spec[edge_preprocessing_spec.identifier_output.src_node] = ( - tf.io.FixedLenFeature(shape=[], dtype=DEFAULT_TF_INT_DTYPE) - ) - feature_spec[edge_preprocessing_spec.identifier_output.dst_node] = ( - tf.io.FixedLenFeature(shape=[], dtype=DEFAULT_TF_INT_DTYPE) - ) - bq_source_table_name: str = BqUtils.format_bq_path( bq_path=edge_data_ref.reference_uri, ) - bq_source_table_src_node_id_col_name: str = str( - edge_preprocessing_spec.identifier_output.src_node - ) - bq_source_table_dst_node_id_col_name: str = str( - edge_preprocessing_spec.identifier_output.dst_node - ) logger.info( - f"[Edge Type: {edge_data_ref.edge_type} ; Edge Classification: {edge_data_ref.edge_usage_type}]: starting to enumerate node ids from source edge table {bq_source_table_name}. The generated table will have the following feature spec: {feature_spec}" + f"[Edge Type: {edge_data_ref.edge_type} ; Edge Classification: {edge_data_ref.edge_usage_type}]: starting to enumerate node ids from source edge table {bq_source_table_name}." ) # Get source and destination metadata. - src_node_type, dst_node_type = ( - edge_data_ref.edge_type.src_node_type, - edge_data_ref.edge_type.dst_node_type, - ) src_enumerated_node_type_metadata = map_enumerator_node_type_metadata[ - src_node_type + edge_data_ref.edge_type.src_node_type ] dst_enumerated_node_type_metadata = map_enumerator_node_type_metadata[ - dst_node_type + edge_data_ref.edge_type.dst_node_type ] src_enumerated_node_ids = BqUtils.format_bq_path( @@ -441,67 +392,56 @@ def __enumerate_edge_reference( bq_path=dst_enumerated_node_type_metadata.bq_unique_node_ids_enumerated_table_name ) - has_edge_features: bool = ( - edge_preprocessing_spec.features_outputs is not None - and len(edge_preprocessing_spec.features_outputs) > 0 - ) or ( - edge_preprocessing_spec.labels_outputs is not None - and len(edge_preprocessing_spec.labels_outputs) > 0 - ) - logger.info( f"[Edge Type: {edge_data_ref.edge_type} ; Edge Classification: {edge_data_ref.edge_usage_type}]: Started writing enumerated edges (and features)." ) - bq_enumerated_edge_features_table_name = self.__generate_enumerated_edge_feat_table_using_node_id_map_tables( + assert (edge_data_ref.src_identifier is not None) and ( + edge_data_ref.dst_identifier is not None + ), f"Missing identifiers for edge data reference: {edge_data_ref}. " + ( + bq_enumerated_edge_features_table_name, + num_enumerated_edges, + ) = self.__generate_enumerated_edge_feat_table_using_node_id_map_tables( edge_type=edge_data_ref.edge_type, edge_usage_type=edge_data_ref.edge_usage_type, bq_source_table_name=bq_source_table_name, - bq_source_table_src_node_id_col_name=bq_source_table_src_node_id_col_name, - bq_source_table_dst_node_id_col_name=bq_source_table_dst_node_id_col_name, + bq_source_table_src_node_id_col_name=edge_data_ref.src_identifier, + bq_source_table_dst_node_id_col_name=edge_data_ref.dst_identifier, bq_enumerated_src_node_id_map_table_name=src_enumerated_node_ids, bq_enumerated_dst_node_id_map_table_name=dst_enumerated_node_ids, - has_edge_features=has_edge_features, ) logger.info( f"[Edge Type: {edge_data_ref.edge_type} ; Edge Classification: {edge_data_ref.edge_usage_type}]: Finished writing enumerated edges (and features) to {bq_enumerated_edge_features_table_name}." 
) - enumerated_edge_data_preprocessing_spec = EdgeDataPreprocessingSpec( - feature_spec_fn=lambda: feature_spec, - preprocessing_fn=edge_preprocessing_spec.preprocessing_fn, - identifier_output=edge_preprocessing_spec.identifier_output, - pretrained_tft_model_uri=edge_preprocessing_spec.pretrained_tft_model_uri, - features_outputs=edge_preprocessing_spec.features_outputs, - labels_outputs=edge_preprocessing_spec.labels_outputs, - ) - return EnumeratorEdgeTypeMetadata( input_edge_data_reference=edge_data_ref, - input_edge_data_preprocessing_spec=edge_preprocessing_spec, enumerated_edge_data_reference=BigqueryEdgeDataReference( reference_uri=bq_enumerated_edge_features_table_name, edge_type=edge_data_ref.edge_type, edge_usage_type=edge_data_ref.edge_usage_type, + src_identifier=edge_data_ref.src_identifier, + dst_identifier=edge_data_ref.dst_identifier, ), - enumerated_edge_data_preprocessing_spec=enumerated_edge_data_preprocessing_spec, + num_edges=num_enumerated_edges, ) def __run( self, applied_task_identifier: AppliedTaskIdentifier, - node_preprocessing_specs: Dict[NodeDataReference, NodeDataPreprocessingSpec], - edge_preprocessing_specs: Dict[EdgeDataReference, EdgeDataPreprocessingSpec], + node_data_references: Sequence[NodeDataReference], + edge_data_references: Sequence[EdgeDataReference], gcp_project: str, ) -> Tuple[List[EnumeratorNodeTypeMetadata], List[EnumeratorEdgeTypeMetadata]]: self.__bq_utils = BqUtils(project=gcp_project) self.__applied_task_identifier = applied_task_identifier - enumerated_node_metadata: List[EnumeratorNodeTypeMetadata] = ( - self.__enumerate_all_node_references( - node_preprocessing_specs=node_preprocessing_specs - ) + enumerated_node_metadata: List[ + EnumeratorNodeTypeMetadata + ] = self.__enumerate_all_node_references( + node_data_references=node_data_references ) map_enumerator_node_type_metadata: Dict[ NodeType, EnumeratorNodeTypeMetadata @@ -509,11 +449,11 @@ def __run( node_metadata.input_node_data_reference.node_type: node_metadata for node_metadata in enumerated_node_metadata } - enumerated_edge_metadata: List[EnumeratorEdgeTypeMetadata] = ( - self.__enumerate_all_edge_references( - edge_preprocessing_specs=edge_preprocessing_specs, - map_enumerator_node_type_metadata=map_enumerator_node_type_metadata, - ) + enumerated_edge_metadata: List[ + EnumeratorEdgeTypeMetadata + ] = self.__enumerate_all_edge_references( + edge_data_references=edge_data_references, + map_enumerator_node_type_metadata=map_enumerator_node_type_metadata, ) logger.info("Finished enumerating all node and edge references.") @@ -529,15 +469,15 @@ def __run( def run( self, applied_task_identifier: AppliedTaskIdentifier, - node_preprocessing_specs: Dict[NodeDataReference, NodeDataPreprocessingSpec], - edge_preprocessing_specs: Dict[EdgeDataReference, EdgeDataPreprocessingSpec], + node_data_references: Sequence[NodeDataReference], + edge_data_references: Sequence[EdgeDataReference], gcp_project: str, ) -> Tuple[List[EnumeratorNodeTypeMetadata], List[EnumeratorEdgeTypeMetadata]]: try: return self.__run( applied_task_identifier=applied_task_identifier, - node_preprocessing_specs=node_preprocessing_specs, - edge_preprocessing_specs=edge_preprocessing_specs, + node_data_references=node_data_references, + edge_data_references=edge_data_references, gcp_project=gcp_project, ) except Exception as e: diff --git a/python/gigl/src/data_preprocessor/lib/ingest/bigquery.py b/python/gigl/src/data_preprocessor/lib/ingest/bigquery.py index 5a3c6a5..46ec168 100644 --- 
a/python/gigl/src/data_preprocessor/lib/ingest/bigquery.py +++ b/python/gigl/src/data_preprocessor/lib/ingest/bigquery.py @@ -32,7 +32,7 @@ def yield_instance_dict_ptransform(self, *args, **kwargs) -> InstanceDictPTransf return _get_bigquery_ptransform(table_name=self.reference_uri, *args, **kwargs) # type: ignore def __repr__(self) -> str: - return f"BigqueryNodeDataReference(node_type={self.node_type}, reference_uri={self.reference_uri})" + return f"BigqueryNodeDataReference(node_type={self.node_type}, identifier={self.identifier}, reference_uri={self.reference_uri})" class BigqueryEdgeDataReference(EdgeDataReference): @@ -40,4 +40,4 @@ def yield_instance_dict_ptransform(self, *args, **kwargs) -> InstanceDictPTransf return _get_bigquery_ptransform(table_name=self.reference_uri, *args, **kwargs) # type: ignore def __repr__(self) -> str: - return f"BigqueryEdgeDataReference(edge_type={self.edge_type}, reference_uri={self.reference_uri})" + return f"BigqueryEdgeDataReference(edge_type={self.edge_type}, src_identifier={self.src_identifier}, dst_identifier={self.dst_identifier}, reference_uri={self.reference_uri})" diff --git a/python/gigl/src/data_preprocessor/lib/ingest/reference.py b/python/gigl/src/data_preprocessor/lib/ingest/reference.py index 628b243..445ea18 100644 --- a/python/gigl/src/data_preprocessor/lib/ingest/reference.py +++ b/python/gigl/src/data_preprocessor/lib/ingest/reference.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass +from typing import Optional from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType from gigl.src.data_preprocessor.lib.types import InstanceDictPTransform @@ -43,9 +44,10 @@ class NodeDataReference(DataReference, ABC): """ node_type: NodeType + identifier: Optional[str] = None def __repr__(self) -> str: - return f"NodeDataReference(node_type={self.node_type}, reference_uri={self.reference_uri})" + return f"NodeDataReference(node_type={self.node_type}, identifier={self.identifier}, reference_uri={self.reference_uri})" @dataclass(frozen=True) # type: ignore @@ -56,6 +58,8 @@ class EdgeDataReference(DataReference, ABC): edge_type: EdgeType edge_usage_type: EdgeUsageType = EdgeUsageType.MAIN + src_identifier: Optional[str] = None + dst_identifier: Optional[str] = None def __repr__(self) -> str: - return f"EdgeDataReference(edge_type={self.edge_type}, reference_uri={self.reference_uri})" + return f"EdgeDataReference(edge_type={self.edge_type}, src_identifier={self.src_identifier}, dst_identifier={self.dst_identifier}, reference_uri={self.reference_uri})" diff --git a/python/gigl/src/data_preprocessor/lib/transform/tf_value_encoder.py b/python/gigl/src/data_preprocessor/lib/transform/tf_value_encoder.py index e4e29ae..7ef0d6a 100644 --- a/python/gigl/src/data_preprocessor/lib/transform/tf_value_encoder.py +++ b/python/gigl/src/data_preprocessor/lib/transform/tf_value_encoder.py @@ -5,7 +5,7 @@ class TFValueEncoder: @staticmethod - def __get_value_to_impute(dtype: tf.dtypes.DType) -> Union[int, str, float]: + def get_value_to_impute(dtype: tf.dtypes.DType) -> Union[int, str, float]: """ Returns the default value to use for a missing field. 
:param dtype: @@ -70,7 +70,7 @@ def encode_value_as_feature(value: Any, dtype: tf.dtypes.DType) -> tf.train.Feat """ # prepare value if value is None: - value = TFValueEncoder.__get_value_to_impute(dtype=dtype) + value = TFValueEncoder.get_value_to_impute(dtype=dtype) if not isinstance(value, list): value = [value] diff --git a/python/gigl/src/data_preprocessor/lib/transform/transformed_features_info.py b/python/gigl/src/data_preprocessor/lib/transform/transformed_features_info.py index 349d79b..4523ef4 100644 --- a/python/gigl/src/data_preprocessor/lib/transform/transformed_features_info.py +++ b/python/gigl/src/data_preprocessor/lib/transform/transformed_features_info.py @@ -25,9 +25,9 @@ class TransformedFeaturesInfo: transformed_features_schema_path: GcsUri transform_directory_path: GcsUri dataflow_console_uri: Optional[HttpUri] = None - identifier_output: Optional[Union[NodeOutputIdentifier, EdgeOutputIdentifier]] = ( - None - ) + identifier_output: Optional[ + Union[NodeOutputIdentifier, EdgeOutputIdentifier] + ] = None features_outputs: Optional[List[str]] = None label_outputs: Optional[List[str]] = None feature_dim_output: Optional[int] = None diff --git a/python/gigl/src/data_preprocessor/lib/transform/utils.py b/python/gigl/src/data_preprocessor/lib/transform/utils.py index 2930469..f4e8e52 100644 --- a/python/gigl/src/data_preprocessor/lib/transform/utils.py +++ b/python/gigl/src/data_preprocessor/lib/transform/utils.py @@ -2,13 +2,13 @@ import apache_beam as beam import pyarrow as pa -import tensorflow as tf import tensorflow_data_validation as tfdv -import tensorflow_transform as tft +import tensorflow_transform import tfx_bsl from apache_beam.pvalue import PBegin, PCollection, PDone from tensorflow_metadata.proto.v0 import schema_pb2, statistics_pb2 from tensorflow_transform import beam as tft_beam +from tensorflow_transform.tf_metadata import schema_utils from tfx_bsl.tfxio.record_based_tfxio import RecordBasedTFXIO from gigl.common import GcsUri, LocalUri, Uri @@ -48,24 +48,38 @@ class InstanceDictToTFExample(beam.DoFn): See https://www.tensorflow.org/tfx/transform/get_started#the_tfxio_format. """ - def __init__(self, feature_spec: FeatureSpecDict): + def __init__( + self, + feature_spec: FeatureSpecDict, + schema: schema_pb2.Schema, + ): self.feature_spec = feature_spec + self.schema = schema + self._coder: Optional[tensorflow_transform.coders.ExampleProtoCoder] = None def process(self, element: InstanceDict) -> Iterable[bytes]: - # Each row is a single instance dict from the original tabular input (BQ, GCS, etc.) - example = dict() - for key in self.feature_spec.keys(): - # prepare each value associated with a key that appears in the feature_spec. - # only the instance dict keys the user specifies wanting in the feature_spec will pass through here - value = element[key] - if value is None: - logger.debug(f"Found key {key} with missing value in sample {element}") - example[key] = TFValueEncoder.encode_value_as_feature( - value=value, dtype=self.feature_spec[key].dtype + # This coder is sensitive to environment (e.g., proto library version), and thus + # it is recommended to instantiate the coder at pipeline execution time (i.e., + # in process function) instead of at pipeline construction time (i.e., in __init__) + if not self._coder: + self._coder = tensorflow_transform.coders.ExampleProtoCoder(self.schema) + + # Each element is a single row from the original tabular input (BQ, GCS, etc.) + # Only features in the user specified feature_spec are extracted from element. 
+ # Imputation is applied when a feature value is NULL. + parsed_and_imputed_element = { + feature_name: ( + element[feature_name] + # If feature_name does not exist as a column in the original table, a + # KeyError should be raised to warn the user. Therefore, we do not use + # element.get() here. + if element[feature_name] is not None + else TFValueEncoder.get_value_to_impute(dtype=spec.dtype) ) - example_proto = tf.train.Example(features=tf.train.Features(feature=example)) - serialized_proto = example_proto.SerializeToString() - return [serialized_proto] + for feature_name, spec in self.feature_spec.items() + } + + yield self._coder.encode(parsed_and_imputed_element) class IngestRawFeatures(beam.PTransform): @@ -74,10 +88,12 @@ def __init__( self, data_reference: DataReference, feature_spec: FeatureSpecDict, + schema: schema_pb2.Schema, beam_record_tfxio: RecordBasedTFXIO, ): self.data_reference = data_reference self.feature_spec = feature_spec + self.schema = schema self.beam_record_tfxio = beam_record_tfxio def expand(self, pbegin: PBegin) -> PCollection[pa.RecordBatch]: @@ -91,7 +107,11 @@ def expand(self, pbegin: PBegin) -> PCollection[pa.RecordBatch]: | "Parse raw tabular features into instance dicts." >> self.data_reference.yield_instance_dict_ptransform() | "Serialize instance dicts to transformed TFExamples" - >> beam.ParDo(InstanceDictToTFExample(feature_spec=self.feature_spec)) + >> beam.ParDo( + InstanceDictToTFExample( + feature_spec=self.feature_spec, schema=self.schema + ) + ) | "Transformed TFExamples to RecordBatches with TFXIO" >> self.beam_record_tfxio.BeamSource() ) @@ -249,8 +269,8 @@ def get_load_data_and_transform_pipeline_component( use_deep_copy_optimization=False, ): raw_feature_spec = preprocessing_spec.feature_spec_fn() - raw_data_schema: schema_pb2.Schema = ( - tft.tf_metadata.schema_utils.schema_from_feature_spec(raw_feature_spec) + raw_data_schema: schema_pb2.Schema = schema_utils.schema_from_feature_spec( + raw_feature_spec ) beam_record_tfxio = tfx_bsl.tfxio.tf_example_record.TFExampleBeamRecord( @@ -263,6 +283,7 @@ def get_load_data_and_transform_pipeline_component( raw_features = p | IngestRawFeatures( data_reference=data_reference, feature_spec=raw_feature_spec, + schema=raw_data_schema, beam_record_tfxio=beam_record_tfxio, ) @@ -307,20 +328,35 @@ def get_load_data_and_transform_pipeline_component( transformed_features_info.transform_directory_path.uri ) - # Write out transformed features. - # This transformed_metadata can only be relied on for encoding purposes - # reusing a pretrained transform_fn. It can sometimes produce inaccurate - # metadata for transformed features when building a new transform_fn, - # hence we opt to use the deferred_metadata instead. + # Apply TransformFn over raw features transformed_features, transformed_metadata = ( (raw_features, raw_tensor_adapter_config), resolved_transform_fn, - ) | "Transform raw features dataset" >> tft_beam.TransformDataset() + ) | "Transform raw features dataset" >> tft_beam.TransformDataset( + output_record_batches=True + ) + + # The transformed_features returned by tft_beam.TransformDataset is a + # PCollection of Tuple[pa.RecordBatch, Dict[str, pa.Array]]. The first + # element holds the transformed features. The second holds the passthrough + # features, which do not apply here since we do not specify passthrough_keys + # in tft_beam.Context. Hence we drop the second element of the tuple.
+ transformed_features = transformed_features | "Extract RecordBatch" >> beam.Map( + lambda element: element[0] + ) + + # The transformed_metadata returned by tft_beam.TransformDataset can only + # be relied on for encoding purposes when reusing a pretrained transform_fn, + # yet it could be inaccurate when using a new transform_fn built by + # tft_beam.AnalyzeDataset. For the latter case, we do not use transformed_metadata + # returned by tft_beam.TransformDataset, but use deferred_metadata from + # transform_fn instead. resolved_transformed_metadata = ( transformed_metadata if should_use_existing_transform_fn else beam.pvalue.AsSingleton(analyzed_transform_fn[1].deferred_metadata) # type: ignore ) + transformed_features | "Write tf record files" >> BetterWriteToTFRecord( file_path_prefix=transformed_features_info.transformed_features_file_prefix.uri, max_bytes_per_shard=int(2e8), # 200mb, diff --git a/python/gigl/src/inference/inferencer.py b/python/gigl/src/inference/inferencer.py index 1f6a7c3..0590e1f 100644 --- a/python/gigl/src/inference/inferencer.py +++ b/python/gigl/src/inference/inferencer.py @@ -56,11 +56,10 @@ def run( cuda_docker_uri=cuda_docker_uri, ) else: - inferencer_v1 = InferencerV1() + inferencer_v1 = InferencerV1(bq_gcp_project=resource_config_wrapper.project) inferencer_v1.run( applied_task_identifier=applied_task_identifier, task_config_uri=task_config_uri, - resource_config_uri=resource_config_uri, custom_worker_image_uri=custom_worker_image_uri, ) @@ -71,16 +70,19 @@ def run( "--job_name", type=str, help="Unique identifier for the job name", + required=True, ) parser.add_argument( "--task_config_uri", type=str, help="Gbml config uri", + required=True, ) parser.add_argument( "--resource_config_uri", type=str, help="Runtime argument for resource and env specifications of each component", + required=True, ) parser.add_argument( "--custom_worker_image_uri", @@ -101,7 +103,6 @@ def run( help="User Specified or KFP compiled Docker Image for GPU inference", required=False, ) - args = parser.parse_args() task_config_uri = UriFactory.create_uri(args.task_config_uri) diff --git a/python/gigl/src/inference/lib/assets.py b/python/gigl/src/inference/lib/assets.py index 6caa28c..bcf545e 100644 --- a/python/gigl/src/inference/lib/assets.py +++ b/python/gigl/src/inference/lib/assets.py @@ -106,7 +106,7 @@ def prepare_staging_paths( logger.info("Staging paths for Inferencer prepared.") @staticmethod - def _get_gcs_asset_write_path_prefix( + def get_gcs_asset_write_path_prefix( applied_task_identifier: AppliedTaskIdentifier, bq_table_path: str ) -> GcsUri: """ @@ -174,7 +174,7 @@ def _delete_temp_gcs_files( for bq_table_path in active_bq_table_paths: table_gcs_write_path_uri: GcsUri = ( - InferenceAssets._get_gcs_asset_write_path_prefix( + InferenceAssets.get_gcs_asset_write_path_prefix( applied_task_identifier=applied_task_identifier, bq_table_path=bq_table_path, ) diff --git a/python/gigl/src/inference/v1/gnn_inferencer.py b/python/gigl/src/inference/v1/gnn_inferencer.py index a82ad5b..24390fa 100644 --- a/python/gigl/src/inference/v1/gnn_inferencer.py +++ b/python/gigl/src/inference/v1/gnn_inferencer.py @@ -18,6 +18,7 @@ from gigl.common.logger import Logger from gigl.common.metrics.decorators import flushes_metrics, profileit from gigl.common.utils import os_utils +from gigl.env.pipelines_config import get_resource_config from gigl.src.common.constants.metrics import TIMER_INFERENCER_S from gigl.src.common.graph_builder.graph_builder_factory import GraphBuilderFactory from 
gigl.src.common.types import AppliedTaskIdentifier @@ -180,19 +181,17 @@ def __infer_single_node_type( temp_predictions_gcs_path: Optional[GcsUri] temp_embeddings_gcs_path: Optional[GcsUri] if should_persist_predictions: - temp_predictions_gcs_path = ( - InferenceAssets._get_gcs_asset_write_path_prefix( - applied_task_identifier=applied_task_identifier, - bq_table_path=node_type_to_inferencer_output_info_map[ - node_type - ].predictions_path, - ) + temp_predictions_gcs_path = InferenceAssets.get_gcs_asset_write_path_prefix( + applied_task_identifier=applied_task_identifier, + bq_table_path=node_type_to_inferencer_output_info_map[ + node_type + ].predictions_path, ) else: temp_predictions_gcs_path = None if should_persist_embeddings: - temp_embeddings_gcs_path = InferenceAssets._get_gcs_asset_write_path_prefix( + temp_embeddings_gcs_path = InferenceAssets.get_gcs_asset_write_path_prefix( applied_task_identifier=applied_task_identifier, bq_table_path=node_type_to_inferencer_output_info_map[ node_type @@ -295,9 +294,9 @@ def __run( node_type = futures[future] try: inferencer_output_paths: InferencerOutputPaths = future.result() - node_type_to_inferencer_output_paths_map[node_type] = ( - inferencer_output_paths - ) + node_type_to_inferencer_output_paths_map[ + node_type + ] = inferencer_output_paths except Exception as e: logger.exception( f"{node_type} inferencer job failed due to a raised exception: {e}" @@ -346,7 +345,6 @@ def run( self, applied_task_identifier: AppliedTaskIdentifier, task_config_uri: Uri, - resource_config_uri: Uri, custom_worker_image_uri: Optional[str] = None, ): try: @@ -364,8 +362,8 @@ def run( logger.error(traceback.format_exc()) sys.exit(f"System will now exit: {e}") - def __init__(self): - self.__bq_utils = BqUtils() + def __init__(self, bq_gcp_project: str): + self.__bq_utils = BqUtils(project=bq_gcp_project if bq_gcp_project else None) if __name__ == "__main__": @@ -374,16 +372,19 @@ def __init__(self): "--job_name", type=str, help="Unique identifier for the job name", + required=True, ) parser.add_argument( "--task_config_uri", type=str, help="Gbml config uri", + required=True, ) parser.add_argument( "--resource_config_uri", type=str, help="Runtime argument for resource and env specifications of each component", + required=True, ) parser.add_argument( "--custom_worker_image_uri", @@ -404,7 +405,6 @@ def __init__(self): help="User Specified or KFP compiled Docker Image for GPU inference", required=False, ) - args = parser.parse_args() task_config_uri = UriFactory.create_uri(args.task_config_uri) @@ -414,10 +414,9 @@ def __init__(self): initialize_metrics(task_config_uri=task_config_uri, service_name=args.job_name) applied_task_identifier = AppliedTaskIdentifier(args.job_name) - inferencer = InferencerV1() + inferencer = InferencerV1(bq_gcp_project=get_resource_config().project) inferencer.run( applied_task_identifier=applied_task_identifier, task_config_uri=task_config_uri, - resource_config_uri=resource_config_uri, custom_worker_image_uri=custom_worker_image_uri, ) diff --git a/python/gigl/src/inference/v2/glt_inferencer.py b/python/gigl/src/inference/v2/glt_inferencer.py index 9c0616a..fc8e920 100644 --- a/python/gigl/src/inference/v2/glt_inferencer.py +++ b/python/gigl/src/inference/v2/glt_inferencer.py @@ -115,11 +115,9 @@ def __execute_VAI_inference( labels=resource_config_wrapper.get_resource_labels( component=GiGLComponents.Inferencer ), - timeout_s=( - inferencer_resource_config.timeout - if inferencer_resource_config.timeout - else None - ), + 
timeout_s=inferencer_resource_config.timeout + if inferencer_resource_config.timeout + else None, ) vertex_ai_service = VertexAIService( project=resource_config_wrapper.project, @@ -127,7 +125,7 @@ def __execute_VAI_inference( service_account=resource_config_wrapper.service_account_email, staging_bucket=resource_config_wrapper.temp_assets_regional_bucket_path.uri, ) - vertex_ai_service.run(job_config=job_config) + vertex_ai_service.launch_job(job_config=job_config) def run( self, diff --git a/python/gigl/src/mocking/dataset_asset_mocker.py b/python/gigl/src/mocking/dataset_asset_mocker.py index fa57d6e..b69d517 100644 --- a/python/gigl/src/mocking/dataset_asset_mocker.py +++ b/python/gigl/src/mocking/dataset_asset_mocker.py @@ -1,6 +1,6 @@ from typing import Optional -import gigl.src.common.constants.test_assets as test_asset_constants +import gigl.src.mocking.lib.constants as mocking_constants from gigl.common import GcsUri from gigl.common.logger import Logger from gigl.common.utils.gcs import GcsUtils @@ -68,12 +68,12 @@ def _update_supervised_node_classification_config_paths( pb.shared_config.flattened_graph_metadata.supervised_node_classification_output ) task_output.labeled_tfrecord_uri_prefix = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_output.labeled_tfrecord_uri_prefix, version=self._version ) ) task_output.unlabeled_tfrecord_uri_prefix = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_output.unlabeled_tfrecord_uri_prefix, version=self._version ) ) @@ -82,17 +82,17 @@ def _update_supervised_node_classification_config_paths( pb.shared_config.dataset_metadata.supervised_node_classification_dataset ) task_dataset.train_data_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_dataset.train_data_uri, version=self._version ) ) task_dataset.val_data_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_dataset.val_data_uri, version=self._version ) ) task_dataset.test_data_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_dataset.test_data_uri, version=self._version ) ) @@ -101,21 +101,21 @@ def _update_supervised_node_classification_config_paths( pb.shared_config.inference_metadata.node_type_to_inferencer_output_info_map ) for node_type in node_type_to_inferencer_output_info_map: - node_type_to_inferencer_output_info_map[node_type].predictions_path = ( - test_asset_constants.update_bq_table_with_test_assets_and_version( - bq_table=node_type_to_inferencer_output_info_map[ - node_type - ].predictions_path, - version=self._version, - ) + node_type_to_inferencer_output_info_map[ + node_type + ].predictions_path = mocking_constants.update_bq_table_with_test_assets_and_version( + bq_table=node_type_to_inferencer_output_info_map[ + node_type + ].predictions_path, + version=self._version, ) - node_type_to_inferencer_output_info_map[node_type].embeddings_path = ( - test_asset_constants.update_bq_table_with_test_assets_and_version( - bq_table=node_type_to_inferencer_output_info_map[ - node_type - ].embeddings_path, - version=self._version, - ) + node_type_to_inferencer_output_info_map[ + 
node_type + ].embeddings_path = mocking_constants.update_bq_table_with_test_assets_and_version( + bq_table=node_type_to_inferencer_output_info_map[ + node_type + ].embeddings_path, + version=self._version, ) def _update_node_anchor_based_link_prediction_config_paths( @@ -156,7 +156,7 @@ def _update_node_anchor_based_link_prediction_config_paths( pb.shared_config.flattened_graph_metadata.node_anchor_based_link_prediction_output ) task_output.tfrecord_uri_prefix = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_output.tfrecord_uri_prefix, version=self._version ) ) @@ -164,26 +164,26 @@ def _update_node_anchor_based_link_prediction_config_paths( node_type, random_negative_tfrecord_uri_prefix, ) in task_output.node_type_to_random_negative_tfrecord_uri_prefix.items(): - task_output.node_type_to_random_negative_tfrecord_uri_prefix[node_type] = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( - uri_str=random_negative_tfrecord_uri_prefix, version=self._version - ) + task_output.node_type_to_random_negative_tfrecord_uri_prefix[ + node_type + ] = mocking_constants.update_gcs_uri_with_test_assets_and_version( + uri_str=random_negative_tfrecord_uri_prefix, version=self._version ) task_dataset = ( pb.shared_config.dataset_metadata.node_anchor_based_link_prediction_dataset ) task_dataset.train_main_data_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_dataset.train_main_data_uri, version=self._version ) ) task_dataset.test_main_data_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_dataset.test_main_data_uri, version=self._version ) ) task_dataset.val_main_data_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=task_dataset.val_main_data_uri, version=self._version ) ) @@ -191,37 +191,37 @@ def _update_node_anchor_based_link_prediction_config_paths( node_type, random_negative_tfrecord_uri_prefix, ) in task_dataset.train_node_type_to_random_negative_data_uri.items(): - task_dataset.train_node_type_to_random_negative_data_uri[node_type] = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( - uri_str=random_negative_tfrecord_uri_prefix, version=self._version - ) + task_dataset.train_node_type_to_random_negative_data_uri[ + node_type + ] = mocking_constants.update_gcs_uri_with_test_assets_and_version( + uri_str=random_negative_tfrecord_uri_prefix, version=self._version ) for ( node_type, random_negative_tfrecord_uri_prefix, ) in task_dataset.val_node_type_to_random_negative_data_uri.items(): - task_dataset.val_node_type_to_random_negative_data_uri[node_type] = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( - uri_str=random_negative_tfrecord_uri_prefix, version=self._version - ) + task_dataset.val_node_type_to_random_negative_data_uri[ + node_type + ] = mocking_constants.update_gcs_uri_with_test_assets_and_version( + uri_str=random_negative_tfrecord_uri_prefix, version=self._version ) for ( node_type, random_negative_tfrecord_uri_prefix, ) in task_dataset.test_node_type_to_random_negative_data_uri.items(): - task_dataset.test_node_type_to_random_negative_data_uri[node_type] = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( - 
uri_str=random_negative_tfrecord_uri_prefix, version=self._version - ) + task_dataset.test_node_type_to_random_negative_data_uri[ + node_type + ] = mocking_constants.update_gcs_uri_with_test_assets_and_version( + uri_str=random_negative_tfrecord_uri_prefix, version=self._version ) inference_metadata = pb.shared_config.inference_metadata for node_type in inference_metadata.node_type_to_inferencer_output_info_map: inference_metadata.node_type_to_inferencer_output_info_map[ node_type - ].embeddings_path = test_asset_constants.update_bq_table_with_test_assets_and_version( + ].embeddings_path = mocking_constants.update_bq_table_with_test_assets_and_version( bq_table=inference_metadata.node_type_to_inferencer_output_info_map[ node_type ].embeddings_path, @@ -247,7 +247,7 @@ def _prepare_frozen_gbml_config_shared( ) frozen_gbml_config_pb.shared_config.preprocessed_metadata_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=frozen_gbml_config_pb.shared_config.preprocessed_metadata_uri, version=self._version, ) @@ -256,22 +256,22 @@ def _prepare_frozen_gbml_config_shared( frozen_gbml_config_pb.shared_config.trained_model_metadata ) trained_model_metadata.trained_model_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=trained_model_metadata.trained_model_uri, version=self._version ) ) trained_model_metadata.scripted_model_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=trained_model_metadata.scripted_model_uri, version=self._version ) ) trained_model_metadata.eval_metrics_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=trained_model_metadata.eval_metrics_uri, version=self._version ) ) trained_model_metadata.tensorboard_logs_uri = ( - test_asset_constants.update_gcs_uri_with_test_assets_and_version( + mocking_constants.update_gcs_uri_with_test_assets_and_version( uri_str=trained_model_metadata.tensorboard_logs_uri, version=self._version, ) @@ -286,7 +286,7 @@ def _populate_and_write_frozen_gbml_config( logger.info(self._frozen_gbml_config_pb) frozen_gbml_config_gcs_uri = ( - test_asset_constants.get_example_task_frozen_gbml_config_gcs_path( + mocking_constants.get_example_task_frozen_gbml_config_gcs_path( task_name=self._mocked_dataset_info.name, version=self._version ) ) @@ -435,11 +435,11 @@ def _mock_node_anchor_based_link_prediction_assets(self): def _prepare_env(self): bq_utils = BqUtils() bq_utils.create_bq_dataset( - dataset_id=test_asset_constants.EXAMPLE_TASK_ASSETS_BQ_PATH, exists_ok=True + dataset_id=mocking_constants.MOCK_DATA_BQ_DATASET_NAME, exists_ok=True ) gcs_utils = GcsUtils() gcs_utils.delete_files_in_bucket_dir( - gcs_path=test_asset_constants.get_example_task_static_assets_gcs_dir( + gcs_path=mocking_constants.get_example_task_static_assets_gcs_dir( task_name=self._mocked_dataset_info.name, version=self._version ) ) @@ -468,7 +468,7 @@ def mock_assets(self, mocked_dataset_info: MockedDatasetInfo) -> GcsUri: raise NotImplementedError frozen_gbml_config_uri = ( - test_asset_constants.get_example_task_frozen_gbml_config_gcs_path( + mocking_constants.get_example_task_frozen_gbml_config_gcs_path( task_name=self._mocked_dataset_info.name, version=self._version ) ) diff --git 
a/python/gigl/src/mocking/lib/constants.py b/python/gigl/src/mocking/lib/constants.py index b0f726e..cb18862 100644 --- a/python/gigl/src/mocking/lib/constants.py +++ b/python/gigl/src/mocking/lib/constants.py @@ -5,8 +5,8 @@ from gigl.src.common.utils.bq import BqUtils MOCK_DATA_GCS_BUCKET = GcsUri(f"gs://{dep_constants.GIGL_PUBLIC_BUCKET_NAME}/") +MOCK_DATA_BQ_DATASET_NAME = dep_constants.GIGL_PUBLIC_DATASET_NAME EXAMPLE_TASK_ASSETS_GCS_PATH = GcsUri.join(MOCK_DATA_GCS_BUCKET, "mocked_assets") -EXAMPLE_TASK_ASSETS_BQ_PATH = "external-snap-ci-github-gigl.gbml_mocked_assets" MOCKED_DATASET_ARTIFACT_METADATA_LOCAL_PATH = LocalUri.join( get_gigl_root_directory(), "src", @@ -35,7 +35,7 @@ def update_gcs_uri_with_test_assets_and_version(uri_str: str, version: str) -> s def update_bq_table_with_test_assets_and_version(bq_table: str, version: str) -> str: table_name = bq_table.split(".")[-1] replaced_table_name = f"{table_name}_{version}" - replaced_bq_table = f"{EXAMPLE_TASK_ASSETS_BQ_PATH}.{replaced_table_name}" + replaced_bq_table = f"{MOCK_DATA_BQ_DATASET_NAME}.{replaced_table_name}" return replaced_bq_table @@ -44,7 +44,7 @@ def get_example_task_nodes_bq_table_path( task_name: str, version: str, node_type: NodeType ) -> str: table_path = BqUtils.join_path( - EXAMPLE_TASK_ASSETS_BQ_PATH, f"{task_name}_{str(node_type)}_nodes_{version}" + MOCK_DATA_BQ_DATASET_NAME, f"{task_name}_{str(node_type)}_nodes_{version}" ) return table_path @@ -56,7 +56,7 @@ def get_example_task_edges_bq_table_path( edge_usage_type: EdgeUsageType, ) -> str: table_path = BqUtils.join_path( - EXAMPLE_TASK_ASSETS_BQ_PATH, + MOCK_DATA_BQ_DATASET_NAME, f"{task_name}_{str(edge_type)}_edges_{str(edge_usage_type)}_{version}", ) return table_path diff --git a/python/gigl/src/mocking/lib/mock_input_for_data_preprocessor.py b/python/gigl/src/mocking/lib/mock_input_for_data_preprocessor.py index c5bc0d5..c1a01bb 100644 --- a/python/gigl/src/mocking/lib/mock_input_for_data_preprocessor.py +++ b/python/gigl/src/mocking/lib/mock_input_for_data_preprocessor.py @@ -7,12 +7,12 @@ from gigl.common import UriFactory from gigl.common.logger import Logger -from gigl.src.common.constants.test_assets import ( +from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType +from gigl.src.common.utils.bq import BqUtils +from gigl.src.mocking.lib.constants import ( get_example_task_edges_bq_table_path, get_example_task_nodes_bq_table_path, ) -from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType -from gigl.src.common.utils.bq import BqUtils from gigl.src.mocking.lib.feature_handling import get_feature_field_name from gigl.src.mocking.lib.mocked_dataset_resources import MockedDatasetInfo diff --git a/python/gigl/src/mocking/lib/mock_input_for_split_generator.py b/python/gigl/src/mocking/lib/mock_input_for_split_generator.py index f33ac3e..920596f 100644 --- a/python/gigl/src/mocking/lib/mock_input_for_split_generator.py +++ b/python/gigl/src/mocking/lib/mock_input_for_split_generator.py @@ -22,12 +22,12 @@ def build_and_write_supervised_node_classification_subgraph_samples_from_mocked_ mocked_dataset_info=mocked_dataset_info ) - samples: List[training_samples_schema_pb2.SupervisedNodeClassificationSample] = ( - pyg_to_training_samples.build_supervised_node_classification_samples_from_pyg_heterodata( - hetero_data=hetero_data, - root_node_type=root_node_type, - graph_metadata_pb_wrapper=mocked_dataset_info.graph_metadata_pb_wrapper, - ) + samples: List[ + 
training_samples_schema_pb2.SupervisedNodeClassificationSample + ] = pyg_to_training_samples.build_supervised_node_classification_samples_from_pyg_heterodata( + hetero_data=hetero_data, + root_node_type=root_node_type, + graph_metadata_pb_wrapper=mocked_dataset_info.graph_metadata_pb_wrapper, ) # Write out to GbmlConfig-specified paths diff --git a/python/gigl/src/mocking/lib/mock_input_for_subgraph_sampler.py b/python/gigl/src/mocking/lib/mock_input_for_subgraph_sampler.py index fc59b5e..a8e49d8 100644 --- a/python/gigl/src/mocking/lib/mock_input_for_subgraph_sampler.py +++ b/python/gigl/src/mocking/lib/mock_input_for_subgraph_sampler.py @@ -9,12 +9,6 @@ from gigl.common import GcsUri, LocalUri, UriFactory from gigl.common.logger import Logger from gigl.common.utils.proto_utils import ProtoUtils -from gigl.src.common.constants.test_assets import ( - get_example_task_edge_features_gcs_dir, - get_example_task_edge_features_schema_gcs_path, - get_example_task_node_features_gcs_dir, - get_example_task_node_features_schema_gcs_path, -) from gigl.src.common.types.graph_data import ( CondensedEdgeType, CondensedNodeType, @@ -25,6 +19,12 @@ from gigl.src.common.utils.file_loader import FileLoader from gigl.src.data_preprocessor.lib.transform.tf_value_encoder import TFValueEncoder from gigl.src.data_preprocessor.lib.types import FeatureSpecDict, InstanceDict +from gigl.src.mocking.lib.constants import ( + get_example_task_edge_features_gcs_dir, + get_example_task_edge_features_schema_gcs_path, + get_example_task_node_features_gcs_dir, + get_example_task_node_features_schema_gcs_path, +) from gigl.src.mocking.lib.feature_handling import get_feature_field_name from gigl.src.mocking.lib.mocked_dataset_resources import MockedDatasetInfo from snapchat.research.gbml import gbml_config_pb2, preprocessed_metadata_pb2 @@ -289,15 +289,15 @@ def generate_preprocessed_tfrecord_data( node_labels=node_labels, ) - condensed_node_type_to_preprocessed_metadata[condensed_node_type] = ( - preprocessed_metadata_pb2.PreprocessedMetadata.NodeMetadataOutput( - node_id_key=node_preprocess_metadata.id_col, - feature_keys=node_preprocess_metadata.feature_cols, - label_keys=[node_preprocess_metadata.label_col] if node_preprocess_metadata.label_col is not None else None, # type: ignore - tfrecord_uri_prefix=node_preprocess_metadata.features_uri.uri, - schema_uri=node_preprocess_metadata.schema_uri.uri, - feature_dim=num_features, - ) + condensed_node_type_to_preprocessed_metadata[ + condensed_node_type + ] = preprocessed_metadata_pb2.PreprocessedMetadata.NodeMetadataOutput( + node_id_key=node_preprocess_metadata.id_col, + feature_keys=node_preprocess_metadata.feature_cols, + label_keys=[node_preprocess_metadata.label_col] if node_preprocess_metadata.label_col is not None else None, # type: ignore + tfrecord_uri_prefix=node_preprocess_metadata.features_uri.uri, + schema_uri=node_preprocess_metadata.schema_uri.uri, + feature_dim=num_features, ) num_features_by_edge_type = mocked_dataset_info.num_edge_features @@ -376,30 +376,30 @@ def generate_preprocessed_tfrecord_data( feature_dim=num_edge_feats, ) - edge_preprocess_metadata_pb_dict[user_def_label] = ( - user_def_edge_metadata_info_pb - ) - - condensed_edge_type_to_preprocessed_metadata[condensed_edge_type] = ( - preprocessed_metadata_pb2.PreprocessedMetadata.EdgeMetadataOutput( - src_node_id_key=edge_preprocess_metadata.src_id_col, - dst_node_id_key=edge_preprocess_metadata.dst_id_col, - main_edge_info=main_edge_metadata_info_pb, - 
positive_edge_info=edge_preprocess_metadata_pb_dict.get( - EdgeUsageType.POSITIVE, None - ), - negative_edge_info=edge_preprocess_metadata_pb_dict.get( - EdgeUsageType.NEGATIVE, None - ), - ) + edge_preprocess_metadata_pb_dict[ + user_def_label + ] = user_def_edge_metadata_info_pb + + condensed_edge_type_to_preprocessed_metadata[ + condensed_edge_type + ] = preprocessed_metadata_pb2.PreprocessedMetadata.EdgeMetadataOutput( + src_node_id_key=edge_preprocess_metadata.src_id_col, + dst_node_id_key=edge_preprocess_metadata.dst_id_col, + main_edge_info=main_edge_metadata_info_pb, + positive_edge_info=edge_preprocess_metadata_pb_dict.get( + EdgeUsageType.POSITIVE, None + ), + negative_edge_info=edge_preprocess_metadata_pb_dict.get( + EdgeUsageType.NEGATIVE, None + ), ) else: - condensed_edge_type_to_preprocessed_metadata[condensed_edge_type] = ( - preprocessed_metadata_pb2.PreprocessedMetadata.EdgeMetadataOutput( - src_node_id_key=edge_preprocess_metadata.src_id_col, - dst_node_id_key=edge_preprocess_metadata.dst_id_col, - main_edge_info=main_edge_metadata_info_pb, - ) + condensed_edge_type_to_preprocessed_metadata[ + condensed_edge_type + ] = preprocessed_metadata_pb2.PreprocessedMetadata.EdgeMetadataOutput( + src_node_id_key=edge_preprocess_metadata.src_id_col, + dst_node_id_key=edge_preprocess_metadata.dst_id_col, + main_edge_info=main_edge_metadata_info_pb, ) # Assemble Preprocessed Metadata pb and write out. diff --git a/python/gigl/src/mocking/lib/mock_input_for_trainer.py b/python/gigl/src/mocking/lib/mock_input_for_trainer.py index 472374b..ada7db4 100644 --- a/python/gigl/src/mocking/lib/mock_input_for_trainer.py +++ b/python/gigl/src/mocking/lib/mock_input_for_trainer.py @@ -30,12 +30,12 @@ def split_and_write_supervised_node_classification_subgraph_samples_from_mocked_ split_data: HeteroData = transductive_split_cls(hetero_data) # Build all SNC samples from dataset. - samples: List[training_samples_schema_pb2.SupervisedNodeClassificationSample] = ( - pyg_to_training_samples.build_supervised_node_classification_samples_from_pyg_heterodata( - hetero_data=split_data, - root_node_type=root_node_type, - graph_metadata_pb_wrapper=mocked_dataset_info.graph_metadata_pb_wrapper, - ) + samples: List[ + training_samples_schema_pb2.SupervisedNodeClassificationSample + ] = pyg_to_training_samples.build_supervised_node_classification_samples_from_pyg_heterodata( + hetero_data=split_data, + root_node_type=root_node_type, + graph_metadata_pb_wrapper=mocked_dataset_info.graph_metadata_pb_wrapper, ) # Separate into train / val / test sets according to mask. 
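# Illustrative sketch: the relocated helpers in gigl/src/mocking/lib/constants.py
# re-home mocked BQ tables under MOCK_DATA_BQ_DATASET_NAME instead of the removed
# hard-coded EXAMPLE_TASK_ASSETS_BQ_PATH. The dataset value below is hypothetical.
from gigl.src.mocking.lib.constants import update_bq_table_with_test_assets_and_version

# Only the trailing table name of the input path is kept; it is suffixed with the
# version and prefixed with the mock dataset, e.g. with
# MOCK_DATA_BQ_DATASET_NAME == "some-project.gigl_mocked_assets":
table = update_bq_table_with_test_assets_and_version(
    bq_table="another-project.another_dataset.cora_nodes", version="v2"
)
# table == "some-project.gigl_mocked_assets.cora_nodes_v2"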
diff --git a/python/gigl/src/mocking/lib/pyg_to_training_samples.py b/python/gigl/src/mocking/lib/pyg_to_training_samples.py index 8b5bcf6..050f844 100644 --- a/python/gigl/src/mocking/lib/pyg_to_training_samples.py +++ b/python/gigl/src/mocking/lib/pyg_to_training_samples.py @@ -230,9 +230,9 @@ def build_supervised_node_classification_samples_from_pyg_heterodata( root_node_type: NodeType, graph_metadata_pb_wrapper: GraphMetadataPbWrapper, ) -> List[training_samples_schema_pb2.SupervisedNodeClassificationSample]: - samples: List[training_samples_schema_pb2.SupervisedNodeClassificationSample] = ( - list() - ) + samples: List[ + training_samples_schema_pb2.SupervisedNodeClassificationSample + ] = list() assert ( hetero_data[str(root_node_type)].get("y") is not None diff --git a/python/gigl/src/mocking/lib/versioning.py b/python/gigl/src/mocking/lib/versioning.py index 35444c5..f76729d 100644 --- a/python/gigl/src/mocking/lib/versioning.py +++ b/python/gigl/src/mocking/lib/versioning.py @@ -5,9 +5,7 @@ from gigl.common import Uri, UriFactory from gigl.common.logger import Logger -from gigl.src.common.constants.test_assets import ( - MOCKED_DATASET_ARTIFACT_METADATA_LOCAL_PATH, -) +from gigl.src.mocking.lib.constants import MOCKED_DATASET_ARTIFACT_METADATA_LOCAL_PATH logger = Logger() diff --git a/python/gigl/src/mocking/mocking_assets/passthrough_preprocessor_config_for_mocked_assets.py b/python/gigl/src/mocking/mocking_assets/passthrough_preprocessor_config_for_mocked_assets.py index 73f2b60..951cce1 100644 --- a/python/gigl/src/mocking/mocking_assets/passthrough_preprocessor_config_for_mocked_assets.py +++ b/python/gigl/src/mocking/mocking_assets/passthrough_preprocessor_config_for_mocked_assets.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List -import gigl.src.common.constants.test_assets as test_tasks_constants +import gigl.src.mocking.lib.constants as test_tasks_constants from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType, Relation from gigl.src.data_preprocessor.lib.data_preprocessor_config import ( DataPreprocessorConfig, @@ -89,14 +89,14 @@ def get_nodes_preprocessing_spec( ) node_features_outputs = node_feature_fields - node_data_ref_to_preprocessing_specs[node_data_ref] = ( - NodeDataPreprocessingSpec( - identifier_output=node_output_id, - features_outputs=node_features_outputs, - labels_outputs=node_labels_outputs, - feature_spec_fn=feature_spec_fn, - preprocessing_fn=preprocessing_fn, - ) + node_data_ref_to_preprocessing_specs[ + node_data_ref + ] = NodeDataPreprocessingSpec( + identifier_output=node_output_id, + features_outputs=node_features_outputs, + labels_outputs=node_labels_outputs, + feature_spec_fn=feature_spec_fn, + preprocessing_fn=preprocessing_fn, ) return node_data_ref_to_preprocessing_specs @@ -145,19 +145,19 @@ def get_edges_preprocessing_spec( preprocessing_fn = build_passthrough_transform_preprocessing_fn() - edge_data_ref_to_preprocessing_specs[main_edge_data_ref] = ( - EdgeDataPreprocessingSpec( - identifier_output=edge_output_id, - features_outputs=default_edge_feature_fields, - feature_spec_fn=build_ingestion_feature_spec_fn( - fixed_int_fields=[ - self.__mocked_dataset.edge_src_column_name, - self.__mocked_dataset.edge_dst_column_name, - ], - fixed_float_fields=default_edge_feature_fields, - ), - preprocessing_fn=preprocessing_fn, - ) + edge_data_ref_to_preprocessing_specs[ + main_edge_data_ref + ] = EdgeDataPreprocessingSpec( + identifier_output=edge_output_id, + features_outputs=default_edge_feature_fields, + 
feature_spec_fn=build_ingestion_feature_spec_fn( + fixed_int_fields=[ + self.__mocked_dataset.edge_src_column_name, + self.__mocked_dataset.edge_dst_column_name, + ], + fixed_float_fields=default_edge_feature_fields, + ), + preprocessing_fn=preprocessing_fn, ) should_use_user_defined_labels_for_this_edge_type = ( diff --git a/python/gigl/src/post_process/post_processor.py b/python/gigl/src/post_process/post_processor.py index e410137..3b60ffc 100644 --- a/python/gigl/src/post_process/post_processor.py +++ b/python/gigl/src/post_process/post_processor.py @@ -62,9 +62,9 @@ def __run_post_process( logger.info( f"Running user post processor class: {post_processor.__class__}, with config: {gbml_config_pb}" ) - post_processor_metrics: Optional[EvalMetricsCollection] = ( - post_processor.run_post_process(gbml_config_pb=gbml_config_pb) - ) + post_processor_metrics: Optional[ + EvalMetricsCollection + ] = post_processor.run_post_process(gbml_config_pb=gbml_config_pb) if post_processor_metrics is not None: self.__write_post_processor_metrics_to_uri( model_eval_metrics=post_processor_metrics, diff --git a/python/gigl/src/post_process/utils/component_runtime.py b/python/gigl/src/post_process/utils/component_runtime.py index ce59da3..f62db64 100644 --- a/python/gigl/src/post_process/utils/component_runtime.py +++ b/python/gigl/src/post_process/utils/component_runtime.py @@ -1,47 +1,37 @@ -from typing import Dict, Optional +from typing import Dict -from gigl.common.services.kfp import KFPService -from gigl.common.types.wrappers.argo_workflow_manifest import ( - ArgoWorkflowManifestWrapper, -) -from gigl.common.types.wrappers.kfp_api import ApiRunDetailWrapper +# from gigl.common.services.kfp import KFPService +from gigl.common.types.wrappers.kfp_api import KfpTaskDetails - -def get_runtime_manifest_from_kfp_pipeline( - kfp_service: KFPService, experiment_name: str, kfp_run_name: str -) -> ArgoWorkflowManifestWrapper: - pipeline_run_detail: Optional[ApiRunDetailWrapper] = ( - kfp_service.get_latest_run_with_name( - kfp_run_name=kfp_run_name, experiment_name=experiment_name - ) - ) - assert pipeline_run_detail is not None - manifest = pipeline_run_detail.workflow_manifest - return manifest +# TODO: This needs to update ot Vertex AI +# def get_task_details_from_kfp_pipeline( +# kfp_service: KFPService, experiment_name: str, kfp_run_name: str +# ) -> Dict[str, KfpTaskDetails]: +# pipeline_run_detail: Optional[ +# ApiRunDetailWrapper +# ] = kfp_service.get_latest_run_with_name( +# kfp_run_name=kfp_run_name, experiment_name=experiment_name +# ) +# assert pipeline_run_detail is not None +# return pipeline_run_detail.task_details_map def assert_component_runtimes_match_expected_parameters( - runtime_manifest: ArgoWorkflowManifestWrapper, + task_details_map: Dict[str, KfpTaskDetails], component_name_runtime_hr: Dict[str, int], ) -> None: - checked_components = set() for component_name, expected_runtime_hr in component_name_runtime_hr.items(): - for ( - _, - pipeline_node_data, - ) in runtime_manifest.component_status_by_component_display_name.items(): - if pipeline_node_data.display_name == component_name: - t_start = pipeline_node_data.started_at - t_finish = pipeline_node_data.finished_at - runtime_sec = (t_finish - t_start).seconds - expected_runtime_sec = expected_runtime_hr * 3600 - if runtime_sec > expected_runtime_sec: - raise ValueError( - f"Component {component_name} took longer than expected runtime of {expected_runtime_hr} hrs. Actual runtime was {t_finish- t_start}." 
-                )
-            else:
-                checked_components.add(component_name)
-    if len(checked_components) != len(component_name_runtime_hr):
-        raise ValueError(
-            f"run time check completed only for {checked_components}; components {component_name_runtime_hr.keys()- checked_components} not found in pipeline runtime manifest."
-        )
+        relevant_task = task_details_map.get(component_name)
+        if relevant_task is None:
+            raise ValueError(
+                f"Component {component_name} not found in pipeline runtime manifest: {task_details_map}"
+            )
+
+        t_start = relevant_task.started_at
+        t_finish = relevant_task.finished_at
+        runtime_sec = (t_finish - t_start).seconds
+        expected_runtime_sec = expected_runtime_hr * 3600
+        if runtime_sec > expected_runtime_sec:
+            raise ValueError(
+                f"Component {component_name} took longer than expected runtime of {expected_runtime_hr} hrs. Actual runtime was {t_finish - t_start}."
+            )
diff --git a/python/gigl/src/post_process/utils/unenumeration.py b/python/gigl/src/post_process/utils/unenumeration.py
index f8d5d2b..c92b4eb 100644
--- a/python/gigl/src/post_process/utils/unenumeration.py
+++ b/python/gigl/src/post_process/utils/unenumeration.py
@@ -35,7 +35,9 @@ def _unenumerate_single_inferred_asset(
         inference_output_unenumerated_assets_table (str): BQ table which contains "final" unenumerated assets.
         enumerator_mapping_table (str): BQ table which contains mapping between enumerated and original ids.
     """
-    bq_utils = BqUtils()
+    # TODO: relevant resource config args should be passed through instead of using global config
+    resource_config = get_resource_config()
+    bq_utils = BqUtils(project=resource_config.project)
     bq_utils.run_query(
         query=inference_queries.UNENUMERATION_QUERY.format(
             enumerated_assets_table=inference_output_enumerated_assets_table,
@@ -44,9 +46,7 @@
             original_node_id_field=enumeration_queries.DEFAULT_ORIGINAL_NODE_ID_FIELD,
             enumerated_int_id_field=enumeration_queries.DEFAULT_ENUMERATED_NODE_ID_FIELD,
         ),
-        labels=get_resource_config().get_resource_labels(
-            component=GiGLComponents.Inferencer
-        ),
+        labels=resource_config.get_resource_labels(component=GiGLComponents.Inferencer),
         destination=inference_output_unenumerated_assets_table,
         write_disposition=bigquery.job.WriteDisposition.WRITE_TRUNCATE,
     )
@@ -160,5 +160,7 @@ def unenumerate_all_inferred_bq_assets(gbml_config_pb_wrapper: GbmlConfigPbWrapp
         )
         futures.append(future)

-    concurrent.futures.wait(futures)
+    for fut in concurrent.futures.as_completed(futures):
+        fut.result()  # Re-raise any exceptions
+
     logger.info(f"Output to tables: {', '.join(unenumerated_assets_output_tables)}")
diff --git a/python/gigl/src/split_generator/split_generator.py b/python/gigl/src/split_generator/split_generator.py
index 78dd95e..db3969c 100644
--- a/python/gigl/src/split_generator/split_generator.py
+++ b/python/gigl/src/split_generator/split_generator.py
@@ -6,6 +6,7 @@
 import gigl.env.dep_constants as dep_constants
 import gigl.src.common.constants.gcs as gcs_constants
 from gigl.common import GcsUri, LocalUri, Uri, UriFactory
+from gigl.common.constants import SPARK_35_TFRECORD_JAR_GCS_PATH
 from gigl.common.logger import Logger
 from gigl.common.metrics.decorators import flushes_metrics, profileit
 from gigl.common.utils.gcs import GcsUtils
@@ -18,7 +19,6 @@
     get_metrics_service_instance,
     initialize_metrics,
 )
-from gigl.common.constants import SPARK_35_TFRECORD_JAR_GCS_PATH
 from gigl.src.common.utils.spark_job_manager import (
     DataprocClusterInitData,
     SparkJobManager,
 )
diff --git
a/python/gigl/src/subgraph_sampler/subgraph_sampler.py b/python/gigl/src/subgraph_sampler/subgraph_sampler.py index 56ae1ea..6161884 100644 --- a/python/gigl/src/subgraph_sampler/subgraph_sampler.py +++ b/python/gigl/src/subgraph_sampler/subgraph_sampler.py @@ -8,6 +8,10 @@ import gigl.src.common.constants.gcs as gcs_constants import gigl.src.common.constants.metrics as metrics_constants from gigl.common import GcsUri, LocalUri, Uri, UriFactory +from gigl.common.constants import ( + SPARK_31_TFRECORD_JAR_GCS_PATH, + SPARK_35_TFRECORD_JAR_GCS_PATH, +) from gigl.common.logger import Logger from gigl.common.metrics.decorators import flushes_metrics, profileit from gigl.common.utils import os_utils @@ -27,7 +31,6 @@ SparkJobManager, ) from gigl.src.subgraph_sampler.lib.ingestion_protocol import BaseIngestion -from gigl.common.constants import SPARK_31_TFRECORD_JAR_GCS_PATH, SPARK_35_TFRECORD_JAR_GCS_PATH logger = Logger() @@ -35,6 +38,7 @@ hours=5 ) # Allowed max job duration for SGS job -- for MAU workload + class SubgraphSampler: """ GiGL Component that generates k-hop localized subgraphs for each node in the graph using Spark/Scala running on Dataproc. @@ -75,7 +79,7 @@ def run( debug_cluster_owner_alias: Optional[str] = None, custom_worker_image_uri: Optional[str] = None, skip_cluster_delete: bool = False, - additional_spark35_local_jar_file_paths: Sequence[LocalUri] = (), + additional_spark35_jar_file_uris: Sequence[Uri] = (), ): resource_config = get_resource_config(resource_config_uri=resource_config_uri) gbml_config_pb_wrapper: GbmlConfigPbWrapper = ( @@ -180,26 +184,27 @@ def run( jar_file_gcs_bucket: GcsUri = gcs_constants.get_subgraph_sampler_root_dir( applied_task_identifier=applied_task_identifier ) - jars_to_upload: dict[LocalUri, GcsUri] = { + jars_to_upload: dict[Uri, GcsUri] = { main_jar_file_uri: GcsUri.join(jar_file_gcs_bucket, main_jar_file_name) } # Since Spark 3.5 and Spark 3.1 are using different versions of Scala # We need to pass the correct extra jar file to the Spark cluster, - # Otherwise, we may see some errors like:8 + # Otherwise, we may see some errors like: # java.io.InvalidClassException; local class incompatible: stream classdesc serialVersionUID = -1, local class serialVersionUID = 2 if use_spark35: - for jar in additional_spark35_local_jar_file_paths: - file_name = os.path.basename(jar.uri) - jars_to_upload[jar] = GcsUri.join(jar_file_gcs_bucket, file_name) + for jar_uri in additional_spark35_jar_file_uris: + jars_to_upload[jar_uri] = GcsUri.join( + jar_file_gcs_bucket, jar_uri.get_basename() + ) sgs_jar_file_gcs_path = GcsUri.join( jar_file_gcs_bucket, main_jar_file_name, ) - logger.info(f"Uploading local jar files {jars_to_upload}") - gcs_utils.upload_files_to_gcs(jars_to_upload, parallel=True) + logger.info(f"Uploading jar files {jars_to_upload}") + FileLoader().load_files(source_to_dest_file_uri_map=jars_to_upload) extra_jar_file_uris = [ jars_to_upload[jar].uri @@ -350,12 +355,12 @@ def run( required=False, ) parser.add_argument( - "--additional_spark35_local_jar_file_paths", + "--additional_spark35_jar_file_uris", action="append", type=str, required=False, default=[], - help="Additional local files to be added to the Spark cluster.", + help="Additional URIs to be added to the Spark cluster.", ) args = parser.parse_args() @@ -383,7 +388,9 @@ def run( resource_config_uri=resource_config_uri, custom_worker_image_uri=custom_worker_image_uri, # Filter out empty strings which kfp *may* add... 
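# Sketch of the jar-staging flow introduced above: FileLoader understands both
# local and GCS source URIs, so the extra Spark 3.5 jars no longer have to be
# local files. All paths below are hypothetical.
from gigl.common import GcsUri, UriFactory
from gigl.src.common.utils.file_loader import FileLoader

jar_file_gcs_bucket = GcsUri("gs://my-staging-bucket/subgraph_sampler/jars")
additional_spark35_jar_file_uris = [
    UriFactory.create_uri("gs://shared-jars/spark-tfrecord_2.12.jar"),  # already on GCS
    UriFactory.create_uri("/tmp/custom_udfs.jar"),  # local; gets uploaded
]
jars_to_upload = {
    jar_uri: GcsUri.join(jar_file_gcs_bucket, jar_uri.get_basename())
    for jar_uri in additional_spark35_jar_file_uris
}
# Copies or uploads every source URI to its staged GCS destination, replacing the
# old GcsUtils.upload_files_to_gcs call that only accepted local paths.
FileLoader().load_files(source_to_dest_file_uri_map=jars_to_upload)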
- additional_spark35_local_jar_file_paths=[ - LocalUri(jar) for jar in args.additional_spark35_local_jar_file_paths if jar + additional_spark35_jar_file_uris=[ + UriFactory.create_uri(jar) + for jar in args.additional_spark35_jar_file_uris + if jar ], ) diff --git a/python/gigl/src/training/v1/lib/data_loaders/node_anchor_based_link_prediction_data_loader.py b/python/gigl/src/training/v1/lib/data_loaders/node_anchor_based_link_prediction_data_loader.py index ad55a41..4babaec 100644 --- a/python/gigl/src/training/v1/lib/data_loaders/node_anchor_based_link_prediction_data_loader.py +++ b/python/gigl/src/training/v1/lib/data_loaders/node_anchor_based_link_prediction_data_loader.py @@ -128,9 +128,9 @@ def collate_pyg_node_anchor_based_link_prediction_minibatch( condensed_node_type_to_subgraph_id_to_global_node_id: Dict[ CondensedNodeType, Dict[NodeId, NodeId] ] = defaultdict(dict) - node_mapping: Dict[Node, Node] = ( - batch_graph_data.global_node_to_subgraph_node_mapping - ) + node_mapping: Dict[ + Node, Node + ] = batch_graph_data.global_node_to_subgraph_node_mapping for node_with_global_id, node_with_subgraph_id in node_mapping.items(): condensed_node_type: CondensedNodeType = ( graph_metadata_pb_wrapper.node_type_to_condensed_node_type_map[ diff --git a/python/gigl/src/training/v1/lib/data_loaders/rooted_node_neighborhood_data_loader.py b/python/gigl/src/training/v1/lib/data_loaders/rooted_node_neighborhood_data_loader.py index 8f47ad4..9792330 100644 --- a/python/gigl/src/training/v1/lib/data_loaders/rooted_node_neighborhood_data_loader.py +++ b/python/gigl/src/training/v1/lib/data_loaders/rooted_node_neighborhood_data_loader.py @@ -106,9 +106,9 @@ def collate_pyg_rooted_node_neighborhood_minibatch( builder.add_graph_data(graph_data=graph_data) batch_graph_data = builder.build() - node_mapping: Dict[Node, Node] = ( - batch_graph_data.global_node_to_subgraph_node_mapping - ) + node_mapping: Dict[ + Node, Node + ] = batch_graph_data.global_node_to_subgraph_node_mapping condensed_node_type_to_subgraph_id_to_global_node_id: Dict[ CondensedNodeType, Dict[NodeId, NodeId] @@ -138,9 +138,9 @@ def collate_pyg_rooted_node_neighborhood_minibatch( node_type ] ) - condensed_node_type_to_root_node_indices_map[condensed_node_type] = ( - torch.LongTensor(root_node_indices_list) - ) + condensed_node_type_to_root_node_indices_map[ + condensed_node_type + ] = torch.LongTensor(root_node_indices_list) batch_graph_data.coalesce() batch_graph_data = cast_graph_for_training( batch_graph_data=batch_graph_data, diff --git a/python/gigl/src/training/v1/lib/data_loaders/supervised_node_classification_data_loader.py b/python/gigl/src/training/v1/lib/data_loaders/supervised_node_classification_data_loader.py index dcd1d02..e423f63 100644 --- a/python/gigl/src/training/v1/lib/data_loaders/supervised_node_classification_data_loader.py +++ b/python/gigl/src/training/v1/lib/data_loaders/supervised_node_classification_data_loader.py @@ -36,9 +36,9 @@ class SupervisedNodeClassificationBatch: ] # batch-coalesced graph data used for message passing root_node_indices: torch.LongTensor # dtype: int64, shape: [num_root_nodes, ] root_nodes: List[Node] # len(root_nodes) == number of graphs in Batch - root_node_labels: Optional[torch.LongTensor] = ( - None # dtype: int64, shape: [num_root_nodes, ] - ) + root_node_labels: Optional[ + torch.LongTensor + ] = None # dtype: int64, shape: [num_root_nodes, ] @staticmethod def preprocess_node_classification_sample_fn( diff --git a/python/gigl/src/training/v1/trainer.py 
b/python/gigl/src/training/v1/trainer.py
index b5756e8..1a936f1 100644
--- a/python/gigl/src/training/v1/trainer.py
+++ b/python/gigl/src/training/v1/trainer.py
@@ -75,7 +75,7 @@ def run(
             staging_bucket=resource_config.temp_assets_regional_bucket_path.uri,
         )

-        vertex_ai_service.run(job_config=job_config)
+        vertex_ai_service.launch_job(job_config=job_config)

     elif isinstance(trainer_config, LocalResourceConfig):
         training_process = GnnTrainingProcess()
diff --git a/python/gigl/src/training/v2/glt_trainer.py b/python/gigl/src/training/v2/glt_trainer.py
index 1ebea38..fb08483 100644
--- a/python/gigl/src/training/v2/glt_trainer.py
+++ b/python/gigl/src/training/v2/glt_trainer.py
@@ -113,11 +113,9 @@ def __execute_VAI_training(
             labels=resource_config.get_resource_labels(
                 component=GiGLComponents.Inferencer
             ),
-            timeout_s=(
-                trainer_resource_config.timeout
-                if trainer_resource_config.timeout
-                else None
-            ),
+            timeout_s=trainer_resource_config.timeout
+            if trainer_resource_config.timeout
+            else None,
         )
         vertex_ai_service = VertexAIService(
             project=resource_config.project,
@@ -125,7 +123,7 @@ def __execute_VAI_training(
             service_account=resource_config.service_account_email,
             staging_bucket=resource_config.temp_assets_regional_bucket_path.uri,
         )
-        vertex_ai_service.run(job_config=job_config)
+        vertex_ai_service.launch_job(job_config=job_config)

     def run(
         self,
diff --git a/python/gigl/src/validation_check/config_validator.py b/python/gigl/src/validation_check/config_validator.py
index 6ea19f4..0d1ff67 100644
--- a/python/gigl/src/validation_check/config_validator.py
+++ b/python/gigl/src/validation_check/config_validator.py
@@ -15,6 +15,9 @@
     assert_subgraph_sampler_output_exists,
     assert_trained_model_exists,
 )
+from gigl.src.validation_check.libs.name_checks import (
+    check_if_kfp_pipeline_job_name_valid,
+)
 from gigl.src.validation_check.libs.resource_config_checks import (
     check_if_inferencer_resource_config_valid,
     check_if_preprocessor_resource_config_valid,
@@ -27,7 +30,6 @@
     check_if_data_preprocessor_config_cls_valid,
     check_if_graph_metadata_valid,
     check_if_inferencer_cls_valid,
-    check_if_kfp_pipeline_job_name_valid,
     check_if_post_processor_cls_valid,
     check_if_preprocessed_metadata_valid,
     check_if_split_generator_config_valid,
diff --git a/python/gigl/src/validation_check/libs/name_checks.py b/python/gigl/src/validation_check/libs/name_checks.py
new file mode 100644
index 0000000..7ca7d09
--- /dev/null
+++ b/python/gigl/src/validation_check/libs/name_checks.py
@@ -0,0 +1,22 @@
+"""Checks for whether assorted strings are valid."""
+import re
+
+from gigl.common.logger import Logger
+
+logger = Logger()
+
+
+def check_if_kfp_pipeline_job_name_valid(job_name: str) -> None:
+    """
+    Check if a KFP pipeline job name is valid. The name is used to start a Spark cluster and must match the expected pattern.
+    The KFP pipeline job name is also used to generate an AppliedTaskIdentifier for each component.
+    """
+    # TODO(mkolodner, kmonte): Check if our max length should be shorter.
+    logger.info(f"Config validation check: if job_name: {job_name} is valid.")
+    if not bool(re.match(r"^(?:[a-z](?:[_a-z0-9]{0,49}[a-z0-9])?)$", job_name)):
+        raise ValueError(
+            f"Invalid 'job_name'. Only lowercase letters, numbers, and underscores are allowed. "
+            f"The name must start with a lowercase letter and end with a lowercase letter or number. "
+            "The name must be between 1 and 51 characters long. "
+            f"'job_name' provided: {job_name}."
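# A few sanity checks against the pattern above (sketch; these follow directly
# from the regex as written):
import re

_PATTERN = re.compile(r"^(?:[a-z](?:[_a-z0-9]{0,49}[a-z0-9])?)$")
assert _PATTERN.match("cora_nablp_v2")  # starts with a letter, ends with a digit
assert _PATTERN.match("a")  # a single lowercase letter is allowed
assert not _PATTERN.match("2cora")  # must start with a lowercase letter
assert not _PATTERN.match("cora_")  # must end with a lowercase letter or digit
assert not _PATTERN.match("x" * 52)  # at most 51 characters (1 + 49 + 1)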
+ ) diff --git a/python/gigl/types/data.py b/python/gigl/types/data.py new file mode 100644 index 0000000..46e702e --- /dev/null +++ b/python/gigl/types/data.py @@ -0,0 +1,86 @@ +from collections import abc +from dataclasses import dataclass +from typing import Optional, Union + +import torch + +from gigl.common.logger import Logger +from gigl.src.common.types.graph_data import EdgeType, NodeType +from gigl.types.distributed import ( + NEGATIVE_LABEL_RELATION, + POSITIVE_LABEL_RELATION, + to_heterogeneous_edge, + to_heterogeneous_node, +) + +logger = Logger() + + +# This dataclass should not be frozen, as we are expected to delete its members once they have been registered inside of the partitioner +# in order to save memory. +@dataclass +class LoadedGraphTensors: + # Unpartitioned Node Ids + node_ids: Union[torch.Tensor, dict[NodeType, torch.Tensor]] + # Unpartitioned Node Features + node_features: Optional[Union[torch.Tensor, dict[NodeType, torch.Tensor]]] + # Unpartitioned Edge Index + edge_index: Union[torch.Tensor, dict[EdgeType, torch.Tensor]] + # Unpartitioned Edge Features + edge_features: Optional[Union[torch.Tensor, dict[EdgeType, torch.Tensor]]] + # Unpartitioned Positive Edge Label + positive_label: Optional[Union[torch.Tensor, dict[EdgeType, torch.Tensor]]] + # Unpartitioned Negative Edge Label + negative_label: Optional[Union[torch.Tensor, dict[EdgeType, torch.Tensor]]] + + def treat_labels_as_edges(self) -> None: + """Convert positive and negative labels to edges. Converts this object in-place to a "heterogeneous" representation. + + This requires the following conditions and will throw if they are not met: + 1. The node_ids, node_features, edge_index, and edge_features are not dictionaries (we loaded a homogeneous graph). + 2. The positive_label and negative_label are not None and are Tensors, not dictionaries. + """ + # TODO(kmonte): We should support heterogeneous graphs in the future. + if ( + isinstance(self.node_ids, abc.Mapping) + or isinstance(self.node_features, abc.Mapping) + or isinstance(self.edge_index, abc.Mapping) + or isinstance(self.edge_features, abc.Mapping) + or isinstance(self.positive_label, abc.Mapping) + or isinstance(self.negative_label, abc.Mapping) + ): + raise ValueError( + "Cannot treat labels as edges when using heterogeneous graph tensors." + ) + if self.positive_label is None or self.negative_label is None: + raise ValueError( + "Cannot treat labels as edges when positive or negative labels are None." + ) + + edge_index_with_labels = to_heterogeneous_edge(self.edge_index) + main_edge_type = next(iter(edge_index_with_labels.keys())) + logger.info( + f"Basing positive and negative labels on edge types on main label edge type: {main_edge_type}." + ) + positive_label_edge_type = EdgeType( + main_edge_type.src_node_type, + POSITIVE_LABEL_RELATION, + main_edge_type.dst_node_type, + ) + edge_index_with_labels[positive_label_edge_type] = self.positive_label + negative_label_edge_type = EdgeType( + main_edge_type.src_node_type, + NEGATIVE_LABEL_RELATION, + main_edge_type.dst_node_type, + ) + edge_index_with_labels[negative_label_edge_type] = self.negative_label + logger.info( + f"Treating positive labels as edge type {positive_label_edge_type} and negative labels as edge type {negative_label_edge_type}." 
+ ) + + self.node_ids = to_heterogeneous_node(self.node_ids) + self.node_features = to_heterogeneous_node(self.node_features) + self.edge_index = edge_index_with_labels + self.edge_features = to_heterogeneous_edge(self.edge_features) + self.positive_label = None + self.negative_label = None diff --git a/python/gigl/types/distributed.py b/python/gigl/types/distributed.py index 6a553c2..6c06bd3 100644 --- a/python/gigl/types/distributed.py +++ b/python/gigl/types/distributed.py @@ -1,8 +1,8 @@ from dataclasses import dataclass -from enum import Enum -from typing import Dict, Optional, Union +from typing import Optional, TypeVar, Union, overload import torch +from graphlearn_torch.partition import PartitionBook from gigl.src.common.types.graph_data import EdgeType, NodeType, Relation @@ -13,10 +13,8 @@ dst_node_type=DEFAULT_HOMOGENEOUS_NODE_TYPE, ) - -class EdgeAssignStrategy(Enum): - BY_SOURCE_NODE = "BY_SOURCE_NODE" - BY_DESTINATION_NODE = "BY_DESTINATION_NODE" +POSITIVE_LABEL_RELATION = Relation("positive_label") +NEGATIVE_LABEL_RELATION = Relation("negative_label") @dataclass(frozen=True) @@ -41,35 +39,106 @@ class GraphPartitionData: weights: Optional[torch.Tensor] = None -@dataclass(frozen=True) +# This dataclass should not be frozen, as we are expected to delete partition outputs once they have been registered inside of GLT DistDataset +# in order to save memory. +@dataclass class PartitionOutput: # Node partition book - node_partition_book: Union[torch.Tensor, Dict[NodeType, torch.Tensor]] + node_partition_book: Union[PartitionBook, dict[NodeType, PartitionBook]] # Edge partition book - edge_partition_book: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + edge_partition_book: Union[PartitionBook, dict[EdgeType, PartitionBook]] - # Partitioned edge index on current rank - partitioned_edge_index: Union[ - GraphPartitionData, Dict[EdgeType, GraphPartitionData] + # Partitioned edge index on current rank. This field will always be populated after partitioning. However, we may set this + # field to None during dataset.build() in order to minimize the peak memory usage, and as a result type this as Optional. + partitioned_edge_index: Optional[ + Union[GraphPartitionData, dict[EdgeType, GraphPartitionData]] ] # Node features on current rank, May be None if node features are not partitioned partitioned_node_features: Optional[ - Union[FeaturePartitionData, Dict[NodeType, FeaturePartitionData]] + Union[FeaturePartitionData, dict[NodeType, FeaturePartitionData]] ] # Edge features on current rank, May be None if edge features are not partitioned partitioned_edge_features: Optional[ - Union[FeaturePartitionData, Dict[EdgeType, FeaturePartitionData]] + Union[FeaturePartitionData, dict[EdgeType, FeaturePartitionData]] ] # Positive edge indices on current rank, May be None if positive edge labels are not partitioned partitioned_positive_labels: Optional[ - Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + Union[torch.Tensor, dict[EdgeType, torch.Tensor]] ] # Negative edge indices on current rank, May be None if negative edge labels are not partitioned partitioned_negative_labels: Optional[ - Union[torch.Tensor, Dict[EdgeType, torch.Tensor]] + Union[torch.Tensor, dict[EdgeType, torch.Tensor]] ] + + +_T = TypeVar("_T") + + +@overload +def to_heterogeneous_node(x: None) -> None: + ... + + +@overload +def to_heterogeneous_node(x: Union[_T, dict[NodeType, _T]]) -> dict[NodeType, _T]: + ... 
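# Sketch of what treat_labels_as_edges() produces for a homogeneous graph, using
# the to_heterogeneous_* helpers below (toy tensors, hypothetical values):
import torch

from gigl.types.distributed import to_heterogeneous_edge, to_heterogeneous_node

node_ids = to_heterogeneous_node(torch.arange(3))
# -> {DEFAULT_HOMOGENEOUS_NODE_TYPE: tensor([0, 1, 2])}
edge_index = to_heterogeneous_edge(torch.tensor([[0, 1], [1, 2]]))
# -> {DEFAULT_HOMOGENEOUS_EDGE_TYPE: tensor([[0, 1], [1, 2]])}
# treat_labels_as_edges() additionally inserts the positive and negative labels
# under EdgeType(src, POSITIVE_LABEL_RELATION, dst) and
# EdgeType(src, NEGATIVE_LABEL_RELATION, dst), with src/dst copied from the main
# edge type, then clears positive_label / negative_label so the partitioner only
# ever sees labels as ordinary edges.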
+ + +def to_heterogeneous_node( + x: Optional[Union[_T, dict[NodeType, _T]]] +) -> Optional[dict[NodeType, _T]]: + if x is None: + return None + if isinstance(x, dict): + return x + return {DEFAULT_HOMOGENEOUS_NODE_TYPE: x} + + +@overload +def to_heterogeneous_edge(x: None) -> None: + ... + + +@overload +def to_heterogeneous_edge(x: Union[_T, dict[EdgeType, _T]]) -> dict[EdgeType, _T]: + ... + + +def to_heterogeneous_edge( + x: Optional[Union[_T, dict[EdgeType, _T]]] +) -> Optional[dict[EdgeType, _T]]: + if x is None: + return None + if isinstance(x, dict): + return x + return {DEFAULT_HOMOGENEOUS_EDGE_TYPE: x} + + +@overload +def to_homogeneous(x: None) -> None: + ... + + +@overload +def to_homogeneous(x: Union[_T, dict[Union[NodeType, EdgeType], _T]]) -> _T: + ... + + +def to_homogeneous( + x: Optional[Union[_T, dict[Union[NodeType, EdgeType], _T]]] +) -> Optional[_T]: + if x is None: + return None + if isinstance(x, dict): + if len(x) != 1: + raise ValueError( + f"Expected a single value in the dictionary, but got multiple keys: {x.keys()}" + ) + n = next(iter(x.values())) + return n + return x diff --git a/python/gigl/distributed/partitioner/__init__.py b/python/gigl/utils/__init__.py similarity index 100% rename from python/gigl/distributed/partitioner/__init__.py rename to python/gigl/utils/__init__.py diff --git a/python/gigl/utils/data_splitters.py b/python/gigl/utils/data_splitters.py new file mode 100644 index 0000000..7950a72 --- /dev/null +++ b/python/gigl/utils/data_splitters.py @@ -0,0 +1,329 @@ +import gc +from collections import defaultdict +from collections.abc import Mapping, Sequence +from typing import Callable, Literal, Optional, Protocol, Tuple, Union, overload + +import torch + +from gigl.common.logger import Logger +from gigl.src.common.types.graph_data import EdgeType, NodeType +from gigl.types.distributed import ( + DEFAULT_HOMOGENEOUS_EDGE_TYPE, + DEFAULT_HOMOGENEOUS_NODE_TYPE, +) + +logger = Logger() + + +class NodeAnchorLinkSplitter(Protocol): + """Protocol that should be satisfied for anything that is used to split on edges. + + The edges must be provided in COO format, as dense tensors. + https://tbetcke.github.io/hpc_lecture_notes/sparse_data_structures.html + + Args: + edge_index: The edges to split on in COO format. 2 x N + Returns: + The train (1 x X), val (1 X Y), test (1 x Z) nodes. X + Y + Z = N + """ + + @overload + def __call__( + self, + edge_index: torch.Tensor, + ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + ... + + @overload + def __call__( + self, + edge_index: Mapping[EdgeType, torch.Tensor], + ) -> Mapping[NodeType, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]]: + ... + + def __call__( + self, *args, **kwargs + ) -> Union[ + Tuple[torch.Tensor, torch.Tensor, torch.Tensor], + Mapping[NodeType, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]], + ]: + ... + + +def _fast_hash(x: torch.Tensor) -> torch.Tensor: + """Fast hash function. + + Hashes each element of the input tensor `x` using the fast hash function. + Based on https://stackoverflow.com/a/12996028 + + We use the `Tensor.bitwise_xor_` and `Tensor.multiply_` to avoid creating new tensors. + Sadly, we cannot avoid the out-place shifts (I think, there may be some bit-wise voodoo here), + but in testing we do not increase memory but more than a few MB for a 1G input so it should be fine. + + Note that _fast_hash(0) = 0. + + Arguments: + x (torch.Tensor): The input tensor to hash. N x M + + Returns: + The hash values of the input tensor `x`. 
N x M
+    """
+    x = x.clone().detach()
+    if x.dtype == torch.int32:
+        x.bitwise_xor_(x >> 16)
+        x.multiply_(0x7FEB352D)
+        x.bitwise_xor_(x >> 15)
+        x.multiply_(0x846CA68B)
+        x.bitwise_xor_(x >> 16)
+    elif x.dtype == torch.int64:
+        x.bitwise_xor_(x >> 30)
+        x.multiply_(0xBF58476D1CE4E5B9)
+        x.bitwise_xor_(x >> 27)
+        x.multiply_(0x94D049BB133111EB)
+        x.bitwise_xor_(x >> 31)
+    else:
+        raise ValueError(f"Unsupported dtype {x.dtype}")
+
+    return x
+
+
+class HashedNodeAnchorLinkSplitter:
+    """Selects train, val, and test nodes based on some provided edge index.
+
+    In node-based splitting, a node may only ever live in one split. E.g. if one
+    node has two label edges, *both* of those edges will be placed into the same split.
+
+    The edges must be provided in COO format, as dense tensors.
+    https://tbetcke.github.io/hpc_lecture_notes/sparse_data_structures.html
+    The first row of the input holds the node ids that are the "source" of the edge,
+    and the second row holds the node ids that are the "destination" of the edge.
+
+    Note that there is some tricky interplay between this and the `sampling_direction` parameter.
+    Take the graph [A -> B] as an example.
+    If `sampling_direction` is "in", then B is the source and A is the destination.
+    If `sampling_direction` is "out", then A is the source and B is the destination.
+    """
+
+    def __init__(
+        self,
+        sampling_direction: Union[Literal["in", "out"], str],
+        num_val: Union[float, int] = 0.1,
+        num_test: Union[float, int] = 0.1,
+        hash_function: Callable[[torch.Tensor], torch.Tensor] = _fast_hash,
+        edge_types: Optional[Union[EdgeType, Sequence[EdgeType]]] = None,
+    ):
+        """Initializes the HashedNodeAnchorLinkSplitter.
+
+        Args:
+            sampling_direction (Union[Literal["in", "out"], str]): The direction to sample the nodes. Either "in" or "out".
+            num_val (Union[float, int]): The percentage of nodes to use for validation. Defaults to 0.1 (10%).
+                If an integer is provided, then exactly that number of nodes will be in the validation split.
+            num_test (Union[float, int]): The percentage of nodes to use for testing. Defaults to 0.1 (10%).
+                If an integer is provided, then exactly that number of nodes will be in the test split.
+            hash_function (Callable[[torch.Tensor], torch.Tensor]): The hash function to use. Defaults to `_fast_hash`.
+            edge_types: The supervision edge types we should use for splitting.
+                Must be provided if we are splitting a heterogeneous graph.
+        """
+        _check_sampling_direction(sampling_direction)
+        _check_val_test_percentage(num_val, num_test)
+
+        self._sampling_direction = sampling_direction
+        self._num_val = num_val
+        self._num_test = num_test
+        self._hash_function = hash_function
+
+        if edge_types is None:
+            edge_types = [DEFAULT_HOMOGENEOUS_EDGE_TYPE]
+        elif isinstance(edge_types, EdgeType):
+            edge_types = [edge_types]
+        self._supervision_edge_types: Sequence[EdgeType] = edge_types
+
+    def __call__(
+        self,
+        edge_index: Union[
+            torch.Tensor, Mapping[EdgeType, torch.Tensor]
+        ],  # 2 x N (num_edges)
+    ) -> Union[
+        Tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+        Mapping[NodeType, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]],
+    ]:
+        if isinstance(edge_index, torch.Tensor):
+            if self._supervision_edge_types != [DEFAULT_HOMOGENEOUS_EDGE_TYPE]:
+                logger.warning(
+                    f"You provided edge-types {self._supervision_edge_types} but the edge index is homogeneous. Ignoring edge types."
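# Minimal usage sketch for the splitter above (homogeneous case, toy sizes):
import torch

from gigl.utils.data_splitters import HashedNodeAnchorLinkSplitter

splitter = HashedNodeAnchorLinkSplitter(
    sampling_direction="out", num_val=0.1, num_test=0.1
)
edge_index = torch.randint(0, 1_000, (2, 10_000))  # hypothetical COO edge index
train, val, test = splitter(edge_index)
# Every anchor node id lands in exactly one of train/val/test, and because the
# assignment hashes the node id itself, it is stable across runs and machines.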
+                )
+            is_heterogeneous = False
+            edge_index = {DEFAULT_HOMOGENEOUS_EDGE_TYPE: edge_index}
+
+        else:
+            if (
+                self._supervision_edge_types == [DEFAULT_HOMOGENEOUS_EDGE_TYPE]
+                or not self._supervision_edge_types
+            ):
+                raise ValueError(
+                    "If edge_index is a mapping, edge_types must be provided."
+                )
+            missing = set(self._supervision_edge_types) - edge_index.keys()
+            if missing:
+                raise ValueError(
+                    f"Missing edge types from provided edge index: {missing}. Expected edge types {self._supervision_edge_types} to be in the mapping, but got {edge_index.keys()}."
+                )
+            is_heterogeneous = True
+
+        # First, find the max node id per node type.
+        # This way, we can de-dup via torch.bincount, which is much faster than torch.unique.
+        # NOTE: For cases where we have large ranges of node ids that are all much > 0 (e.g. [0, 100_000, ..., 1_000_000]),
+        # it may be faster to use `torch.unique` instead of `torch.bincount`, since `torch.bincount` will create a tensor of size 1_000_000.
+        # TODO(kmonte): investigate this.
+        # We also store references to all tensors of a given node type, for convenient access later.
+        max_node_id_by_type: dict[NodeType, int] = defaultdict(int)
+        node_ids_by_node_type: dict[NodeType, list[torch.Tensor]] = defaultdict(list)
+        for edge_type_to_split in self._supervision_edge_types:
+            coo_edges = edge_index[edge_type_to_split]
+            _check_edge_index(coo_edges)
+            anchor_nodes = (
+                coo_edges[1] if self._sampling_direction == "in" else coo_edges[0]
+            )
+            anchor_node_type = (
+                edge_type_to_split.dst_node_type
+                if self._sampling_direction == "in"
+                else edge_type_to_split.src_node_type
+            )
+            max_node_id_by_type[anchor_node_type] = int(
+                max(
+                    max_node_id_by_type[anchor_node_type],
+                    torch.max(anchor_nodes).item() + 1,
+                )
+            )
+            node_ids_by_node_type[anchor_node_type].append(anchor_nodes)
+        # Second, we go through all node types and split them.
+        # Note that the approach here (with `torch.argsort`) isn't the quickest;
+        # we could avoid calling `torch.argsort` and do something like:
+        # hash_values = ...
+        # train_mask = hash_values < train_percentage
+        # train = nodes_to_select[train_mask]
+        # That approach is about 2x faster (30s -> 15s on 1B nodes),
+        # but with this `argsort` approach we can be more exact with the number of nodes per split.
+        # The memory usage seems the same across both approaches.
+
+        # De-dupe this way instead of using `unique` to avoid the overhead of sorting.
+        # This approach goes from ~60s to ~30s on 1B edges.
+        # collected_anchor_nodes (the values of node_ids_by_node_type) is a list of tensors for a given node type.
+        # For example, if we have `{(A to B): [0, 1], (A to C): [0, 2]}` then we will have
+        # `collected_anchor_nodes` = [[0, 1], [0, 2]].
+        splits: dict[NodeType, Tuple[torch.Tensor, torch.Tensor, torch.Tensor]] = {}
+        for anchor_node_type, collected_anchor_nodes in node_ids_by_node_type.items():
+            max_node_id = max_node_id_by_type[anchor_node_type]
+            node_id_count = torch.zeros(max_node_id, dtype=torch.int64)
+            for anchor_nodes in collected_anchor_nodes:
+                node_id_count.add_(torch.bincount(anchor_nodes, minlength=max_node_id))
+            # This line takes us from a count of all node ids, e.g. `[0, 2, 0, 1]`,
+            # to a tensor of the indices with non-zero counts, e.g. `[[1], [3]]`,
+            # and the `squeeze` converts that to a 1d tensor (`[1, 3]`).
+            nodes_to_select = torch.nonzero(node_id_count).squeeze()
+            # node_id_count is no longer needed, so we can clean up its memory.
+            del node_id_count
+            gc.collect()
+
+            hash_values = torch.argsort(self._hash_function(nodes_to_select))  # 1 x M
+            nodes_to_select = nodes_to_select[hash_values]  # 1 x M
+
+            # hash_values is no longer needed, so we can clean up its memory.
+            del hash_values
+            gc.collect()
+
+            if isinstance(self._num_val, int):
+                num_val = self._num_val
+            else:
+                num_val = int(nodes_to_select.numel() * self._num_val)
+            if isinstance(self._num_test, int):
+                num_test = self._num_test
+            else:
+                num_test = int(nodes_to_select.numel() * self._num_test)
+
+            num_train = nodes_to_select.numel() - num_val - num_test
+            if num_train <= 0:
+                raise ValueError(
+                    f"Invalid number of nodes to split. Expected more than 0. Originally had {nodes_to_select.numel()} nodes, but due to having `num_test` = {self._num_test} and `num_val` = {self._num_val} we got no training nodes."
+                )
+
+            train = nodes_to_select[:num_train]  # 1 x num_train_nodes
+            val = nodes_to_select[num_train : num_val + num_train]  # 1 x num_val_nodes
+            test = nodes_to_select[num_train + num_val :]  # 1 x num_test_nodes
+            splits[anchor_node_type] = (train, val, test)
+        if is_heterogeneous:
+            return splits
+        else:
+            return splits[DEFAULT_HOMOGENEOUS_NODE_TYPE]
+
+
+def _check_sampling_direction(sampling_direction: str):
+    if sampling_direction not in ["in", "out"]:
+        raise ValueError(
+            f"Invalid sampling direction {sampling_direction}. Expected 'in' or 'out'."
+        )
+
+
+def _check_val_test_percentage(
+    val_percentage: Union[float, int], test_percentage: Union[float, int]
+):
+    """Checks that the val and test percentages make sense, e.g. we can still have train nodes, and they are non-negative."""
+    if val_percentage < 0:
+        raise ValueError(
+            f"Invalid val percentage {val_percentage}. Expected a non-negative value."
+        )
+    if test_percentage < 0:
+        raise ValueError(
+            f"Invalid test percentage {test_percentage}. Expected a non-negative value."
+        )
+    if isinstance(val_percentage, float) and isinstance(test_percentage, float):
+        if not 0 <= test_percentage < 1:
+            raise ValueError(
+                f"Invalid test percentage {test_percentage}. Expected a value between 0 and 1."
+            )
+        if val_percentage <= 0:
+            raise ValueError(
+                f"Invalid val percentage {val_percentage}. Expected a value greater than 0."
+            )
+        if val_percentage + test_percentage >= 1:
+            raise ValueError(
+                f"Invalid val percentage {val_percentage} and test percentage ({test_percentage}). Expected values such that test percentage + val percentage < 1."
+            )
+
+
+def _check_edge_index(edge_index: torch.Tensor):
+    """Asserts that the edge index is the appropriate shape and is not sparse."""
+    size = edge_index.size()
+    if size[0] != 2 or len(size) != 2:
+        raise ValueError(
+            f"Expected edges to be provided in COO format in the form of a 2xN tensor. Received a tensor of shape: {size}."
+        )
+    if edge_index.is_sparse:
+        raise ValueError("Expected a dense tensor. Received a sparse tensor.")
+
+
+def select_ssl_positive_label_edges(
+    edge_index: torch.Tensor, positive_label_percentage: float
+) -> torch.Tensor:
+    """
+    Selects a percentage of edges from an edge index to use for self-supervised positive labels.
+    Note that this function does not mask these labeled edges from the edge index tensor.
+ + Args: + edge_index (torch.Tensor): Edge Index tensor of shape [2, num_edges] + positive_label_percentage (float): Percentage of edges to select as positive labels + Returns: + torch.Tensor: Tensor of positive edges of shape [2, num_labels] + """ + if not (0 <= positive_label_percentage <= 1): + raise ValueError( + f"Label percentage must be between 0 and 1, got {positive_label_percentage}" + ) + if len(edge_index.shape) != 2 or edge_index.shape[0] != 2: + raise ValueError( + f"Provided edge index tensor must have shape [2, num_edges], got {edge_index.shape}" + ) + num_labels = int(edge_index.shape[1] * positive_label_percentage) + label_inds = torch.randperm(edge_index.size(1))[:num_labels] + return edge_index[:, label_inds] diff --git a/python/gigl/utils/share_memory.py b/python/gigl/utils/share_memory.py new file mode 100644 index 0000000..c7d7a16 --- /dev/null +++ b/python/gigl/utils/share_memory.py @@ -0,0 +1,44 @@ +from collections import abc +from typing import Dict, Optional, TypeVar, Union + +import torch +from graphlearn_torch.partition import PartitionBook, RangePartitionBook + +_KeyType = TypeVar("_KeyType") # Generic Key Type + + +def share_memory( + entity: Optional[ + Union[ + torch.Tensor, + PartitionBook, + Dict[_KeyType, torch.Tensor], + Dict[_KeyType, PartitionBook], + ] + ], +) -> None: + """ + Based on GraphLearn-for-PyTorch's `share_memory` implementation, with additional support for handling empty tensors with share_memory. + https://github.com/alibaba/graphlearn-for-pytorch/blob/main/graphlearn_torch/python/utils/tensor.py#L88 + + Calling `share_memory_()` on an empty tensor may cause processes to hang, although the root cause of this is currently unknown. As a result, + we opt to not move empty tensors to shared memory if they are provided. + + Args: + entity (Optional[Union[torch.Tensor, Dict[_KeyType, torch.Tensor]]]): + Homogeneous or heterogeneous entity of tensors which is being moved to shared memory + """ + + if entity is None: + return None + elif isinstance(entity, abc.Mapping): + for entity_tensor in entity.values(): + share_memory(entity_tensor) + elif isinstance(entity, RangePartitionBook): + share_memory(entity.partition_bounds) + else: + # If the tensor has a dimension which is 0, it is an empty tensor. As a result, we don't move this + # to shared_memory, since share_memory_() is unsafe on empty tensors, which may cause processes to hang. 
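# Usage sketch for the helper above (toy tensors):
import torch

from gigl.utils.share_memory import share_memory

features = {
    "user": torch.randn(10, 8),
    "story": torch.empty(0, 8),  # empty: intentionally left out of shared memory
}
share_memory(features)
assert features["user"].is_shared()
assert not features["story"].is_shared()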
+ if 0 in entity.shape: + return None + entity.share_memory_() diff --git a/python/pyproject.toml b/python/pyproject.toml index 512587b..e025b09 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -61,7 +61,7 @@ torch25-cuda-121 = [ # Torch 2.5.1 w/ Cuda 12.1 # ===================================== Deps for PyTorch ===================================== "torch @ https://download.pytorch.org/whl/cu121/torch-2.5.1%2Bcu121-cp39-cp39-linux_x86_64.whl#sha256=3c96b2ec4723e7d97259964ee73e2d6a2bace42511a49005b083ea7be1a0b0ac ; platform_system=='Linux' and python_version=='3.9'", - + # ===================================== Deps for PyTorch Geometric ===================================== "pyg-lib @ https://data.pyg.org/whl/torch-2.5.0+cu121/pyg_lib-0.4.0+pt25cu121-cp39-cp39-linux_x86_64.whl ; platform_system=='Linux' and python_version=='3.9'", "torch-cluster @ https://data.pyg.org/whl/torch-2.5.0+cu121/torch_cluster-1.6.3+pt25cu121-cp39-cp39-linux_x86_64.whl ; platform_system=='Linux' and python_version=='3.9'", @@ -79,10 +79,10 @@ torch25-cuda-121 = [ # Torch 2.5.1 w/ Cuda 12.1 ] torch25-cpu = [ - # PYG has not prebuild wheels for arm64 MACS i.e. (M1,M2,M3 chips), they need to be built from scratch - # Neither has some tensorflow-transform, and graphlearn-tprch. - # We currently don't suppoer MAC environment as a result. - + # PYG, tensorflow-transform, and graphlearn-torch do not have prebuilt wheels for arm64 MACS + # i.e. (M1,M2,M3 chips), they need to be built from scratch + # We currently don't support MAC environment as a result. + # ===================================== Deps for PyTorch ===================================== "torch==2.5.1", # ===================================== Deps for PyG ===================================== @@ -108,14 +108,13 @@ torch25-cpu = [ transform = [ "apache-beam[gcp]==2.56.0", "pyarrow==10.0.1", - # Tensorflow-transform natively doesnt provide wheels for arm64 Macs. - - "tfx-bsl~=1.14.0 ; platform_machine!='arm64'", - - "tensorflow_data_validation==1.14.0 ; platform_machine!='arm64'", - "tensorflow-transform~=1.14.0 ; platform_machine!='arm64'", - "tensorflow-metadata==1.14.0 ; platform_machine!='arm64'", - "tensorflow-serving-api==2.15.1 ; platform_machine!='arm64'" + + # Tensorflow-transform natively doesnt provide wheels for arm64 Macs. 
+ "tfx-bsl~=1.14.0 ; platform_system!='Darwin'", + "tensorflow_data_validation==1.14.0 ; platform_system!='Darwin'", + "tensorflow-transform~=1.14.0 ; platform_system!='Darwin'", + "tensorflow-metadata==1.14.0 ; platform_system!='Darwin'", + "tensorflow-serving-api==2.15.1 ; platform_system!='Darwin'" ] dev = [ @@ -151,7 +150,7 @@ docs = [ ] [project.urls] -Homepage = "https://github.com/snap-research/GiGL" +Homepage = "https://github.com/research/GiGL" [tool.setuptools.packages.find] where = ["."] # list of folders that contain the packages (["."] by default) diff --git a/python/tests/integration/common_tests/__init__.py b/python/tests/integration/common/__init__.py similarity index 100% rename from python/tests/integration/common_tests/__init__.py rename to python/tests/integration/common/__init__.py diff --git a/python/tests/integration/pipeline_tests/__init__.py b/python/tests/integration/common/data/__init__.py similarity index 100% rename from python/tests/integration/pipeline_tests/__init__.py rename to python/tests/integration/common/data/__init__.py diff --git a/python/tests/integration/common/data/export_test.py b/python/tests/integration/common/data/export_test.py new file mode 100644 index 0000000..1ba247e --- /dev/null +++ b/python/tests/integration/common/data/export_test.py @@ -0,0 +1,82 @@ +import unittest +import uuid + +import torch + +from gigl.common import GcsUri +from gigl.common.data.export import EmbeddingExporter, load_embeddings_to_bigquery +from gigl.common.logger import Logger +from gigl.common.utils.gcs import GcsUtils +from gigl.env.pipelines_config import get_resource_config +from gigl.src.common.utils.bq import BqUtils + +logger = Logger() + + +class EmbeddingExportIntergrationTest(unittest.TestCase): + def setUp(self): + resource_config = get_resource_config() + test_unique_name = f"GiGL-Intergration-Exporter-{uuid.uuid4().hex}" + self.embedding_output_dir = GcsUri.join( + resource_config.temp_assets_regional_bucket_path, + test_unique_name, + "embeddings", + ) + self.embedding_output_bq_project = resource_config.project + self.embedding_output_bq_dataset = resource_config.temp_assets_bq_dataset_name + self.embedding_output_bq_table = test_unique_name + + def tearDown(self): + gcs_utils = GcsUtils() + gcs_utils.delete_files_in_bucket_dir(self.embedding_output_dir) + bq_client = BqUtils() + bq_export_table_path = bq_client.join_path( + self.embedding_output_bq_project, + self.embedding_output_bq_dataset, + self.embedding_output_bq_table, + ) + bq_client.delete_bq_table_if_exist( + bq_table_path=bq_export_table_path, + ) + + def test_embedding_export(self): + num_nodes = 1_000 + with EmbeddingExporter(export_dir=self.embedding_output_dir) as exporter: + for i in torch.arange(num_nodes): + exporter.add_embedding( + torch.tensor([i]), torch.ones(128, 1) * i, "node" + ) + + # We also want nested directories to be picked up. + # e.g. if we have: + # gs://MY BUCKET/embeddings/shard_0000.avro + # gs://MY BUCKET/embeddings/nested/shard_0000.avro + # The files under "nested" should be included. 
+ with EmbeddingExporter( + export_dir=GcsUri.join(self.embedding_output_dir, "nested") + ) as exporter: + for i in torch.arange(num_nodes, num_nodes * 2): + exporter.add_embedding( + torch.tensor([i]), torch.ones(128, 1) * i, "node" + ) + bq_client = BqUtils() + bq_export_table_path = bq_client.join_path( + self.embedding_output_bq_project, + self.embedding_output_bq_dataset, + self.embedding_output_bq_table, + ) + logger.info( + f"Will try exporting to {self.embedding_output_dir} to BQ: {bq_export_table_path}" + ) + load_embeddings_to_bigquery( + gcs_folder=self.embedding_output_dir, + project_id=self.embedding_output_bq_project, + dataset_id=self.embedding_output_bq_dataset, + table_id=self.embedding_output_bq_table, + ) + + # Check that data in BQ is as expected... + self.assertEqual( + bq_client.count_number_of_rows_in_bq_table(bq_export_table_path), + num_nodes * 2, + ) diff --git a/python/tests/integration/common_tests/dataflow_test.py b/python/tests/integration/common/dataflow_test.py similarity index 100% rename from python/tests/integration/common_tests/dataflow_test.py rename to python/tests/integration/common/dataflow_test.py diff --git a/python/tests/integration/common_tests/file_loader_test.py b/python/tests/integration/common/file_loader_test.py similarity index 100% rename from python/tests/integration/common_tests/file_loader_test.py rename to python/tests/integration/common/file_loader_test.py diff --git a/python/tests/integration/common_tests/gcs_test.py b/python/tests/integration/common/gcs_test.py similarity index 96% rename from python/tests/integration/common_tests/gcs_test.py rename to python/tests/integration/common/gcs_test.py index 7d555e3..6c20f9d 100644 --- a/python/tests/integration/common_tests/gcs_test.py +++ b/python/tests/integration/common/gcs_test.py @@ -17,7 +17,6 @@ def test_join_path(self): GcsUri.join("back_slashes_in_name\\", "some_file.txt") def test_download_to_temp_file(self): - # TODO: (Open Source) Make this the external GCP project gcs_utils = GcsUtils() f = gcs_utils.download_file_from_gcs_to_temp_file( GcsUri( diff --git a/python/tests/integration/pipeline_tests/config_populator/__init__.py b/python/tests/integration/common/services/__init__.py similarity index 100% rename from python/tests/integration/pipeline_tests/config_populator/__init__.py rename to python/tests/integration/common/services/__init__.py diff --git a/python/tests/integration/common/services/vertex_ai_test.py b/python/tests/integration/common/services/vertex_ai_test.py new file mode 100644 index 0000000..d970a9b --- /dev/null +++ b/python/tests/integration/common/services/vertex_ai_test.py @@ -0,0 +1,89 @@ +import os +import tempfile +import unittest +import uuid + +import kfp + +from gigl.common import UriFactory +from gigl.common.services.vertex_ai import VertexAiJobConfig, VertexAIService +from gigl.env.pipelines_config import get_resource_config + + +@kfp.dsl.component +def source() -> int: + return 42 + + +@kfp.dsl.component +def doubler(a: int) -> int: + return a * 2 + + +@kfp.dsl.component +def adder(a: int, b: int) -> int: + return a + b + + +@kfp.dsl.pipeline(name="kfp-integration-test") +def get_pipeline() -> int: + source_task = source() + double_task = doubler(a=source_task.output) + adder_task = adder(a=source_task.output, b=double_task.output) + return adder_task.output + + +class VertexAIPipelineIntegrationTest(unittest.TestCase): + def test_launch_job(self): + resource_config = get_resource_config() + project = resource_config.project + location = 
resource_config.region + service_account = resource_config.service_account_email + staging_bucket = resource_config.temp_assets_regional_bucket_path.uri + job_name = f"GiGL-Intergration-Test-{uuid.uuid4()}" + container_uri = "continuumio/miniconda3:4.12.0" + command = ["python", "-c", "import logging; logging.info('Hello, World!')"] + + job_config = VertexAiJobConfig( + job_name=job_name, container_uri=container_uri, command=command + ) + + vertex_ai_service = VertexAIService( + project=project, + location=location, + service_account=service_account, + staging_bucket=staging_bucket, + ) + + vertex_ai_service.launch_job(job_config) + + def test_run_pipeline(self): + with tempfile.TemporaryDirectory() as tmpdir: + pipeline_def = os.path.join(tmpdir, "pipeline.yaml") + kfp.compiler.Compiler().compile(get_pipeline, pipeline_def) + resource_config = get_resource_config() + ps = VertexAIService( + project=resource_config.project, + location=resource_config.region, + service_account=resource_config.service_account_email, + staging_bucket=resource_config.temp_assets_regional_bucket_path.uri, + ) + job = ps.run_pipeline( + display_name="integration-test-pipeline", + template_path=UriFactory.create_uri(pipeline_def), + run_keyword_args={}, + experiment="gigl-integration-tests", + ) + # Wait for the run to complete, 30 minutes is probably too long but + # we don't want this test to be flaky. + ps.wait_for_run_completion( + job.resource_name, timeout=60 * 30, polling_period_s=10 + ) + + # Also verify that we can fetch a pipeline. + run = ps.get_pipeline_job_from_job_name(job.name) + self.assertEqual(run.resource_name, job.resource_name) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/integration/pipeline_tests/data_preprocessor/__init__.py b/python/tests/integration/distributed/__init__.py similarity index 100% rename from python/tests/integration/pipeline_tests/data_preprocessor/__init__.py rename to python/tests/integration/distributed/__init__.py diff --git a/python/tests/integration/distributed/distributed_dataset_test.py b/python/tests/integration/distributed/distributed_dataset_test.py new file mode 100644 index 0000000..3c3d699 --- /dev/null +++ b/python/tests/integration/distributed/distributed_dataset_test.py @@ -0,0 +1,230 @@ +import unittest +from collections import abc, defaultdict +from typing import MutableMapping, Optional + +import graphlearn_torch as glt +import torch +import torch.multiprocessing as mp +from graphlearn_torch.data import Feature, Graph +from parameterized import param, parameterized +from torch.multiprocessing import Manager +from torch.testing import assert_close + +from gigl.distributed.dist_link_prediction_dataset import DistLinkPredictionDataset +from gigl.src.common.types.graph_data import EdgeType, NodeType, Relation +from gigl.src.mocking.lib.mocked_dataset_resources import MockedDatasetInfo +from gigl.src.mocking.mocking_assets.mocked_datasets_for_pipeline_tests import ( + HETEROGENEOUS_TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, + TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, + TOY_GRAPH_USER_DEFINED_NODE_ANCHOR_MOCKED_DATASET_INFO, +) +from gigl.types.distributed import ( + DEFAULT_HOMOGENEOUS_EDGE_TYPE, + DEFAULT_HOMOGENEOUS_NODE_TYPE, +) +from gigl.utils.data_splitters import ( + HashedNodeAnchorLinkSplitter, + NodeAnchorLinkSplitter, +) +from tests.test_assets.distributed.run_distributed_dataset import ( + run_distributed_dataset, +) + + +class DistDatasetTestCase(unittest.TestCase): + def setUp(self): + self._master_ip_address = 
"localhost" + self._world_size = 2 + self._num_rpc_threads = 4 + + @parameterized.expand( + [ + param( + "Test GLT Dataset Load in sequence with homogeneous toy NABLP dataset", + mocked_dataset_info=TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, + should_load_tensors_in_parallel=False, + is_heterogeneous=False, + ), + param( + "Test GLT Dataset Load in sequence with homogeneous toy dataset with user defined labels", + mocked_dataset_info=TOY_GRAPH_USER_DEFINED_NODE_ANCHOR_MOCKED_DATASET_INFO, + should_load_tensors_in_parallel=False, + is_heterogeneous=False, + ), + param( + "Test GLT Dataset Load in sequence with heterogeneous toy dataset", + mocked_dataset_info=HETEROGENEOUS_TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, + should_load_tensors_in_parallel=False, + is_heterogeneous=True, + ), + param( + "Test GLT Dataset Load in parallel with heterogeneous toy dataset", + mocked_dataset_info=HETEROGENEOUS_TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, + should_load_tensors_in_parallel=True, + is_heterogeneous=True, + ), + ] + ) + def test_dataset_correctness( + self, + _, + mocked_dataset_info: MockedDatasetInfo, + should_load_tensors_in_parallel: bool, + is_heterogeneous: bool, + ) -> None: + master_port = glt.utils.get_free_port(self._master_ip_address) + manager = Manager() + output_dict: MutableMapping[int, DistLinkPredictionDataset] = manager.dict() + + mp.spawn( + run_distributed_dataset, + args=( + self._world_size, + mocked_dataset_info, + output_dict, + should_load_tensors_in_parallel, + self._master_ip_address, + master_port, + ), + nprocs=self._world_size, + join=True, + ) + + for dataset in output_dict.values(): + graph = dataset.graph + node_ids = dataset.node_ids + node_features = dataset.node_features + edge_features = dataset.edge_features + + if is_heterogeneous: + assert isinstance(node_features, abc.Mapping) + assert isinstance(node_ids, abc.Mapping) + assert isinstance(graph, abc.Mapping) + assert isinstance(edge_features, abc.Mapping) + else: + assert isinstance(node_features, Feature) + assert isinstance(node_ids, torch.Tensor) + assert isinstance(graph, Graph) + assert isinstance(edge_features, Feature) + node_features = {DEFAULT_HOMOGENEOUS_NODE_TYPE: node_features} + node_ids = {DEFAULT_HOMOGENEOUS_NODE_TYPE: node_ids} + edge_features = {DEFAULT_HOMOGENEOUS_EDGE_TYPE: edge_features} + graph = {DEFAULT_HOMOGENEOUS_EDGE_TYPE: graph} + + # id2index is a tensor that is used to get the map the global node/edge ids to the local indices in the feature tensors. + # As a result, we ensure that all global ids are indexable in the id2index tensor and that all local indices are indexable from the id2index tensor. + + # Validating Node Correctness + for node_type in node_features: + # We use lazy_init_with_ipc_handle() to populate the feature_tensor and id2index fields + node_features[node_type].lazy_init_with_ipc_handle() + # The max node id + 1 should be equal to the shape of the id2index tensor. We add one since ids are 0-indexed. 
+
+            # Validating Node Correctness
+            for node_type in node_features:
+                # We use lazy_init_with_ipc_handle() to populate the feature_tensor and id2index fields
+                node_features[node_type].lazy_init_with_ipc_handle()
+                # The max node id + 1 should be equal to the shape of the id2index tensor. We add one since ids are 0-indexed.
+                self.assertEqual(
+                    torch.max(node_ids[node_type]).item() + 1,
+                    node_features[node_type].id2index.size(0),
+                )
+                # We ensure that each local index in node_features is indexable from id2index
+                for local_index in range(node_features[node_type].size(0)):
+                    self.assertTrue(local_index in node_features[node_type].id2index)
+
+            # Validating Edge Correctness
+            for edge_type in edge_features:
+                # We use lazy_init_with_ipc_handle() to populate the feature_tensor and id2index fields
+                edge_features[edge_type].lazy_init_with_ipc_handle()
+                # The max edge id + 1 should be equal to the shape of the id2index tensor. We add one since ids are 0-indexed.
+                self.assertEqual(
+                    torch.max(graph[edge_type].topo.edge_ids).item() + 1,
+                    edge_features[edge_type].id2index.size(0),
+                )
+                # We ensure that each local index in edge_features is indexable from id2index
+                for local_index in range(edge_features[edge_type].size(0)):
+                    self.assertTrue(local_index in edge_features[edge_type].id2index)
+
+    @parameterized.expand(
+        [
+            param(
+                "Test GLT Dataset Split with homogeneous toy NABLP dataset",
+                mocked_dataset_info=TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO,
+                is_heterogeneous=False,
+                split_fn=HashedNodeAnchorLinkSplitter(sampling_direction="out"),
+            ),
+            param(
+                "Test GLT Dataset Split with heterogeneous toy dataset",
+                mocked_dataset_info=HETEROGENEOUS_TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO,
+                is_heterogeneous=True,
+                split_fn=HashedNodeAnchorLinkSplitter(
+                    sampling_direction="out",
+                    edge_types=EdgeType(
+                        NodeType("story"), Relation("to"), NodeType("user")
+                    ),
+                ),
+            ),
+            param(
+                "Test GLT Dataset Split with heterogeneous toy dataset - two supervision edge types",
+                mocked_dataset_info=HETEROGENEOUS_TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO,
+                is_heterogeneous=True,
+                split_fn=HashedNodeAnchorLinkSplitter(
+                    sampling_direction="out",
+                    num_test=1,
+                    num_val=1,
+                    edge_types=[
+                        EdgeType(NodeType("story"), Relation("to"), NodeType("user")),
+                        EdgeType(NodeType("user"), Relation("to"), NodeType("story")),
+                    ],
+                ),
+            ),
+        ]
+    )
+    def test_split_dataset_correctness(
+        self,
+        _,
+        mocked_dataset_info: MockedDatasetInfo,
+        is_heterogeneous: bool,
+        split_fn: Optional[NodeAnchorLinkSplitter],
+    ) -> None:
+        master_port = glt.utils.get_free_port(self._master_ip_address)
+        manager = Manager()
+        output_dict: MutableMapping[int, DistLinkPredictionDataset] = manager.dict()
+
+        mp.spawn(
+            run_distributed_dataset,
+            args=(
+                self._world_size,
+                mocked_dataset_info,
+                output_dict,
+                True,  # should_load_tensors_in_parallel
+                self._master_ip_address,
+                master_port,
+                None,  # partitioner
+                None,  # dataset
+                split_fn,
+            ),
+            nprocs=self._world_size,
+            join=True,
+        )
+
+        node_ids_by_rank_by_type: dict[NodeType, dict[int, torch.Tensor]] = defaultdict(
+            dict
+        )
+        for rank, dataset in output_dict.items():
+            node_ids = dataset.node_ids
+            if is_heterogeneous:
+                assert isinstance(node_ids, abc.Mapping)
+            else:
+                assert isinstance(node_ids, torch.Tensor)
+                node_ids = {DEFAULT_HOMOGENEOUS_NODE_TYPE: node_ids}
+
+            for node_type, node_ids_tensor in node_ids.items():
+                node_ids_by_rank_by_type[node_type][rank] = node_ids_tensor
+
+        # Assert that the node ids are disjoint across all ranks:
+        for node_type, node_ids_by_rank in node_ids_by_rank_by_type.items():
+            all_node_ids = torch.cat(list(node_ids_by_rank.values()))
+            unique_node_ids = torch.unique(all_node_ids)
+            with self.subTest(f"Node type disjointness for {node_type}"):
+                # Check that all node ids are unique across ranks
+                
assert_close(all_node_ids.msort(), unique_node_ids.msort()) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/integration/pipeline_tests/inferencer/__init__.py b/python/tests/integration/pipeline/__init__.py similarity index 100% rename from python/tests/integration/pipeline_tests/inferencer/__init__.py rename to python/tests/integration/pipeline/__init__.py diff --git a/python/tests/integration/pipeline_tests/split_generator/__init__.py b/python/tests/integration/pipeline/config_populator/__init__.py similarity index 100% rename from python/tests/integration/pipeline_tests/split_generator/__init__.py rename to python/tests/integration/pipeline/config_populator/__init__.py diff --git a/python/tests/integration/pipeline_tests/config_populator/config_populator_pipeline_test.py b/python/tests/integration/pipeline/config_populator/config_populator_pipeline_test.py similarity index 100% rename from python/tests/integration/pipeline_tests/config_populator/config_populator_pipeline_test.py rename to python/tests/integration/pipeline/config_populator/config_populator_pipeline_test.py diff --git a/python/tests/integration/pipeline_tests/subgraph_sampler/__init__.py b/python/tests/integration/pipeline/data_preprocessor/__init__.py similarity index 100% rename from python/tests/integration/pipeline_tests/subgraph_sampler/__init__.py rename to python/tests/integration/pipeline/data_preprocessor/__init__.py diff --git a/python/tests/integration/pipeline_tests/data_preprocessor/data_preprocessor_pipeline_test.py b/python/tests/integration/pipeline/data_preprocessor/data_preprocessor_pipeline_test.py similarity index 96% rename from python/tests/integration/pipeline_tests/data_preprocessor/data_preprocessor_pipeline_test.py rename to python/tests/integration/pipeline/data_preprocessor/data_preprocessor_pipeline_test.py index dc5e39c..99d2bcb 100644 --- a/python/tests/integration/pipeline_tests/data_preprocessor/data_preprocessor_pipeline_test.py +++ b/python/tests/integration/pipeline/data_preprocessor/data_preprocessor_pipeline_test.py @@ -164,10 +164,14 @@ def __assert_graph_metadata_reflects_mocked_dataset_info( + f"Source mock dataset info: {mocked_dataset_info}" ) - condensed_node_type_to_node_type_map: Dict[CondensedNodeType, NodeType] = ( + condensed_node_type_to_node_type_map: Dict[ + CondensedNodeType, NodeType + ] = ( gbml_config_pb_wrapper.graph_metadata_pb_wrapper.condensed_node_type_to_node_type_map ) - condensed_edge_type_to_edge_type_map: Dict[CondensedEdgeType, EdgeType] = ( + condensed_edge_type_to_edge_type_map: Dict[ + CondensedEdgeType, EdgeType + ] = ( gbml_config_pb_wrapper.graph_metadata_pb_wrapper.condensed_edge_type_to_edge_type_map ) @@ -229,9 +233,9 @@ def __assert_node_metadata_output_reflects_mocked_dataset_info( for column_tensor, node_feat_name in zip( mocked_dataset_info.node_feats[node_type].T, expected_node_feature_names ): - expected_node_feats_indexed_by_feat_name[node_feat_name] = ( - column_tensor.numpy() - ) + expected_node_feats_indexed_by_feat_name[ + node_feat_name + ] = column_tensor.numpy() self.assertEqual( node_metadata_output_pb.node_id_key, @@ -276,10 +280,10 @@ def __assert_node_metadata_output_reflects_mocked_dataset_info( f"{node_data_features_prefix.uri}*.tfrecord" ) self.assertIsNotNone(node_tfrecords) - node_info: Dict[str, np.ndarray] = ( - DataPreprocessorPipelineTest.__get_np_arrays_from_tfrecords( - schema_path=node_data_schema, tfrecord_files=node_tfrecords - ) + node_info: Dict[ + str, np.ndarray + ] = 
DataPreprocessorPipelineTest.__get_np_arrays_from_tfrecords( + schema_path=node_data_schema, tfrecord_files=node_tfrecords ) self.assertEqual( @@ -356,10 +360,10 @@ def __assert_edge_metadata_info_reflects_mocked_dataset_info( # Check node data is same. edge_tfrecords = tf.io.gfile.glob(f"{edge_data_prefix.uri}*.tfrecord") self.assertIsNotNone(edge_tfrecords) - edge_info: Dict[str, np.ndarray] = ( - DataPreprocessorPipelineTest.__get_np_arrays_from_tfrecords( - schema_path=edge_data_schema, tfrecord_files=edge_tfrecords - ) + edge_info: Dict[ + str, np.ndarray + ] = DataPreprocessorPipelineTest.__get_np_arrays_from_tfrecords( + schema_path=edge_data_schema, tfrecord_files=edge_tfrecords ) self.assertEqual( @@ -522,9 +526,9 @@ def __assert_edge_metadata_output_reflects_mocked_dataset_info( mocked_dataset_info.edge_feats[edge_type].T, expected_main_edge_feature_names, ): - expected_main_edge_feats_indexed_by_feat_name[edge_feat_name] = ( - column_tensor.numpy() - ) + expected_main_edge_feats_indexed_by_feat_name[ + edge_feat_name + ] = column_tensor.numpy() expected_num_main_edge_features = mocked_dataset_info.num_edge_features[ edge_type @@ -614,11 +618,9 @@ def __run_test_for_mocked_dataset(self, mocked_dataset_info: MockedDatasetInfo): applied_task_id = AppliedTaskIdentifier( f"data_preprocessor_pipeline_test_{mocked_dataset_info.name}_{current_formatted_datetime()}" ) - frozen_gbml_config_uri: LocalUri = ( - self.__generate_gbml_config_pb_for_mocked_dataset_using_passthrough_preprocessor( - applied_task_identifier=applied_task_id, - mocked_dataset_info=mocked_dataset_info, - ) + frozen_gbml_config_uri: LocalUri = self.__generate_gbml_config_pb_for_mocked_dataset_using_passthrough_preprocessor( + applied_task_identifier=applied_task_id, + mocked_dataset_info=mocked_dataset_info, ) preprocessed_metadata_pb: preprocessed_metadata_pb2.PreprocessedMetadata = ( self.__run_data_preprocessor_pipeline( diff --git a/python/tests/integration/pipeline_tests/data_preprocessor/enumerator_test.py b/python/tests/integration/pipeline/data_preprocessor/enumerator_test.py similarity index 78% rename from python/tests/integration/pipeline_tests/data_preprocessor/enumerator_test.py rename to python/tests/integration/pipeline/data_preprocessor/enumerator_test.py index f6ad75c..97a7420 100644 --- a/python/tests/integration/pipeline_tests/data_preprocessor/enumerator_test.py +++ b/python/tests/integration/pipeline/data_preprocessor/enumerator_test.py @@ -1,5 +1,5 @@ import unittest -from typing import Any, Dict, List, Tuple, Union, cast +from typing import Any, Dict, List, Tuple, Union import google.cloud.bigquery as bigquery import pandas as pd @@ -12,10 +12,6 @@ from gigl.src.common.types.graph_data import EdgeType, EdgeUsageType, NodeType, Relation from gigl.src.common.utils.bq import BqUtils from gigl.src.common.utils.time import current_formatted_datetime -from gigl.src.data_preprocessor.lib.data_preprocessor_config import ( - build_ingestion_feature_spec_fn, - build_passthrough_transform_preprocessing_fn, -) from gigl.src.data_preprocessor.lib.enumerate.utils import ( Enumerator, EnumeratorEdgeTypeMetadata, @@ -24,14 +20,6 @@ from gigl.src.data_preprocessor.lib.ingest.bigquery import ( BigqueryEdgeDataReference, BigqueryNodeDataReference, - EdgeDataReference, - NodeDataReference, -) -from gigl.src.data_preprocessor.lib.types import ( - EdgeDataPreprocessingSpec, - EdgeOutputIdentifier, - NodeDataPreprocessingSpec, - NodeOutputIdentifier, ) logger = Logger() @@ -54,12 +42,12 @@ _NEGATIVE_EDGES = [("Alice", 
"Alice"), ("Bob", "Bob"), ("Charlie", "Charlie")] -_PERSON_NODE_OUTPUT_IDENTIFIER = NodeOutputIdentifier("person") +_PERSON_NODE_IDENTIFIER_FIELD = "person" # Define node features rows for each node _PERSON_NODE_FEATURE_FLOAT_FIELDS = ["height", "age", "weight"] _PERSON_NODE_FEATURE_RECORDS: List[Dict[str, Any]] = [ { - str(_PERSON_NODE_OUTPUT_IDENTIFIER): node, + _PERSON_NODE_IDENTIFIER_FIELD: node, "height": float(i), "age": float(i), "weight": float(i), @@ -68,16 +56,15 @@ ] -_MESSAGES_EDGE_OUTPUT_IDENTIFIER = EdgeOutputIdentifier( - src_node=NodeOutputIdentifier("from_person"), - dst_node=NodeOutputIdentifier("to_person"), -) +_MESSAGES_EDGE_SRC_IDENTIFIER_FIELD = "from_preson" +_MESSAGES_EDGE_DST_IDENTIFIER_FIELD = "to_person" + # Define feature rows for each edge _MESSAGE_EDGE_FEATURE_INT_FIELDS = ["is_friends_with"] _MESSAGE_EDGE_FEATURE_RECORDS = [ { - str(_MESSAGES_EDGE_OUTPUT_IDENTIFIER.src_node): src, - str(_MESSAGES_EDGE_OUTPUT_IDENTIFIER.dst_node): dst, + _MESSAGES_EDGE_SRC_IDENTIFIER_FIELD: src, + _MESSAGES_EDGE_DST_IDENTIFIER_FIELD: dst, "is_friends_with": 1, } for (src, dst) in _MESSAGE_EDGES @@ -85,8 +72,8 @@ _POSITIVE_EDGE_FEATURE_INT_FIELDS = ["is_friends_with", "messages_every_day"] _POSITIVE_EDGE_FEATURE_RECORDS = [ { - str(_MESSAGES_EDGE_OUTPUT_IDENTIFIER.src_node): src, - str(_MESSAGES_EDGE_OUTPUT_IDENTIFIER.dst_node): dst, + _MESSAGES_EDGE_SRC_IDENTIFIER_FIELD: src, + _MESSAGES_EDGE_DST_IDENTIFIER_FIELD: dst, "is_friends_with": 1, "messages_every_day": 1, } @@ -95,8 +82,8 @@ _NEGATIVE_EDGE_FEATURE_INT_FIELDS: List[str] = [] _NEGATIVE_EDGE_FEATURE_RECORDS = [ { - str(_MESSAGES_EDGE_OUTPUT_IDENTIFIER.src_node): src, - str(_MESSAGES_EDGE_OUTPUT_IDENTIFIER.dst_node): dst, + _MESSAGES_EDGE_SRC_IDENTIFIER_FIELD: src, + _MESSAGES_EDGE_DST_IDENTIFIER_FIELD: dst, } for (src, dst) in _NEGATIVE_EDGES ] @@ -165,6 +152,7 @@ def setUp(self) -> None: f"node_features_{self.__applied_task_identifier}", ), node_type=_PERSON_NODE_TYPE, + identifier=_PERSON_NODE_IDENTIFIER_FIELD, ) self.__input_main_edges_data_reference = BigqueryEdgeDataReference( @@ -175,6 +163,8 @@ def setUp(self) -> None: ), edge_type=_MESSAGES_EDGE_TYPE, edge_usage_type=EdgeUsageType.MAIN, + src_identifier=_MESSAGES_EDGE_SRC_IDENTIFIER_FIELD, + dst_identifier=_MESSAGES_EDGE_DST_IDENTIFIER_FIELD, ) self.__input_positive_edges_data_reference = BigqueryEdgeDataReference( reference_uri=BqUtils.join_path( @@ -184,6 +174,8 @@ def setUp(self) -> None: ), edge_type=_MESSAGES_EDGE_TYPE, edge_usage_type=EdgeUsageType.POSITIVE, + src_identifier=_MESSAGES_EDGE_SRC_IDENTIFIER_FIELD, + dst_identifier=_MESSAGES_EDGE_DST_IDENTIFIER_FIELD, ) self.__input_negative_edges_data_reference = BigqueryEdgeDataReference( reference_uri=BqUtils.join_path( @@ -193,64 +185,20 @@ def setUp(self) -> None: ), edge_type=_MESSAGES_EDGE_TYPE, edge_usage_type=EdgeUsageType.NEGATIVE, + src_identifier=_MESSAGES_EDGE_SRC_IDENTIFIER_FIELD, + dst_identifier=_MESSAGES_EDGE_DST_IDENTIFIER_FIELD, ) # Create the node preprocessing specs - self.node_preprocessing_specs = cast( - Dict[NodeDataReference, NodeDataPreprocessingSpec], - { - self.__input_nodes_data_reference: NodeDataPreprocessingSpec( - feature_spec_fn=build_ingestion_feature_spec_fn( - fixed_int_fields=[str(_PERSON_NODE_OUTPUT_IDENTIFIER)], - fixed_float_fields=_PERSON_NODE_FEATURE_FLOAT_FIELDS, - ), - preprocessing_fn=build_passthrough_transform_preprocessing_fn(), - identifier_output=_PERSON_NODE_OUTPUT_IDENTIFIER, - features_outputs=_PERSON_NODE_FEATURE_FLOAT_FIELDS, - ) - }, - ) - + 
 
         self.__bq_tables_to_cleanup_on_teardown = [
             self.__input_nodes_data_reference.reference_uri,
@@ -334,7 +282,7 @@ def assert_enumerated_node_features_correctness(
             _PERSON_NODE_TYPE
         ]
 
-        node_id_field = str(_PERSON_NODE_OUTPUT_IDENTIFIER)
+        node_id_field = _PERSON_NODE_IDENTIFIER_FIELD
         expected_node_id_fields = _PERSON_NODE_FEATURE_FLOAT_FIELDS
 
         self.__assert_bq_table_schema_contains_all_fields(
@@ -344,7 +292,7 @@
 
         # Check that all the rows have unique ids and feature values.
         result = self.__bq_utils.run_query(
-            query=f"""SELECT {node_id_field}, {', '.join(expected_node_id_fields)} 
+            query=f"""SELECT {node_id_field}, {', '.join(expected_node_id_fields)}
             FROM `{person_enumerated_node_type_metadata.enumerated_node_data_reference.reference_uri}` """,
             labels=get_resource_config().get_resource_labels(),
@@ -405,24 +353,30 @@ def assert_enumerated_edge_features_correctness(
         self.assertIsNotNone(positive_enumerated_edge_type_metadata)
         self.assertIsNotNone(negative_enumerated_edge_type_metadata)
 
-        src_node_id_field = str(_MESSAGES_EDGE_OUTPUT_IDENTIFIER.src_node)
-        dst_node_id_field = str(_MESSAGES_EDGE_OUTPUT_IDENTIFIER.dst_node)
-
         # Check that the schema of the enumerated edge tables contains all the expected fields.
self.__assert_bq_table_schema_contains_all_fields( table_name=main_enumerated_edge_type_metadata.enumerated_edge_data_reference.reference_uri, expected_fields=_MESSAGE_EDGE_FEATURE_INT_FIELDS - + [src_node_id_field, dst_node_id_field], + + [ + _MESSAGES_EDGE_SRC_IDENTIFIER_FIELD, + _MESSAGES_EDGE_DST_IDENTIFIER_FIELD, + ], ) self.__assert_bq_table_schema_contains_all_fields( table_name=positive_enumerated_edge_type_metadata.enumerated_edge_data_reference.reference_uri, expected_fields=_POSITIVE_EDGE_FEATURE_INT_FIELDS - + [src_node_id_field, dst_node_id_field], + + [ + _MESSAGES_EDGE_SRC_IDENTIFIER_FIELD, + _MESSAGES_EDGE_DST_IDENTIFIER_FIELD, + ], ) self.__assert_bq_table_schema_contains_all_fields( table_name=negative_enumerated_edge_type_metadata.enumerated_edge_data_reference.reference_uri, - expected_fields=[src_node_id_field, dst_node_id_field], + expected_fields=[ + _MESSAGES_EDGE_SRC_IDENTIFIER_FIELD, + _MESSAGES_EDGE_DST_IDENTIFIER_FIELD, + ], ) # Check that all the rows have unique ids and feature values. @@ -433,9 +387,9 @@ def __assert_enumerated_table_rows_match_original_rows( ): result = list( self.__bq_utils.run_query( - query=f"""SELECT {src_node_id_field}, - {dst_node_id_field}, - {', '.join(expected_edge_feature_fields)} + query=f"""SELECT {_MESSAGES_EDGE_SRC_IDENTIFIER_FIELD}, + {_MESSAGES_EDGE_DST_IDENTIFIER_FIELD}, + {', '.join(expected_edge_feature_fields)} FROM `{table_name}`""", labels=get_resource_config().get_resource_labels(), ) @@ -447,8 +401,8 @@ def __assert_enumerated_table_rows_match_original_rows( hash( tuple( # List is not hashable, so we convert to tuple [ - record[src_node_id_field], - record[dst_node_id_field], + record[_MESSAGES_EDGE_SRC_IDENTIFIER_FIELD], + record[_MESSAGES_EDGE_DST_IDENTIFIER_FIELD], ] + [record[field] for field in expected_edge_feature_fields] ) @@ -462,8 +416,12 @@ def __assert_enumerated_table_rows_match_original_rows( hash( tuple( # List is not hashable, so we convert to tuple [ - int_to_orig_node_id_map[row[src_node_id_field]], - int_to_orig_node_id_map[row[dst_node_id_field]], + int_to_orig_node_id_map[ + row[_MESSAGES_EDGE_SRC_IDENTIFIER_FIELD] + ], + int_to_orig_node_id_map[ + row[_MESSAGES_EDGE_DST_IDENTIFIER_FIELD] + ], ] + [row[field] for field in expected_edge_feature_fields] ) @@ -505,8 +463,8 @@ def test_for_correctness(self): applied_task_identifier=AppliedTaskIdentifier( self.__applied_task_identifier ), - node_preprocessing_specs=self.node_preprocessing_specs, - edge_preprocessing_specs=self.edge_preprocessing_specs, + node_data_references=self.node_data_references, + edge_data_references=self.edge_data_references, gcp_project=get_resource_config().project, ) @@ -535,10 +493,10 @@ def test_for_correctness(self): ): edge_type_metadata for edge_type_metadata in list_enumerator_edge_type_metadata } - int_to_orig_node_id_map: Dict[int, str] = ( - self.fetch_enumerated_node_map_and_assert_correctness( - map_enum_node_type_metadata=map_enum_node_type_metadata - ) + int_to_orig_node_id_map: Dict[ + int, str + ] = self.fetch_enumerated_node_map_and_assert_correctness( + map_enum_node_type_metadata=map_enum_node_type_metadata ) self.assert_enumerated_node_features_correctness( int_to_orig_node_id_map=int_to_orig_node_id_map, diff --git a/python/tests/integration/pipeline_tests/trainer/__init__.py b/python/tests/integration/pipeline/inferencer/__init__.py similarity index 100% rename from python/tests/integration/pipeline_tests/trainer/__init__.py rename to python/tests/integration/pipeline/inferencer/__init__.py diff --git 
a/python/tests/integration/pipeline_tests/inferencer/inferencer_test.py b/python/tests/integration/pipeline/inferencer/inferencer_test.py similarity index 91% rename from python/tests/integration/pipeline_tests/inferencer/inferencer_test.py rename to python/tests/integration/pipeline/inferencer/inferencer_test.py index 17b9330..6b35d39 100644 --- a/python/tests/integration/pipeline_tests/inferencer/inferencer_test.py +++ b/python/tests/integration/pipeline/inferencer/inferencer_test.py @@ -25,7 +25,6 @@ ) from snapchat.research.gbml import gbml_config_pb2 from snapchat.research.gbml.inference_metadata_pb2 import InferenceOutput -from tests.test_assets.uri_constants import DEFAULT_TEST_RESOURCE_CONFIG_URI logger = Logger() @@ -35,7 +34,7 @@ def setUp(self) -> None: self.__bq_utils = BqUtils() self.__gcs_utils = GcsUtils() self.__proto_utils = ProtoUtils() - self.__inferencer = InferencerV1() + self.__inferencer = InferencerV1(bq_gcp_project=get_resource_config().project) def __clean_up_inferencer_test_assets( self, @@ -92,17 +91,17 @@ def __populate_gbml_config_with_inference_paths( len(node_types) == 1 ), "Node classification only supports single node types for inference output" for node_type in node_types: - node_type_to_inferencer_output_info_map[node_type].embeddings_path = ( - get_embeddings_table( - applied_task_identifier=applied_task_identifier, - node_type=NodeType(node_type), - ) + node_type_to_inferencer_output_info_map[ + node_type + ].embeddings_path = get_embeddings_table( + applied_task_identifier=applied_task_identifier, + node_type=NodeType(node_type), ) - node_type_to_inferencer_output_info_map[node_type].predictions_path = ( - get_predictions_table( - applied_task_identifier=applied_task_identifier, - node_type=NodeType(node_type), - ) + node_type_to_inferencer_output_info_map[ + node_type + ].predictions_path = get_predictions_table( + applied_task_identifier=applied_task_identifier, + node_type=NodeType(node_type), ) elif ( @@ -113,11 +112,11 @@ def __populate_gbml_config_with_inference_paths( should_include_src_nodes=True, should_include_dst_nodes=True, ): - node_type_to_inferencer_output_info_map[node_type].embeddings_path = ( - get_embeddings_table( - applied_task_identifier=applied_task_identifier, - node_type=NodeType(node_type), - ) + node_type_to_inferencer_output_info_map[ + node_type + ].embeddings_path = get_embeddings_table( + applied_task_identifier=applied_task_identifier, + node_type=NodeType(node_type), ) else: raise ValueError( @@ -159,7 +158,6 @@ def __validate_inferencer_for_mocked_dataset( self.__inferencer.run( applied_task_identifier=applied_task_id, task_config_uri=frozen_gbml_config_uri, - resource_config_uri=DEFAULT_TEST_RESOURCE_CONFIG_URI, ) node_type_to_inferencer_output_info_map = dict( gbml_config_pb.shared_config.inference_metadata.node_type_to_inferencer_output_info_map diff --git a/python/tests/integration/pipeline_tests/inferencer/unenumeration_test.py b/python/tests/integration/pipeline/inferencer/unenumeration_test.py similarity index 100% rename from python/tests/integration/pipeline_tests/inferencer/unenumeration_test.py rename to python/tests/integration/pipeline/inferencer/unenumeration_test.py diff --git a/python/tests/integration/pipeline_tests/trainer/task_spec_tests/__init__.py b/python/tests/integration/pipeline/split_generator/__init__.py similarity index 100% rename from python/tests/integration/pipeline_tests/trainer/task_spec_tests/__init__.py rename to python/tests/integration/pipeline/split_generator/__init__.py diff 
--git a/python/tests/integration/pipeline_tests/split_generator/lib/node_anchor_based_link_prediction.py b/python/tests/integration/pipeline/split_generator/lib/node_anchor_based_link_prediction.py similarity index 100% rename from python/tests/integration/pipeline_tests/split_generator/lib/node_anchor_based_link_prediction.py rename to python/tests/integration/pipeline/split_generator/lib/node_anchor_based_link_prediction.py diff --git a/python/tests/integration/pipeline_tests/split_generator/lib/supervised_node_classification.py b/python/tests/integration/pipeline/split_generator/lib/supervised_node_classification.py similarity index 100% rename from python/tests/integration/pipeline_tests/split_generator/lib/supervised_node_classification.py rename to python/tests/integration/pipeline/split_generator/lib/supervised_node_classification.py diff --git a/python/tests/integration/pipeline_tests/split_generator/split_generator_pipeline_test.py b/python/tests/integration/pipeline/split_generator/split_generator_pipeline_test.py similarity index 97% rename from python/tests/integration/pipeline_tests/split_generator/split_generator_pipeline_test.py rename to python/tests/integration/pipeline/split_generator/split_generator_pipeline_test.py index ddd4aef..e7ab963 100644 --- a/python/tests/integration/pipeline_tests/split_generator/split_generator_pipeline_test.py +++ b/python/tests/integration/pipeline/split_generator/split_generator_pipeline_test.py @@ -9,6 +9,7 @@ import numpy as np from gigl.common import GcsUri, LocalUri, Uri, UriFactory +from gigl.common.constants import SPARK_35_TFRECORD_JAR_LOCAL_PATH from gigl.common.logger import Logger from gigl.common.utils.proto_utils import ProtoUtils from gigl.env.pipelines_config import get_resource_config @@ -38,12 +39,11 @@ gbml_config_pb2, training_samples_schema_pb2, ) -from tests.integration.pipeline_tests.split_generator.lib import ( +from tests.integration.pipeline.split_generator.lib import ( node_anchor_based_link_prediction, supervised_node_classification, ) -from gigl.common.constants import SPARK_35_TFRECORD_JAR_LOCAL_PATH -from tests.integration.pipeline_tests.utils import ( +from tests.integration.pipeline.utils import ( get_gcs_assets_dir_from_frozen_gbml_config_uri, ) @@ -248,30 +248,30 @@ def __overwrite_splitgen_output_paths_link_prediction( ) for node_type in random_negative_node_types: - outputPaths.train_node_type_to_random_negative_data_uri[node_type] = ( - LocalUri.join( - tmp_split_generator_dir, - "train/", - "random_negative_samples/", - f"{node_type}/", - ).uri - ) - outputPaths.val_node_type_to_random_negative_data_uri[node_type] = ( - LocalUri.join( - tmp_split_generator_dir, - "val/", - "random_negative_samples/", - f"{node_type}/", - ).uri - ) - outputPaths.test_node_type_to_random_negative_data_uri[node_type] = ( - LocalUri.join( - tmp_split_generator_dir, - "test/", - "random_negative_samples/", - f"{node_type}/", - ).uri - ) + outputPaths.train_node_type_to_random_negative_data_uri[ + node_type + ] = LocalUri.join( + tmp_split_generator_dir, + "train/", + "random_negative_samples/", + f"{node_type}/", + ).uri + outputPaths.val_node_type_to_random_negative_data_uri[ + node_type + ] = LocalUri.join( + tmp_split_generator_dir, + "val/", + "random_negative_samples/", + f"{node_type}/", + ).uri + outputPaths.test_node_type_to_random_negative_data_uri[ + node_type + ] = LocalUri.join( + tmp_split_generator_dir, + "test/", + "random_negative_samples/", + f"{node_type}/", + ).uri frozen_gbml_config_uri = LocalUri.join( 
tmp_split_generator_dir, diff --git a/python/tests/unit/common_tests/__init__.py b/python/tests/integration/pipeline/subgraph_sampler/__init__.py similarity index 100% rename from python/tests/unit/common_tests/__init__.py rename to python/tests/integration/pipeline/subgraph_sampler/__init__.py diff --git a/python/tests/integration/pipeline_tests/subgraph_sampler/subgraph_sampler_test.py b/python/tests/integration/pipeline/subgraph_sampler/subgraph_sampler_test.py similarity index 99% rename from python/tests/integration/pipeline_tests/subgraph_sampler/subgraph_sampler_test.py rename to python/tests/integration/pipeline/subgraph_sampler/subgraph_sampler_test.py index 1c8f084..1f509b1 100644 --- a/python/tests/integration/pipeline_tests/subgraph_sampler/subgraph_sampler_test.py +++ b/python/tests/integration/pipeline/subgraph_sampler/subgraph_sampler_test.py @@ -40,7 +40,7 @@ NodeAnchorBasedLinkPredictionSample, RootedNodeNeighborhood, ) -from tests.integration.pipeline_tests.subgraph_sampler.utils import ( +from tests.integration.pipeline.subgraph_sampler.utils import ( EdgeMetadataInfo, ExpectedGraphFromPreprocessor, bidirectionalize_edge_type_to_edge_to_features_map, @@ -52,7 +52,7 @@ read_output_node_based_task_samples_from_subgraph_sampler, reconstruct_graph_information_from_preprocessor_output, ) -from tests.integration.pipeline_tests.utils import ( +from tests.integration.pipeline.utils import ( get_gcs_assets_dir_from_frozen_gbml_config_uri, ) @@ -897,9 +897,9 @@ def __check_number_of_in_edges_for_dst_node( def __build_condensed_edge_type_to_in_edges_count_map( in_edges: List[EdgePbWrapper], ): - condensed_edge_type_to_in_edges_count: Dict[CondensedEdgeType, int] = ( - defaultdict(lambda: 0) - ) + condensed_edge_type_to_in_edges_count: Dict[ + CondensedEdgeType, int + ] = defaultdict(lambda: 0) for edge_pb_wrapper in in_edges: condensed_edge_type_to_in_edges_count[ edge_pb_wrapper.condensed_edge_type diff --git a/python/tests/integration/pipeline_tests/subgraph_sampler/utils.py b/python/tests/integration/pipeline/subgraph_sampler/utils.py similarity index 95% rename from python/tests/integration/pipeline_tests/subgraph_sampler/utils.py rename to python/tests/integration/pipeline/subgraph_sampler/utils.py index 671a1f1..0369647 100644 --- a/python/tests/integration/pipeline_tests/subgraph_sampler/utils.py +++ b/python/tests/integration/pipeline/subgraph_sampler/utils.py @@ -8,6 +8,10 @@ import tensorflow as tf from gigl.common import GcsUri, LocalUri, UriFactory +from gigl.common.constants import ( + SPARK_31_TFRECORD_JAR_LOCAL_PATH, + SPARK_35_TFRECORD_JAR_LOCAL_PATH, +) from gigl.common.logger import Logger from gigl.common.utils.proto_utils import ProtoUtils from gigl.src.common.constants.local_fs import get_project_root_directory @@ -41,7 +45,6 @@ preprocessed_metadata_pb2, training_samples_schema_pb2, ) -from gigl.common.constants import SPARK_35_TFRECORD_JAR_LOCAL_PATH, SPARK_31_TFRECORD_JAR_LOCAL_PATH logger = Logger() @@ -103,13 +106,13 @@ def read_output_nablp_samples_from_subgraph_sampler( proto_cls=training_samples_schema_pb2.RootedNodeNeighborhood, ) ) - samples: List[training_samples_schema_pb2.NodeAnchorBasedLinkPredictionSample] = ( - read_training_sample_protos_from_tfrecords( - uri_prefix=UriFactory.create_uri( - uri=node_anchor_based_link_prediction_output.tfrecord_uri_prefix - ), - proto_cls=training_samples_schema_pb2.NodeAnchorBasedLinkPredictionSample, - ) + samples: List[ + training_samples_schema_pb2.NodeAnchorBasedLinkPredictionSample + ] = 
read_training_sample_protos_from_tfrecords( + uri_prefix=UriFactory.create_uri( + uri=node_anchor_based_link_prediction_output.tfrecord_uri_prefix + ), + proto_cls=training_samples_schema_pb2.NodeAnchorBasedLinkPredictionSample, ) return (node_type_to_rooted_neighborhood_samples, samples) @@ -165,11 +168,9 @@ def _build_node_features_map( condensed_node_type, node_metadata_output, ) in preprocessed_metadata_pb.condensed_node_type_to_preprocessed_metadata.items(): - node_type: NodeType = ( - gbml_config_pb_wrapper.graph_metadata_pb_wrapper.condensed_node_type_to_node_type_map[ - CondensedNodeType(condensed_node_type) - ] - ) + node_type: NodeType = gbml_config_pb_wrapper.graph_metadata_pb_wrapper.condensed_node_type_to_node_type_map[ + CondensedNodeType(condensed_node_type) + ] assert node_metadata_output is not None tfrecord_files = tf.io.gfile.glob( f"{node_metadata_output.tfrecord_uri_prefix}*.tfrecord" @@ -212,11 +213,9 @@ def _build_edge_features_map( condensed_edge_type, edge_metadata_output, ) in preprocessed_metadata_pb.condensed_edge_type_to_preprocessed_metadata.items(): - edge_type: EdgeType = ( - gbml_config_pb_wrapper.graph_metadata_pb_wrapper.condensed_edge_type_to_edge_type_map[ - CondensedEdgeType(condensed_edge_type) - ] - ) + edge_type: EdgeType = gbml_config_pb_wrapper.graph_metadata_pb_wrapper.condensed_edge_type_to_edge_type_map[ + CondensedEdgeType(condensed_edge_type) + ] assert edge_metadata_output is not None edge_metadata_info: Optional[ preprocessed_metadata_pb2.PreprocessedMetadata.EdgeMetadataInfo @@ -329,9 +328,9 @@ def bidirectionalize_feasible_adjacency_list_map( not gbml_config_pb_wrapper.graph_metadata_pb_wrapper.is_heterogeneous ), "Bidirectionalizing adjacency list map is only supported for homogeneous graphs." - bidirectional_adjacency_list_map: Dict[NodePbWrapper, List[EdgePbWrapper]] = ( - defaultdict(list) - ) + bidirectional_adjacency_list_map: Dict[ + NodePbWrapper, List[EdgePbWrapper] + ] = defaultdict(list) for _, edge_pbws in src_node_to_edge_map.items(): for edge_pbw in edge_pbws: ( @@ -377,9 +376,9 @@ def bidirectionalize_edge_type_to_edge_to_features_map( bidirectional_edge_pbw = edge_pbw.flip_edge() bidirectional_edge_to_features_map[edge_pbw] = features bidirectional_edge_to_features_map[bidirectional_edge_pbw] = features - bidirectional_edge_type_to_edge_to_features_map[edge_type] = ( - bidirectional_edge_to_features_map - ) + bidirectional_edge_type_to_edge_to_features_map[ + edge_type + ] = bidirectional_edge_to_features_map return bidirectional_edge_type_to_edge_to_features_map diff --git a/python/tests/unit/common_tests/collections_tests/__init__.py b/python/tests/integration/pipeline/trainer/__init__.py similarity index 100% rename from python/tests/unit/common_tests/collections_tests/__init__.py rename to python/tests/integration/pipeline/trainer/__init__.py diff --git a/python/tests/unit/common_tests/data/__init__.py b/python/tests/integration/pipeline/trainer/task_spec/__init__.py similarity index 100% rename from python/tests/unit/common_tests/data/__init__.py rename to python/tests/integration/pipeline/trainer/task_spec/__init__.py diff --git a/python/tests/integration/pipeline_tests/trainer/task_spec_tests/graphsage_template_modeling_spec_test.py b/python/tests/integration/pipeline/trainer/task_spec/graphsage_template_modeling_spec_test.py similarity index 100% rename from python/tests/integration/pipeline_tests/trainer/task_spec_tests/graphsage_template_modeling_spec_test.py rename to 
python/tests/integration/pipeline/trainer/task_spec/graphsage_template_modeling_spec_test.py
diff --git a/python/tests/integration/pipeline_tests/trainer/task_spec_tests/node_anchor_based_link_prediction_modeling_task_spec_test.py b/python/tests/integration/pipeline/trainer/task_spec/node_anchor_based_link_prediction_modeling_task_spec_test.py
similarity index 100%
rename from python/tests/integration/pipeline_tests/trainer/task_spec_tests/node_anchor_based_link_prediction_modeling_task_spec_test.py
rename to python/tests/integration/pipeline/trainer/task_spec/node_anchor_based_link_prediction_modeling_task_spec_test.py
diff --git a/python/tests/integration/pipeline_tests/trainer/task_spec_tests/node_classification_modeling_task_spec_test.py b/python/tests/integration/pipeline/trainer/task_spec/node_classification_modeling_task_spec_test.py
similarity index 100%
rename from python/tests/integration/pipeline_tests/trainer/task_spec_tests/node_classification_modeling_task_spec_test.py
rename to python/tests/integration/pipeline/trainer/task_spec/node_classification_modeling_task_spec_test.py
diff --git a/python/tests/integration/pipeline_tests/trainer/trainer_test.py b/python/tests/integration/pipeline/trainer/trainer_test.py
similarity index 100%
rename from python/tests/integration/pipeline_tests/trainer/trainer_test.py
rename to python/tests/integration/pipeline/trainer/trainer_test.py
diff --git a/python/tests/integration/pipeline_tests/utils.py b/python/tests/integration/pipeline/utils.py
similarity index 100%
rename from python/tests/integration/pipeline_tests/utils.py
rename to python/tests/integration/pipeline/utils.py
diff --git a/python/tests/test_assets/dataset_mocking/visualization_test/visualize_walkthrough.ipynb b/python/tests/test_assets/dataset_mocking/visualization_test/visualize_walkthrough.ipynb
index 641fc8e..149ea5e 100644
--- a/python/tests/test_assets/dataset_mocking/visualization_test/visualize_walkthrough.ipynb
+++ b/python/tests/test_assets/dataset_mocking/visualization_test/visualize_walkthrough.ipynb
@@ -5,6 +5,9 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "# Script URLs are not yet correct - work is in progress to make them OSS accessible\n",
+    "\n",
+    "\n",
     "## GiGL Pipeline Visualization\n",
     "This notebook shows the process of a simple, human-digestible graph being passed through all the pipeline components in GiGL in preparation for training, to help understand how each of the components works.\n",
     "\n",
@@ -63,141 +66,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Graph Configuration Yaml:\n",
-      "adj_list:\n",
-      "- dst:\n",
-      "  - 1\n",
-      "  - 3\n",
-      "  src: 0\n",
-      "- dst:\n",
-      "  - 0\n",
-      "  - 5\n",
-      "  src: 1\n",
-      "- dst:\n",
-      "  - 1\n",
-      "  src: 2\n",
-      "- dst:\n",
-      "  - 0\n",
-      "  - 5\n",
-      "  - 6\n",
-      "  - 7\n",
-      "  src: 3\n",
-      "- dst:\n",
-      "  - 3\n",
-      "  src: 4\n",
-      "- dst:\n",
-      "  - 1\n",
-      "  - 3\n",
-      "  - 6\n",
-      "  src: 5\n",
-      "- dst:\n",
-      "  - 2\n",
-      "  - 3\n",
-      "  - 7\n",
-      "  src: 6\n",
-      "- dst:\n",
-      "  - 3\n",
-      "  - 6\n",
-      "  - 8\n",
-      "  - 9\n",
-      "  src: 7\n",
-      "- dst:\n",
-      "  - 7\n",
-      "  - 9\n",
-      "  - 10\n",
-      "  src: 8\n",
-      "- dst:\n",
-      "  - 7\n",
-      "  - 8\n",
-      "  - 10\n",
-      "  src: 9\n",
-      "- dst:\n",
-      "  - 11\n",
-      "  - 9\n",
-      "  - 8\n",
-      "  src: 10\n",
-      "- dst:\n",
-      "  - 12\n",
-      "  src: 11\n",
-      "- dst:\n",
-      "  - 13\n",
-      "  src: 12\n",
-      "- dst: []\n",
-      "  src: 13\n",
-      "graph:\n",
-      "  edge_type:\n",
-      "    
dst_node_type: user\n", - " relation_type: friend\n", - " src_node_type: user\n", - " node_type: user\n", - "nodes:\n", - "- features:\n", - " - 0.0\n", - " - 0\n", - " src: 0\n", - "- features:\n", - " - 0.1\n", - " - 1\n", - " src: 1\n", - "- features:\n", - " - 0.2\n", - " - 2\n", - " src: 2\n", - "- features:\n", - " - 0.3\n", - " - 3\n", - " src: 3\n", - "- features:\n", - " - 0.4\n", - " - 4\n", - " src: 4\n", - "- features:\n", - " - 0.5\n", - " - 5\n", - " src: 5\n", - "- features:\n", - " - 0.6\n", - " - 6\n", - " src: 6\n", - "- features:\n", - " - 0.7\n", - " - 7\n", - " src: 7\n", - "- features:\n", - " - 0.8\n", - " - 8\n", - " src: 8\n", - "- features:\n", - " - 0.9\n", - " - 9\n", - " src: 9\n", - "- features:\n", - " - 0.01\n", - " - 10\n", - " src: 10\n", - "- features:\n", - " - 0.11\n", - " - 11\n", - " src: 11\n", - "- features:\n", - " - 0.12\n", - " - 12\n", - " src: 12\n", - "- features:\n", - " - 0.13\n", - " - 13\n", - " src: 13\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "import yaml\n", "\n", @@ -226,70 +97,13 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "GBML Config for Toy Graph:\n", - "datasetConfig:\n", - " dataPreprocessorConfig:\n", - " dataPreprocessorArgs:\n", - " bq_edges_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_user_friend_user_edges\n", - " bq_nodes_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_user_nodes\n", - " dataPreprocessorConfigClsPath: applied_tasks.mocking.toy_graph.toy_data_preprocessor_config.ToyDataPreprocessorConfig\n", - " splitGeneratorConfig:\n", - " assignerArgs:\n", - " seed: '42'\n", - " test_split: '0.2'\n", - " train_split: '0.7'\n", - " val_split: '0.1'\n", - " assignerClsPath: gigl.src.split_generator.lib.unsupervised_node_anchor_based_link_prediction.transductive.TransductiveEdgeToLinkSplitHashingAssigner\n", - " splitStrategyClsPath: gigl.src.split_generator.lib.unsupervised_node_anchor_based_link_prediction.transductive.TransductiveUnsupervisedNodeAnchorBasedLinkPredictionSplitStrategy\n", - " subgraphSamplerConfig:\n", - " numHops: 1\n", - " numNeighborsToSample: 2\n", - " numPositiveSamples: 1\n", - "inferencerConfig:\n", - " inferencerArgs:\n", - " hid_dim: '128'\n", - " num_layers: '2'\n", - " out_dim: '128'\n", - " inferencerClsPath: gigl.src.common.modeling_task_specs.node_anchor_based_link_prediction_modeling_task_spec.UnsupervisedNodeAnchorBasedLinkPredictionModelingTaskSpec\n", - "taskType: UNSUPERVISED_NODE_ANCHOR_BASED_LINK_PREDICTION\n", - "trainerConfig:\n", - " trainerArgs:\n", - " early_stop_patience: '10'\n", - " hid_dim: '128'\n", - " main_sample_batch_size: '4096'\n", - " margin: '0.3'\n", - " num_layers: '2'\n", - " num_test_batches: '100'\n", - " num_val_batches: '100'\n", - " optim_lr: '0.005'\n", - " optim_weight_decay: '0.0005'\n", - " out_dim: '128'\n", - " random_negative_sample_batch_size: '512'\n", - " random_negative_sample_batch_size_for_evaluation: '1000'\n", - " should_l2_normalize_output: 'True'\n", - " test_main_sample_num_workers: '2'\n", - " test_random_sample_num_workers: '1'\n", - " train_main_sample_num_workers: '4'\n", - " train_random_sample_num_workers: '2'\n", - " val_every_num_batches: '100'\n", - " val_main_sample_num_workers: '2'\n", - " val_random_sample_num_workers: '1'\n", - " trainerClsPath: 
gigl.src.common.modeling_task_specs.node_anchor_based_link_prediction_modeling_task_spec.UnsupervisedNodeAnchorBasedLinkPredictionModelingTaskSpec\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "import yaml\n", "\n", - "yaml_file = \"../../../../../internal/applied_tasks/mocking/toy_graph/configs/gbml_toy_config.yaml\"\n", + "yaml_file = \"TODO/gbml_toy_config.yaml\"\n", "\n", "def visualize_yaml(file_path):\n", " with open(file_path, 'r') as yaml_file:\n", @@ -312,7 +126,7 @@ "\n", "A toy graph preprocessor class can be found [Here](../../../../../internal/applied_tasks/mocking/toy_graph/toy_data_preprocessor_config.py) which will be used in the config for this notebook's demonstration\n", "\n", - "Alternatively, you can pass in `bq_edges_table_name` and `bq_nodes_table_name` in the config file as runtime arguments under dataPreprocessorArgs. These two indicate where the graphs can be found in BigQuery and additional prepare_for_pipeline logic is not needed for pulling data since it is already implemented in the [ToyDataPreprocessorConfig Class](https://github.sc-corp.net/Snapchat/GiGL/blob/622d5b5c5f5dcb07a07b52e7e85848bc318537e8/internal/applied_tasks/mocking/toy_graph/toy_data_preprocessor_config.py#L38). You can use the [Mocking Suite](../dataset_asset_mocking_suite.py) to populate BigQuery with the custom graph that was configured in the [graph_config.yaml](./graph_config.yaml). There is a method in the mocking suite called `_create_custom_toy_graph` which will be used in the `mock_toy_graph_homogeneous_unsupervised_node_anchor_based_link_prediction_dataset` function to populate the custom graph in BigQuery. The steps to do this are the following:\n", + "Alternatively, you can pass in `bq_edges_table_name` and `bq_nodes_table_name` in the config file as runtime arguments under dataPreprocessorArgs. These two indicate where the graphs can be found in BigQuery and additional prepare_for_pipeline logic is not needed for pulling data since it is already implemented in the [ToyDataPreprocessorConfig Class](https://github.com/Snapchat/GiGL/blob/622d5b5c5f5dcb07a07b52e7e85848bc318537e8/internal/applied_tasks/mocking/toy_graph/toy_data_preprocessor_config.py#L38). You can use the [Mocking Suite](../dataset_asset_mocking_suite.py) to populate BigQuery with the custom graph that was configured in the [graph_config.yaml](./graph_config.yaml). There is a method in the mocking suite called `_create_custom_toy_graph` which will be used in the `mock_toy_graph_homogeneous_unsupervised_node_anchor_based_link_prediction_dataset` function to populate the custom graph in BigQuery. The steps to do this are the following:\n", "\n", "1. Locate the `mock_toy_graph_homogeneous_unsupervised_node_anchor_based_link_prediction_dataset` in the mocking suite. \n", "2. 
Set toy_graph = self._create_custom_toy_graph(\"PATH/TO/GRAPH_CONFIG.yaml\")\n", @@ -351,11 +165,11 @@ "outputs": [], "source": [ "%%bash\n", - "PYTHONPATH=\"/home/$(whoami)/GiGL/TODO/:$PYTHONPATH\" python -m \\\n", + "python -m \\\n", " gigl.src.config_populator.config_populator \\\n", " --job_name toy_graph \\\n", - " --template_uri /home/$(whoami)/GiGL/TODO/applied_tasks/mocking/toy_graph/configs/gbml_toy_config.yaml \\\n", - " --output_file_path_frozen_gbml_config_uri /home/$(whoami)/GiGL/TODO/applied_tasks/mocking/toy_graph/configs/toy_graph-frozen-config_path.yaml" + " --template_uri toy_graph/configs/gbml_toy_config.yaml \\\n", + " --output_file_path_frozen_gbml_config_uri toy_graph/configs/toy_graph-frozen-config_path.yaml" ] }, { @@ -368,17 +182,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2023-06-16 18:54:19,045[INFO]gigl.common.utils.gcs-140646927939392@__download_blob_from_gcs : Downloading gs://TEMP DEV GBML PLACEHOLDER/toy_graph/config_populator/frozen_gbml_config.yaml to /var/tmp/tmpz6y_y6yf\n" - ] - } - ], + "outputs": [], "source": [ "from gigl.common.utils.proto_utils import ProtoUtils\n", "from gigl.common import UriFactory\n", @@ -398,89 +204,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Copying gs://TEMP DEV GBML PLACEHOLDER/toy_graph/config_populator/frozen_gbml_config.yaml...\n", - "/ [1 files][ 3.7 KiB/ 3.7 KiB] \n", - "Operation completed over 1 objects/3.7 KiB. \n", - "Frozen GBML Config Yaml:\n", - "datasetConfig:\n", - " dataPreprocessorConfig:\n", - " dataPreprocessorArgs:\n", - " bq_edges_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_user_friend_user_edges\n", - " bq_nodes_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_user_nodes\n", - " dataPreprocessorConfigClsPath: applied_tasks.mocking.toy_graph.toy_data_preprocessor_config.ToyDataPreprocessorConfig\n", - " splitGeneratorConfig:\n", - " assignerArgs:\n", - " seed: '42'\n", - " test_split: '0.2'\n", - " train_split: '0.7'\n", - " val_split: '0.1'\n", - " assignerClsPath: gigl.src.split_generator.lib.unsupervised_node_anchor_based_link_prediction.transductive.TransductiveEdgeToLinkSplitHashingAssigner\n", - " splitStrategyClsPath: gigl.src.split_generator.lib.unsupervised_node_anchor_based_link_prediction.transductive.TransductiveUnsupervisedNodeAnchorBasedLinkPredictionSplitStrategy\n", - " subgraphSamplerConfig:\n", - " numHops: 2\n", - " numNeighborsToSample: 2\n", - " numPositiveSamples: 1\n", - "inferencerConfig:\n", - " inferencerArgs:\n", - " hid_dim: '128'\n", - " num_layers: '2'\n", - " out_dim: '128'\n", - " inferencerClsPath: gigl.src.common.modeling_task_specs.node_anchor_based_link_prediction_modeling_task_spec.UnsupervisedNodeAnchorBasedLinkPredictionModelingTaskSpec\n", - "sharedConfig:\n", - " datasetMetadata:\n", - " unsupervisedNodeAnchorBasedLinkPredictionDataset:\n", - " testMainDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/test/main_samples/samples/\n", - " testRandomNegativeDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/test/random_negatives/neighborhoods-\n", - " trainMainDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/train/main_samples/samples/\n", - " trainRandomNegativeDataUri: gs://TEMP DEV GBML 
PLACEHOLDER/toy_graph/split_generator/train/random_negatives/neighborhoods-\n", - " valMainDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/val/main_samples/samples/\n", - " valRandomNegativeDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/val/random_negatives/neighborhoods-\n", - " flattenedGraphMetadata:\n", - " unsupervisedNodeAnchorBasedLinkPredictionOutput:\n", - " randomNegativeTfrecordUriPrefix: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/subgraph_sampler/unsupervised_node_anchor_based_link_prediction/random_negative_rooted_neighborhood_samples/samples/\n", - " tfrecordUriPrefix: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/subgraph_sampler/unsupervised_node_anchor_based_link_prediction/unsupervised_node_anchor_based_link_prediction_samples/samples/\n", - " inferenceMetadata:\n", - " embeddingsBqPath: external-snap-ci-github-gigl.gbml_embeddings.embeddings_toy_graph\n", - " preprocessedMetadataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/data_preprocess/preprocessed_metadata.yaml\n", - " trainedModelMetadata:\n", - " evalMetricsUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/trainer/models/eval_metrics.json\n", - " scriptedModelUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/trainer/models/scripted_model.pt\n", - " trainedModelUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/trainer/models/model.pt\n", - "taskType: UNSUPERVISED_NODE_ANCHOR_BASED_LINK_PREDICTION\n", - "trainerConfig:\n", - " trainerArgs:\n", - " early_stop_patience: '10'\n", - " hid_dim: '128'\n", - " main_sample_batch_size: '4096'\n", - " margin: '0.3'\n", - " num_layers: '2'\n", - " num_test_batches: '100'\n", - " num_val_batches: '100'\n", - " optim_lr: '0.005'\n", - " optim_weight_decay: '0.0005'\n", - " out_dim: '128'\n", - " random_negative_sample_batch_size: '512'\n", - " random_negative_sample_batch_size_for_evaluation: '1000'\n", - " should_l2_normalize_output: 'True'\n", - " test_main_sample_num_workers: '2'\n", - " test_random_sample_num_workers: '1'\n", - " train_main_sample_num_workers: '4'\n", - " train_random_sample_num_workers: '2'\n", - " val_every_num_batches: '100'\n", - " val_main_sample_num_workers: '2'\n", - " val_random_sample_num_workers: '1'\n", - " trainerClsPath: gigl.src.common.modeling_task_specs.node_anchor_based_link_prediction_modeling_task_spec.UnsupervisedNodeAnchorBasedLinkPredictionModelingTaskSpec\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "# Copy frozen config to local\n", "!gsutil cp gs://TEMP DEV GBML PLACEHOLDER/toy_graph/config_populator/frozen_gbml_config.yaml .\n", @@ -509,37 +235,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sharedConfig:\n", - " datasetMetadata:\n", - " unsupervisedNodeAnchorBasedLinkPredictionDataset:\n", - " testMainDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/test/main_samples/samples/\n", - " testRandomNegativeDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/test/random_negatives/neighborhoods-\n", - " trainMainDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/train/main_samples/samples/\n", - " trainRandomNegativeDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/train/random_negatives/neighborhoods-\n", - " valMainDataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/val/main_samples/samples/\n", - " valRandomNegativeDataUri: gs://TEMP DEV GBML 
PLACEHOLDER/toy_graph/split_generator/val/random_negatives/neighborhoods-\n", - " flattenedGraphMetadata:\n", - " unsupervisedNodeAnchorBasedLinkPredictionOutput:\n", - " randomNegativeTfrecordUriPrefix: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/subgraph_sampler/unsupervised_node_anchor_based_link_prediction/random_negative_rooted_neighborhood_samples/samples/\n", - " tfrecordUriPrefix: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/subgraph_sampler/unsupervised_node_anchor_based_link_prediction/unsupervised_node_anchor_based_link_prediction_samples/samples/\n", - " inferenceMetadata:\n", - " embeddingsBqPath: external-snap-ci-github-gigl.gbml_embeddings.embeddings_toy_graph\n", - " preprocessedMetadataUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/data_preprocess/preprocessed_metadata.yaml\n", - " trainedModelMetadata:\n", - " evalMetricsUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/trainer/models/eval_metrics.json\n", - " scriptedModelUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/trainer/models/scripted_model.pt\n", - " trainedModelUri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/trainer/models/model.pt\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "import yaml\n", "\n", @@ -560,7 +258,7 @@ " diff[key] = dict2[key]\n", " return diff\n", "\n", - "gbml_config = \"../../../../../internal/applied_tasks/mocking/toy_graph/configs/gbml_toy_config.yaml\"\n", + "gbml_config = \"toy_graph/configs/gbml_toy_config.yaml\"\n", "frozen_config_local = \"./frozen_gbml_config.yaml\"\n", "compare_yaml(gbml_config, frozen_config_local)" ] @@ -577,1137 +275,9 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'src': 0, 'dst': [1, 3]}\n", - "{'src': 1, 'dst': [0, 5]}\n", - "{'src': 2, 'dst': [1]}\n", - "{'src': 3, 'dst': [0, 5, 6, 7]}\n", - "{'src': 4, 'dst': [3]}\n", - "{'src': 5, 'dst': [1, 3, 6]}\n", - "{'src': 6, 'dst': [2, 3, 7]}\n", - "{'src': 7, 'dst': [3, 6, 8, 9]}\n", - "{'src': 8, 'dst': [7, 9, 10]}\n", - "{'src': 9, 'dst': [7, 8, 10]}\n", - "{'src': 10, 'dst': [11, 9, 8]}\n", - "{'src': 11, 'dst': [12]}\n", - "{'src': 12, 'dst': [13]}\n", - "{'src': 13, 'dst': []}\n" - ] - }, - { - "data": { - "image/svg+xml": [ - "\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " 2023-06-05T23:59:49.158806\n", - " image/svg+xml\n", - " \n", - " \n", - " Matplotlib v3.6.3, https://matplotlib.org/\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " 
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "\n" - ], - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "from visualize import GraphVisualizer\n", "\n", @@ -1728,9 +298,9 @@ "\n", "Overall, this class houses all logic for\n", "\n", - "- Preparing datasets for ingestion and transformation (see [`prepare_for_pipeline`](https://github.sc-corp.net/Snapchat/GiGL/blob/10f1a35196f3946ae14c3e8e57d1cb685f01ffb5/python/gigl/src/data_preprocessor/lib/data_preprocessor_config.py#L40) function) So this is where you would house logic to pull data from a custom data source or perform any specific transformations.\n", - "- Defining transformation imperatives for different node types (see [`get_nodes_preprocessing_spec`](https://github.sc-corp.net/Snapchat/GiGL/blob/10f1a35196f3946ae14c3e8e57d1cb685f01ffb5/python/gigl/src/data_preprocessor/lib/data_preprocessor_config.py#L54) function)\n", - "- Defining transformation imperatives for different edge types (see [`get_edges_preprocessing_spec`](https://github.sc-corp.net/Snapchat/GiGL/blob/10f1a35196f3946ae14c3e8e57d1cb685f01ffb5/python/gigl/src/data_preprocessor/lib/data_preprocessor_config.py#L60))" + "- Preparing datasets for ingestion and transformation (see [`prepare_for_pipeline`](https://github.com/Snapchat/GiGL/blob/10f1a35196f3946ae14c3e8e57d1cb685f01ffb5/python/gigl/src/data_preprocessor/lib/data_preprocessor_config.py#L40) function) So this is where you would house logic to pull data from a custom data source or perform any specific transformations.\n", + "- Defining transformation imperatives for different node types (see [`get_nodes_preprocessing_spec`](https://github.com/Snapchat/GiGL/blob/10f1a35196f3946ae14c3e8e57d1cb685f01ffb5/python/gigl/src/data_preprocessor/lib/data_preprocessor_config.py#L54) function)\n", + "- Defining transformation imperatives for different edge types (see [`get_edges_preprocessing_spec`](https://github.com/Snapchat/GiGL/blob/10f1a35196f3946ae14c3e8e57d1cb685f01ffb5/python/gigl/src/data_preprocessor/lib/data_preprocessor_config.py#L60))" ] }, { @@ -1743,33 +313,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Frozen Config Datapreprocessor Information:\n", - "Preprocessed Metadata Uri: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/data_preprocess/preprocessed_metadata.yaml\n", - "Data Preprocessor Config: data_preprocessor_config_cls_path: \"applied_tasks.mocking.toy_graph.toy_data_preprocessor_config.ToyDataPreprocessorConfig\"\n", - "data_preprocessor_args {\n", - " key: \"bq_edges_table_name\"\n", - " value: \"external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_user_friend_user_edges\"\n", - "}\n", - "data_preprocessor_args {\n", - " key: \"bq_nodes_table_name\"\n", - " value: \"external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_user_nodes\"\n", - "}\n", - "\n", - "Flattened Graph Metadata: unsupervised_node_anchor_based_link_prediction_output {\n", - " tfrecord_uri_prefix: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/subgraph_sampler/unsupervised_node_anchor_based_link_prediction/unsupervised_node_anchor_based_link_prediction_samples/samples/\"\n", - " random_negative_tfrecord_uri_prefix: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/subgraph_sampler/unsupervised_node_anchor_based_link_prediction/random_negative_rooted_neighborhood_samples/samples/\"\n", - "}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(\"Frozen Config Datapreprocessor Information:\")\n", "\n", @@ -1791,33 
+337,13 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Inferred Graph Metadata:\n", - "condensed_edge_type_map {\n", - " key: 0\n", - " value {\n", - " relation: \"is_friends_with\"\n", - " src_node_type: \"user\"\n", - " dst_node_type: \"user\"\n", - " }\n", - "}\n", - "condensed_node_type_map {\n", - " key: 0\n", - " value: \"user\"\n", - "}\n" - ] - } - ], + "outputs": [], "source": [ "%%bash -s \"$frozen_config_uri\"\n", "\n", - "PYTHONPATH=\"/home/$(whoami)/GiGL/TODO/:$PYTHONPATH\" python -m \\\n", + "python -m \\\n", " gigl.src.data_preprocessor.data_preprocessor \\\n", " --job_name toy_graph \\\n", " --task_config_uri \"$1\"" @@ -1833,102 +359,9 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Downloading gs://TEMP DEV GBML PLACEHOLDER/toy_graph/data_preprocess/preprocessed_metadata.yaml to /home/abatra2/GiGL/python/tests/test_assets/dataset_mocking/visualization_test/temp_metadata_dict.yaml\n", - "\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-06-06 00:28:09.205194: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Reading TFRecord file: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/data_preprocess/staging/transformed_node_features_dir/user/features/-00000-of-00001.tfrecord\n", - "\n", - "One Sample Straight From TFRecord (Unformatted):\n", - " features {\n", - " feature {\n", - " key: \"f0\"\n", - " value {\n", - " float_list {\n", - " value: 0.8999999761581421\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"f1\"\n", - " value {\n", - " float_list {\n", - " value: 9.0\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"node_id\"\n", - " value {\n", - " int64_list {\n", - " value: 9\n", - " }\n", - " }\n", - " }\n", - "}\n", - "\n", - " Node Type f0 f1\n", - "node_id \n", - "9 user 0.90 9.0\n", - "10 user 0.01 10.0\n", - "7 user 0.70 7.0\n", - "5 user 0.50 5.0\n", - "13 user 0.13 13.0\n", - "3 user 0.30 3.0\n", - "2 user 0.20 2.0\n", - "6 user 0.60 6.0\n", - "12 user 0.12 12.0\n", - "0 user 0.00 0.0\n", - "1 user 0.10 1.0\n", - "8 user 0.80 8.0\n", - "11 user 0.11 11.0\n", - "4 user 0.40 4.0\n", - "Displaying Edge Metadata: \n", - "\n", - "Reading TFRecord file: gs://TEMP DEV GBML PLACEHOLDER/toy_graph/data_preprocess/staging/transformed_edge_features_dir/user-is_friends_with-user/features/-00000-of-00001.tfrecord\n", - "\n", - "One Sample Straight From TFRecord (Unformatted):\n", - " features {\n", - " feature {\n", - " key: \"dst\"\n", - " value {\n", - " int64_list {\n", - " value: 1\n", - " }\n", - " }\n", - " }\n", - " feature {\n", - " key: \"src\"\n", - " value {\n", - " int64_list {\n", - " value: 0\n", - " }\n", - " }\n", - " }\n", - "}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "from visualize_preprocessor_output import visualize_preprocessed_graph\n", "\n", @@ -1946,549 +379,18 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - 
"text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Node Typef0f1
node_id
9user0.909.0
10user0.0110.0
7user0.707.0
5user0.505.0
13user0.1313.0
3user0.303.0
2user0.202.0
6user0.606.0
12user0.1212.0
0user0.000.0
1user0.101.0
8user0.808.0
11user0.1111.0
4user0.404.0
\n", - "
" - ], - "text/plain": [ - " Node Type f0 f1\n", - "node_id \n", - "9 user 0.90 9.0\n", - "10 user 0.01 10.0\n", - "7 user 0.70 7.0\n", - "5 user 0.50 5.0\n", - "13 user 0.13 13.0\n", - "3 user 0.30 3.0\n", - "2 user 0.20 2.0\n", - "6 user 0.60 6.0\n", - "12 user 0.12 12.0\n", - "0 user 0.00 0.0\n", - "1 user 0.10 1.0\n", - "8 user 0.80 8.0\n", - "11 user 0.11 11.0\n", - "4 user 0.40 4.0" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "node_df" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Condensed Edge Typedst node typedst node valuesrc node typesrc node valuerelation
00user[1]user[0]is_friends_with
10user[3]user[0]is_friends_with
20user[5]user[1]is_friends_with
30user[0]user[1]is_friends_with
40user[1]user[2]is_friends_with
50user[5]user[3]is_friends_with
60user[0]user[3]is_friends_with
70user[7]user[3]is_friends_with
80user[6]user[3]is_friends_with
90user[3]user[4]is_friends_with
100user[3]user[5]is_friends_with
110user[1]user[5]is_friends_with
120user[6]user[5]is_friends_with
130user[3]user[6]is_friends_with
140user[7]user[6]is_friends_with
150user[2]user[6]is_friends_with
160user[8]user[7]is_friends_with
170user[6]user[7]is_friends_with
180user[3]user[7]is_friends_with
190user[9]user[7]is_friends_with
200user[9]user[8]is_friends_with
210user[7]user[8]is_friends_with
220user[10]user[8]is_friends_with
230user[10]user[9]is_friends_with
240user[7]user[9]is_friends_with
250user[8]user[9]is_friends_with
260user[9]user[10]is_friends_with
270user[8]user[10]is_friends_with
280user[11]user[10]is_friends_with
290user[12]user[11]is_friends_with
300user[13]user[12]is_friends_with
\n", - "
" - ], - "text/plain": [ - " Condensed Edge Type dst node type dst node value src node type \\\n", - "0 0 user [1] user \n", - "1 0 user [3] user \n", - "2 0 user [5] user \n", - "3 0 user [0] user \n", - "4 0 user [1] user \n", - "5 0 user [5] user \n", - "6 0 user [0] user \n", - "7 0 user [7] user \n", - "8 0 user [6] user \n", - "9 0 user [3] user \n", - "10 0 user [3] user \n", - "11 0 user [1] user \n", - "12 0 user [6] user \n", - "13 0 user [3] user \n", - "14 0 user [7] user \n", - "15 0 user [2] user \n", - "16 0 user [8] user \n", - "17 0 user [6] user \n", - "18 0 user [3] user \n", - "19 0 user [9] user \n", - "20 0 user [9] user \n", - "21 0 user [7] user \n", - "22 0 user [10] user \n", - "23 0 user [10] user \n", - "24 0 user [7] user \n", - "25 0 user [8] user \n", - "26 0 user [9] user \n", - "27 0 user [8] user \n", - "28 0 user [11] user \n", - "29 0 user [12] user \n", - "30 0 user [13] user \n", - "\n", - " src node value relation \n", - "0 [0] is_friends_with \n", - "1 [0] is_friends_with \n", - "2 [1] is_friends_with \n", - "3 [1] is_friends_with \n", - "4 [2] is_friends_with \n", - "5 [3] is_friends_with \n", - "6 [3] is_friends_with \n", - "7 [3] is_friends_with \n", - "8 [3] is_friends_with \n", - "9 [4] is_friends_with \n", - "10 [5] is_friends_with \n", - "11 [5] is_friends_with \n", - "12 [5] is_friends_with \n", - "13 [6] is_friends_with \n", - "14 [6] is_friends_with \n", - "15 [6] is_friends_with \n", - "16 [7] is_friends_with \n", - "17 [7] is_friends_with \n", - "18 [7] is_friends_with \n", - "19 [7] is_friends_with \n", - "20 [8] is_friends_with \n", - "21 [8] is_friends_with \n", - "22 [8] is_friends_with \n", - "23 [9] is_friends_with \n", - "24 [9] is_friends_with \n", - "25 [9] is_friends_with \n", - "26 [10] is_friends_with \n", - "27 [10] is_friends_with \n", - "28 [10] is_friends_with \n", - "29 [11] is_friends_with \n", - "30 [12] is_friends_with " - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "edge_df" ] @@ -2533,21 +435,9 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "unsupervised_node_anchor_based_link_prediction_output {\n", - " tfrecord_uri_prefix: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/subgraph_sampler/unsupervised_node_anchor_based_link_prediction/unsupervised_node_anchor_based_link_prediction_samples/samples/\"\n", - " random_negative_tfrecord_uri_prefix: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/subgraph_sampler/unsupervised_node_anchor_based_link_prediction/random_negative_rooted_neighborhood_samples/samples/\"\n", - "}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(frozen_config.shared_config.flattened_graph_metadata)" ] @@ -2568,57 +458,9 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2023-06-16 20:32:20,527[INFO]gigl.common.utils.gcs-140646927939392@__download_blob_from_gcs : Downloading gs://TEMP DEV GBML PLACEHOLDER/toy_graph/config_populator/frozen_gbml_config.yaml to /var/tmp/tmpw1xxi3ii\n" - ] - }, - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAApQAAAHzCAYAAACe1o1DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAAB7VklEQVR4nO3dd1hT5+MF8BP2UhQXYAVrRVxFEAe4B4o4ch04Wm1pXdRdtXW2VK2jrlpt3bXOWq0b3AsHKE4QByoiIlociCg7QO7vD77mZwQVZNwknM/z8BSSO06AhuP73iETRVEEEREREdEH0pM6ABERERFpNxZKIiIiIioUFkoiIiIiKhQWSiIiIiIqFBZKIiIiIioUFkoiIiIiKhQWSiIiIiIqFBZKIiIiIioUFkoiIiIiKhQWSiIiIiIqFBZKIiIiIioUFkoiIiIiKhQWSiIiIiIqFBZKIiIiIioUA6kDEBFRyVAqlVAoFFLHICINYWhoCH19/SLZFgslEVEpoFAoEB0dDaVSKXUUItIg5cqVg7W1NWQyWaG2w0JJRKTjRFFEXFwc9PX1Ua1aNejp8WgnotJOFEWkpqbiyZMnAAAbG5tCbY+FkohIx2VlZSE1NRW2trYwMzOTOg4RaQhTU1MAwJMnT1C5cuVCTX/zn6lERDouOzsbAGBkZCRxEiLSNK/+kZmZmVmo7XCEkoiolCjMMVJXrgArVwJhYUBSElCmDODsDPj6Ag0aFFlEIiphhT128hUWSiIieqsLF4BvvwXOnMn93NmzwPLlQPPmwKJFQOPGJR6PiDQEp7yJiChP+/YBrVrlLpMmJupfBwfnLLdvX8ll00UymQy7d++WOgYAYN26dShXrlyB1vnqq6/QvXv3dy5TvXp1/Pbbbx+cqzBOnDgBmUyGxMTEEt/3tGnT4OzsXOL7LUkslERElMuFC4C3N5CenvO1oyOwbBnw4gWQlgYkJuZ87eiY83x6es7yFy4UXYavvvoKMpkMMpkMhoaG+PjjjzFhwgSkvwpVRNq0aYNvv/02X8vJZDJs2bJF7fHffvsN1atXL9JMH+LevXuQyWSoXLkykpKS1J5zdnbGtGnT8r2tvn374vbt20WckHQZCyUREeXy7bf/Xyb79AHCw4Fhw4CyZXMes7TM+frKFaB375zH0tOBsWOLNkenTp0QFxeHu3fvYtGiRVi5ciV++umnot1JAZiYmOCHH34o9AkMxSkpKQkLFiwo1DZMTU1RuXLlIkokPV7Qv/ixUBIRkZqwsP+f5nZ0BDZuBN52grixMbBp0/+PVAYH55TMomJsbAxra2tUq1YN3bt3h4eHB44cOaJ6PiMjA6NHj0blypVhYmKCFi1a4MIbw6QnT55EkyZNYGxsDBsbG0yaNAlZWVkAckZBT548icWLF6tGQ+/du/fWPJ999hkSExOxevXqd+Zevnw5PvnkExgZGcHR0REbN25Uez4yMhKtWrWCiYkJ6tatq/aaXomNjUWfPn1Qrlw5WFlZQRCEd2Z7ZdSoUfj1119V1xfMS0ZGBr777jtUrVoV5ubmaNq0KU6cOKF6Pq8p75kzZ6Jy5cooU6YMBg8ejEmTJuU5jbtgwQLY2NigQoUKGDFiRK7ynZSUhM8++wzm5uaoWrUqli5dqvb8/fv3IQgCLCwsULZsWfTp0wePHz9WPZ/X1Pq3336LNm3aqL5u06YNRo4ciW+//RYVK1aEp6en6rlLly6hUaNGMDMzQ7NmzXDr1i21bb3vZ/e+fADwyy+/oEqVKihTpgwGDRpU5KPqmoiFkoiI1Kxa9f+fjxnz9jL5ipERMHp03usXpWvXruHMmTNqlz+aMGECduzYgfXr1+Py5cuoWbMmPD09kZCQAAB4+PAhOnfujMaNG+PKlStYvnw51qxZg5kzZwIAFi9eDHd3dwwZMgRxcXGIi4tDtWrV3pqhbNmymDp1KmbMmIGUlJQ8l9m1axfGjBmD8ePH49q1a/D19cXXX3+NwMBAADm3wOzZsyeMjIxw7tw5rFixAhMnTlTbRmZmJjw9PVGmTBmcPn0awcHBsLCwQKdOnd472vbZZ5+hZs2amDFjxluXGTlyJM6ePYstW7YgPDwcvXv3RqdOnRAZGZnn8n///TdmzZqFuXPn4tKlS7Czs8Py5ctzLRcYGIioqCgEBgZi/fr1WLduHdatW6e2zPz589GgQQOEhoZi0qRJGDNmjKpQK5VKCIKAhIQEnDx5EkeOHMHdu3fRt2/fd77mvKxfvx5GRkYIDg7GihUrVI9PnToVCxcuxMWLF2FgYICBAweqnsvPz+59+f79919MmzYNs2fPxsWLF2FjY4Nly5YVOL/WEYmISKelpaWJN27cENPS0vK1vLu7KAI5H4mJ+dtHYuL/r9OsWSHCvsbHx0fU19cXzc3NRWNjYxGAqKenJ27fvl0URVFMTk4WDQ0Nxb///lu1jkKhEG1tbcV58+aJoiiKU6ZMER0dHUWlUqlaZunSpaKFhYWYnZ0tiqIotm7dWhwzZsx787xaLj09XbS3txdnzJghiqIoLlq0SLS3t1ct16xZM3HIkCFq6/bu3Vvs3LmzKIqieOjQIdHAwEB8+PCh6vkDBw6IAMRdu3aJoiiKGzduzJU7IyNDNDU1FQ8dOpRnvujoaBGAGBoaKh48eFA0NDQU79y5I4qiKDZo0ED86aefRFEUxZiYGFFfX19t/6Ioiu3btxcnT54siqIorl27VrS0tFQ917RpU3HEiBFqyzdv3lxs0KCB6msfHx/R3t5ezMrKUnvdffv2VX1tb28vdurUSW07ffv2Fb28vERRFMXDhw+L+vr64v3791XPX79+XQQgnj9/XrUfQRDUtjFmzBixdevWqq9bt24turi4qC0TGBgoAhCPHj2qemzfvn0iANX/G+/72eUnn7u7uzh8+HC1bTRt2lTte6VJCvr+8DYcoSQiIjWvzucwMck5VjI/LC1zpr9fX78otG3bFmFhYTh37hx8fHzw9ddfo1evXgCAqKgoZGZmonnz5qrlDQ0N0aRJE0RERAAAIiIi4O7urnatvebNmyM5ORkPHjz4oEzGxsaYMWMGFixYgPj4+FzPR0REqGV6tc/XM1WrVg22traq593d3dWWv3LlCu7cuYMyZcrAwsICFhYWsLKyQnp6OqKiot6b0dPTEy1atMCPP/6Y67mrV68iOzsbtWrVUm3bwsICJ0+efOu2b926hSZNmqg99ubXAFCvXj21u63Y2Njkmnp/87W6u7vn+t68Pkpct25dlCtXTrVMfrm6uub5uJOTk1o+AKqM+f3ZvStfREQEmjZtmus16jpeh5KIiNSUKZPz3/T0nLO681MqX7wAMjLU1y8K5ubmqFmzJgDgr7/+QoMGDbBmzRoMGjSo6HbyAQYMGIAFCxZg5syZxXKGd3JyMlxdXfH333/neq5SpUr52sYvv/wCd3d3fP/997m2ra+vj0uXLuW61Z6FhcWHh0ZOoX+dTCaDUqks1DbfpKenB1EU1R7L6yQpc3PzPNd/PeOrf2gUdcbSiCOURESk5vXzLDZvzt86r/ee4rrcnp
6eHqZMmYIffvgBaWlpqhMngoODVctkZmbiwoULqFu3LgCgTp06OHv2rFoBCQ4ORpkyZfDRRx8ByLkl5avbUxYky5w5c7B8+fJcJ8rUqVNHLdOrfb6eKTY2FnFxcarnQ0JC1JZv2LAhIiMjUblyZdSsWVPtwzKfw8ZNmjRBz549MWnSJLXHXVxckJ2djSdPnuTatrW1dZ7bcnR0zHWy05tf59ebrzUkJAR16tQB8P/fm9jYWNXzN27cQGJiour7V6lSJbXvHQCEhYV9UJY35fdn9658derUwblz53K9Rp1XJBPwRESksQp6jFRo6P8fD+noKIoZGe9ePj09Z7lX64SFFT6zKOZ9rFxmZqZYtWpVcf78+aIo5hw7Z2trKx44cEC8fv266OPjI5YvX15MSEgQRVEUHzx4IJqZmYkjRowQIyIixN27d4sVK1ZUHU8oiqI4ZMgQsXHjxmJ0dLT49OlT1bGVb8rrWMuWLVuKJiYmasdQ7tq1SzQ0NBSXLVsm3r59W1y4cKGor68vBgYGiqIoitnZ2WLdunXFDh06iGFhYeKpU6dEV1dXtWMoU1JSRAcHB7FNmzbiqVOnxLt374qBgYHiqFGjxNjY2DzzvX4M5Su3bt0SDQwMRBMTE7XX3L9/f7F69erijh07xLt374rnzp0TZ8+eLe7du1cUxdzHUG7atEk0NTUV161bJ96+fVv8+eefxbJly4rOzs7v/Hm9eWyjvb29WLZsWXHu3LnirVu3xD/++EPU19cXDx48KIqiKCqVStHZ2Vls2bKleOnSJfHcuXOiq6ur2jYOHjwoymQycf369eLt27dFPz8/sWzZsrmOoXzzZ/XqGMrnz5+rHgsNDRUBiNHR0fn62eUn35YtW0QTExPxr7/+Em/duiX6+fmJZcqU0fljKFkoiYh03If8wWjW7P8LYp8+by+V6emi2Lv3/y/bvHkRhRbzLiiiKIpz5swRK1WqJCYnJ4tpaWniqFGjxIoVK4rGxsZi8+bNVSdHvHLixAmxcePGopGRkWhtbS1OnDhRzMzMVD1/69Yt0c3NTTQ1NVUrF2/Kq6ScOXNGBKBWKEVRFJctWybWqFFDNDQ0FGvVqiVu2LBB7flbt26JLVq0EI2MjMRatWqJBw8eVCuUoiiKcXFx4pdffql6bTVq1BCHDBkivnjxIs98eRVKURTFoUOHigDUCqVCoRD9/PzE6tWri4aGhqKNjY3Yo0cPMTw8XBTF3IVSFEVxxowZYsWKFUULCwtx4MCB4ujRo0U3NzfV8/ktlNOnTxd79+4tmpmZidbW1uLixYvV1omJiRHlcrlobm4ulilTRuzdu7f46NEjtWX8/PzEKlWqiJaWluLYsWPFkSNHFkmhFMX3/+zyk2/WrFmq75WPj484YcIEnS+UMlF840AEIiLSKenp6YiOjsbHH38Mkzfvm/gWFy7k3E7x9TvljB4N9O+fc0zlixc509xLlgCvLuNnagqcPMl7epcWHTp0gLW1da7rNJJ2+ZD3h7zwpBwiIsqlcWNg+/b/v/3irVvAiBE5H8bG/38CziumpsC2bSyTuio1NRUrVqyAp6cn9PX18c8//+Do0aN5XpCdSieelENERHnq0gU4dQp44yoqucpk8+Y5I5NdupRcNipZMpkM+/fvR6tWreDq6oqAgADs2LEDHh4eUkcjDcERSiIieqvGjYGgoJzbKa5alXNbxqSknEsDOTsDQ4cCDRpInZKKm6mpKY4ePSp1DNJgLJREOiohIQHHjx/HxYsXce3aNbx8+RL6+vqoUqUKXF1d0aRJE7Ro0SLXdeiI8tKgAfDGLZeJiFRYKIl0TGhoKBYvXowtW7Yg4825yf/ZunUrAMDOzg6+vr4YNmwYypcvX5IxiYhIh/AYSiIdkZqaivHjx8PV1RXr169/a5l83f379zF16lTUrVsX/v7+JZCSiIh0EUcoiXTA/fv34eXlhRs3bqgeK1u2LORyOZo0aYI6derAysoK2dnZiI2NxfXr13Hs2DGcOnUKoiji0aNHEAQBI0aMwJIlS6Cnx39rEhFR/vE6lERaLjY2Fi1btkRMTAyAnNvIDR8+HJ9//jlMTU3fu+7cuXNx8uRJ1WNff/01/vzzT5ZKHVJU15kjIt3D61ASETIzMyEIgqpM2tvbY8mSJahRo0a+1q9WrRp+//137Ny5Ez///DOys7Oxdu1a1K5dGxMmTCjO6KSFHr58iCN3j+BlxkuUNS6LDjU6oGrZqlLHIiINwCEIIi02Z84chIaGAsgph2vXrs13mXxFJpOhV69emDdvHmQyGQDAz88PERERRZ6XtNPF/y6i59aeqL64Or7e8zXGHByDr/d8jeqLq6Pn1p64+N9FqSNK5t69e5DJZAgLC5M6SqF89dVX6N69u9QxVGQyGXbv3p3v5U+cOAGZTIbExMS3LjNt2jQ4OzsXOtuHql69On777bcS329J/Y6yUBJpqZiYGPz8888AAH19fcyfPx+VKlXKc9kbN25g1KhRaN68ORo3bowePXrg77//VlumY8eO+PLLLwEAGRkZGDlyZPG+ANIKuyJ2ocVfLbDr5i5kKbPUnstSZmHXzZznd9/cXeT7/uqrryCTySCTyWBkZISaNWtixowZyMrKev/KH6BNmzaq/b3+8c033xTL/gpi3bp1kMlk6NSpk9rjiYmJkMlkOHHihDTBXvPq+7dlyxa1x3/77TdUr169QNuKi4uDl5dXEaaj4sZCSaSlVqxYofrD6uPjg3r16uW53JkzZzBgwAAkJCTA19cXEydORKtWrfD48eNcy44cORIfffQRAOD48eO4fv168b0A0ngX/7uIz3Z8hozsd18xICM7A/229yuWkcpOnTohLi4OkZGRGD9+PKZNm4b58+cX+X5eGTJkCOLi4tQ+5s2bV2z7KwgDAwMcPXoUgYGBUkd5KxMTE/zwww/IzMws1Hasra1hbGxcRKmklZ2dDaVSKXWMYsdCSaSFMjMz8eeffwLI+SPzxRdf5LlccnIypkyZglatWmHjxo348ssv4e3tjbFjx2LcuHG5ljcxMcGAAQNUX69YsaJ4XgBphdmnZ7+3TL6SkZ2BOUFzijyDsbExrK2tYW9vj2HDhsHDw0N1iavnz5/jyy+/RPny5WFmZgYvLy9ERkaq1o2JiUG3bt1Qvnx5mJubo169eti/f/8792dmZgZra2u1j7Jly6qeP3/+PFxcXGBiYoJGjRqpDjl5nb+/PxwcHGBiYoK2bdti/fr1uaZjg4KC0LJlS5iamqJatWoYPXo0UlJS3pnN3NwcAwcOxKRJk9653NWrV9GuXTuYmpqiQoUKGDp0KJKTk1XPZ2dnY9y4cShXrhwqVKiACRMm4M3zc5VKJebMmYOPP/4YpqamaNCgAbZv3/7O/QLAZ599hsTERKxevfqdy+3ZswcNGzaEiYkJatSogenTp6uNPL855X3mzBk4Ozurvu+7d+/Ocxr30qVLaNSoEczMzNCsWTPcunUr175XrlyJatWqwczMDH369MGLFy/UXveMGTPw0UcfwdjYGM7Ozjh48KDq+bym1sPCw
iCTyXDv3j0AOaPJ5cqVg7+/P+rWrQtjY2Pcv38fQM7l3QYOHIgyZcrAzs4Oq1atUsv2vp/d+/IB+fsdLQ4slERaKDw8HPHx8QCAtm3bomLFinkut3//fjx79gyjR4+Gnp4eUlNT3/svZblcDgODnPP1jh8/XrTBSWs8fPkQ/rcKdm1S/1v+ePjyYTElymFqagqFQgEgZ0r84sWL8Pf3x9mzZyGKIjp37qwaHRsxYgQyMjJw6tQpXL16FXPnzoWFhcUH7zs5ORldu3ZF3bp1cenSJUybNg3fffed2jLR0dHw9vZG9+7dceXKFfj6+mLq1Klqy0RFRaFTp07o1asXwsPDsXXrVgQFBeXrMJNp06bh6tWrby13KSkp8PT0RPny5XHhwgVs27YNR48eVdv2woULsW7dOvz1118ICgpCQkICdu3apbadOXPmYMOGDVixYgWuX7+OsWPHYsCAAWpXhMhL2bJlMXXqVMyYMeOtBfn06dP48ssvMWbMGNy4cQMrV67EunXrMGvWrDyXf/nyJbp164ZPP/0Uly9fxs8//4yJEyfmuezUqVOxcOFCXLx4EQYGBhg4cKDa83fu3MG///6LgIAAHDx4EKGhoRg+fLjq+cWLF2PhwoVYsGABwsPD4enpCblcrvYPlfxITU3F3Llz8eeff+L69euoXLkygJzv/auSN3z4cAwbNkxVevPzs3tfvvz8jhYbkYi0zsqVK0UAIgBxwoQJ4tWrV/P86NChg2hhYSGuWrVKrF69ughANDU1Ffv06SNevHjxrevVqVNHBCDq6emJSUlJUr9cKqS0tDTxxo0bYlpaWr7XWRu6VsQ0FPhjXei6Isvt4+MjCoIgiqIoKpVK8ciRI6KxsbH43Xffibdv3xYBiMHBwarl4+PjRVNTU/Hff/8VRVEUP/30U3HatGn53l/r1q1FQ0ND0dzcXO1j06ZNoijm/H9XoUIFte/j8uXLRQBiaGioKIqiOHHiRLF+/fpq2506daoIQHz+/LkoiqI4aNAgcejQoWrLnD59WtTT03vrz2jt2rWipaWlKIqiOGnSJLFWrVpiZmam+Pz5cxGAGBgYKIqiKK5atUosX768mJycrFp33759op6envjo0SNRFEXRxsZGnDdvnur5zMxM8aOPPlJ9r9PT00UzMzPxzJkzahkGDRokfvbZZ+/8/o0ZM0ZMT08X7e3txRkzZoiiKIqLFi0S7e3tVcu1b99enD17ttq6GzduFG1sbFRfAxB37dolimLO9/jN7/vq1avVvu+BgYEiAPHo0aNqrxuAar2ffvpJ1NfXFx88eKBa5sCBA6Kenp4YFxcniqIo2trairNmzVLL1rhxY3H48OFq+3n1sxRFUQwNDRUBiNHR0aIo5vysAIhhYWFq27G3txcHDBig+lqpVIqVK1cWly9fLopi/n5278uXn9/RN33I+0NeeNkgIi108+ZN1ee1a9d+63IxMTHIzs7GmDFj0KNHD4wZMwYXLlzA5s2bkZSU9NZjwxwdHREREQGlUonIyEi4uLgU+WsgzfYy42WJrvc2e/fuhYWFBTIzM6FUKvH5559j2rRpOHbsGAwMDNC0aVPVshUqVFD97gLA6NGjMWzYMBw+fBgeHh7o1asXnJyc3rm//v375xpRrFKlCgAgIiICTk5Oatfqc3d3V1v21q1baNy4sdpjTZo0Ufv6ypUrCA8PVzsxThRFKJVKREdHo06dOu/MOHHiRKxcuRJ//fUX+vTpo/ZcREQEGjRoAHNzc9VjzZs3h1KpxK1bt2BiYoK4uDi175uBgQEaNWqkmva+c+cOUlNT0aFDB7VtKxSKfL0XGBsbY8aMGRg1ahSGDRuW6/krV64gODhYbUQyOzsb6enpSE1NhZmZmdryt27dyvV9f/N7+srrP18bGxsAwJMnT2BnZwcg53azVav+/6Wu3N3dVd8bMzMz/Pfff2jevLnaNps3b44rV66893W/zsjIKM/ftdcfk8lksLa2xpMnTwC8/2dnamr63nz5+R0tLiyURFooNTVV9fm7pvBSU1ORlpaGPn36YPLkyQAADw8PZGZmYtu2bRgxYgTs7e1zrVemTJk890WlR1njsu9fqAjXe5u2bdti+fLlMDIygq2trepwjPwYPHgwPD09sW/fPhw+fBhz5szBwoULMWrUqLeuY2lpiZo1axZF9LdKTk6Gr68vRo8eneu5V8XnXcqVK4fJkydj+vTp6Nq1a7HkA4B9+/aplS8A+T5RZsCAAViwYAFmzpyZ6wzv5ORkTJ8+HT179sy1XmEvvG9oaKj6/NVl0IryhJhXN3wQXzvmNK8TkExNTVX7f1u+Vxl15YQdHkNJpIVef1N6dTxZXl69Ob95+Y3OnTsDwFv/1f36No2MjD44J2mvDjU6wECvYGMOBnoG8KjhUaQ5zM3NUbNmTdjZ2amVyTp16iArKwvnzp1TPfbs2TPcunULdevWVT1WrVo1fPPNN9i5cyfGjx//3pNF3qVOnToIDw9Henq66rGQkBC1ZRwdHXHxovrZ7hcuXFD7umHDhrhx4wZq1qyZ6yO//7+NGjUKenp6WLx4ca6MV65cUTt+MTg4GHp6enB0dISlpSVsbGzUvm9ZWVm4dOmS6uvXTyR5M1+1atXylU9PTw9z5szB8uXLVServP76b926lefrz+sOXY6Ojrh69SoyMv7/BLE3v6f5df/+ffz333+qr0NCQlTfm7Jly8LW1hbBwcFq6wQHB6t+p15dmi0uLk71fFFd3/F9P7v85MvP72hxYaEk0kKvj2LcvXv3rcu9evOrUKGC2uNWVlYAcg52z0t0dLTq8/z+ASHdUrVsVXSr1a1A68gd5SV25xwHBwcIgoAhQ4YgKCgIV65cwYABA1C1alUIggAA+Pbbb3Ho0CFER0fj8uXLCAwMfO90cmpqKh49eqT28fz5cwDA559/DplMhiFDhuDGjRvYv38/FixYoLa+r68vbt68iYkTJ+L27dv4999/sW7dOgD/P2I2ceJEnDlzBiNHjkRYWBgiIyOxZ8+eAl371cTEBNOnT8eSJUvUHu/fvz9MTEzg4+ODa9euITAwEKNGjcIXX3yhmrofM2YMfvnlF+zevRs3b97E8OHD1c5aLlOmDL777juMHTsW69evR1RUFC5fvozff/8d69evz3fGLl26oGnTpli5cqXa435+ftiwYQOmT5+O69evIyIiAlu2bMEPP/yQ53Y+//xzKJVKDB06FBERETh06JDq+57XKOC7vPreXLlyBadPn8bo0aPRp08fWFtbAwC+//57zJ07F1u3bsWtW7cwadIkhIWFYcyYMQCgKtXTpk1DZGQk9u3bh4ULFxYow9vk52f3vnz5+R0tLiyURFrI1dVV9fm7rhX56l+tr47ReeXp06cAgPLly+daJzs7W3UMmq2treqNlkqfKS2nwFg/f1OcJgYmmNxicjEnUrd27Vq4urqia9eucHd3hyiK2L9/v2oEPzs7GyNGjECdOnXQqVMn1KpVC8uWLXvnNlevXg0bGxu1j88++wxAzuElAQEBuHr1KlxcXDB16lTMnTtX
bf2PP/4Y27dvx86dO+Hk5ITly5erjsl8NV3s5OSEkydP4vbt22jZsiVcXFzg5+cHW1vbAr1+Hx+fXHfGMjMzw6FDh5CQkIDGjRvD29sb7du3xx9//KFaZvz48fjiiy/g4+MDd3d3lClTBj169FDbzs8//4wff/wRc+bMUX3/9u3bh48//rhAGefOnas2WgYAnp6e2Lt3Lw4fPozGjRvDzc0NixYtyvPwGyDnzPGAgACEhYXB2dkZU6dOhZ+fH4CCT5HXrFkTPXv2ROfOndGxY0c4OTmp/U6MHj0a48aNw/jx4/Hpp5/i4MGDqstAATmzQ//88w9u3rwJJycnzJ07FzNnzixQhrfJz8/uffny8ztaXGSi+MbFp4hI47148QKVKlVCZmYmrKyscOTIkTynyiIiItCnTx907txZ7U1lwoQJOHLkCA4dOqS6nMUrJ06cUB1j1qtXr3xde440W3p6OqKjo/Hxxx8X+A/w7pu70W97v3dej9JY3xhbvLege+3uhUyqm2bNmoUVK1YgNjZW6ig64++//8bXX3+NFy9ewNTUVOo4Wq0w7w+v40k5RFrI0tISvXr1wpYtW5CQkICDBw9CLpfnWq5OnTro0aMHdu3ahezsbDRq1AgXLlzA4cOHMXjw4FxlUhRFtTNP37yGG5U+3Wt3R9DAIMwJmgP/W/5qt1800DOA3FGOyS0mo5FtIwlTapZly5ahcePGqFChAoKDgzF//nzeyrSQNmzYgBo1aqBq1aq4cuUKJk6ciD59+rBMahCOUBJpqdOnT6NVq1YAco6R3L17N8qVK5druVd31dm9ezeePHkCW1tb9OvXL8+76xw4cAATJkwAkDN1FxkZCX19/WJ9HVT8imoE4uHLhzh69yheZrxEWeOy8KjhUWLHTGqTsWPHYuvWrUhISICdnR2++OILTJ48uUBnqJO6efPmYdmyZXj06BFsbGzQvXt3zJo1K9clhqjgiur9gYWSSEuJogi5XI69e/cCANq3b4+FCxd+cAGMjY3F559/rjo4f+vWrbmucUfaqaj+YBCR7imq9weelEOkpWQyGRYvXqy6ZuSxY8fw448/5nlNtPe5f/8+hgwZoiqT3t7eLJNERJRvLJREWiQ9PR0nT57E9OnT0apVKzg4OCApKUk1lRYQEID+/fur7g37PqIoYtu2bejTpw8ePsy5B3PdunVzXeaDiIjoXXhAB5EWePHiBXr16oVTp04hMzMT+vr6yM7OBpBze7GlS5eiX79+UCgUiIiIQL9+/dCuXTv06dMHLi4uuc4Af/78OY4ePaq6ltkr9erVw5EjR1TXqSQiIsoPFkoiLaCvr4+rV6+qprNflUkAmD59Onr06IHg4GB89dVXuH79OrKysnD48GEcPnwYBgYGqFmzJqysrJCdnY3Y2Fi1O0W8MmjQICxcuBCWlpYl9rqIiEg38KQcIi1x+fJluLu7q90WsUyZMoiLi4O5uTkAICMjA/Pnz8cff/yBx48f52u7FSpUwN9//w1PT89iyU3S40k5RPQ2PCmHqJSJjo7G6//+09fXx+DBg1VlEsi5E8cPP/yA+/fvY8uWLfjiiy9Qt25dtduTmZubo0WLFhg7diyMjY3x7NmzXPfaJSIiKgiOUBJpOFEUMX/+fEycOBF9+/aFq6srJkyYAJlMhsjISHzyySfv3YZCoUBaWhr09fVhZmYGPb2cf0va2Njg0aNHAIDJkydj1qxZBb43Lmk+jlAWn3v37uHjjz9GaGgonJ2dpY5DVGAcoSQqBTIzMzFkyBBMnDgRU6dOxebNm/Hdd99h6tSpGD58eL7KJAAYGRnB0tISFhYWqjIJQG3Ec86cOfj888+RkfH2W+wRlaSvvvoKMpkMMpkMRkZGqFmzJmbMmIGsrKz3r/wB2rRpo9rf6x/ffPNNseyPSJfwpBwiDZWYmAhvb2+cOnUK69atg4+Pj+q5mTNnFsk+Xrx4ofb1v//+iwcPHuDAgQOwsLAokn0QFUanTp2wdu1aZGRkYP/+/RgxYgQMDQ0xefLkYtnfkCFDMGPGDLXHeDcWovfjCCWRBoqOjkazZs1w+fJlHD58WK1MFpX09HSkp6fnevzMmTOIjIws8v0RfQhjY2NYW1vD3t4ew4YNg4eHB/z9/QHkXP7qyy+/RPny5WFmZgYvLy+1392YmBh069YN5cuXh7m5OerVq4f9+/e/c39mZmawtrZW+yhbtqzq+fPnz8PFxQUmJiZo1KgRQkNDc23D398fDg4OMDExQdu2bbF+/XrIZDLVjQMAICgoCC1btoSpqSmqVauG0aNHIyUlpZDfLSLpsFASaZizZ8+iadOmUCgUOHv2LNq0aVMs+3n27BkAqB0zOX78eNy9excuLi7Fsk+iwjI1NVVd6eCrr77CxYsX4e/vj7Nnz0IURXTu3Fl1ea0RI0YgIyMDp06dwtWrVzF37txCjbwnJyeja9euqFu3Li5duoRp06bhu+++U1smOjoa3t7e6N69O65cuQJfX19MnTpVbZmoqCh06tQJvXr1Qnh4OLZu3YqgoCCMHDnyg7MRSY1T3kQaZOvWrfDx8UGjRo2we/duVKxYsdj2ZWhoCENDQzRp0gSurq5YsmQJRFGEvb19se2TNEijRsD/TsgqUdbWwMWLBV5NFEUcO3YMhw4dwqhRoxAZGQl/f38EBwejWbNmAIC///4b1apVw+7du9G7d2/cv38fvXr1wqeffgoAqFGjxnv3s2zZMvz5559qj61cuRL9+/fH5s2boVQqsWbNGpiYmKBevXp48OABhg0bpraso6Mj5s+fDwBwdHTEtWvXMGvWLNUyc+bMQf/+/fHtt98CABwcHLBkyRK0bt0ay5cv54lTpJVYKIk0gCiKmDNnDqZOnYrPP/8cf/31F4yNjYt1n5UrV0Zqaqrqto1r167FmjVrVH8IScc9egT873abmmzv3r2wsLBAZmYmlEolPv/8c0ybNg3Hjh2DgYEBmjZtqlq2QoUKcHR0REREBABg9OjRGDZsGA4fPgwPDw/06tULTk5O79xf//79c40oVqlSBQAQEREBJycntcLn7u6utuytW7fQuHFjtceaNGmi9vWVK1cQHh6Ov//+W/WYKIpQKpWIjo5GnTp13vdtIdI4LJREElMoFPD19cW6devw008/4aeffiqxS/e8KpNAzh/SFStW4NChQ7zIeWlgba0V+23bti2WL18OIyMj2Nraqv3Ovs/gwYPh6emJffv24fDhw5gzZw4WLlyIUaNGvXUdS0tL1KxZs0AZCyo5ORm+vr4YPXp0rufs7OyKdd9ExYWFkkhCCQkJ6NWrF86cOYONGzdiwIABkmWZM2cOVq5ciSlTprBQlgYfMO0sBXNz8zwLXp06dZCVlYVz586ppryfPXuGW7duoW7duqrlqlWrhm+++QbffPMNJk+ejNWrV7+zUL5LnTp1sHHjRqSnp6tGKUNCQtSWcXR0zHXiz4ULF9S+btiwIW7cuFHsxZWoJPGkHCKJ3LlzB+7u7rh69SqOHj0qaZkEgHLlyqFx48YIDQ1FfHy8pFmI3sfBwQGCIGD
IkCEICgrClStXMGDAAFStWhWCIAAAvv32Wxw6dAjR0dG4fPkyAgMD3zudnJqaikePHql9PH/+HADw+eefQyaTYciQIbhx4wb279+PBQsWqK3v6+uLmzdvYuLEibh9+zb+/fdfrFu3DsD/nwA3ceJEnDlzBiNHjkRYWBgiIyOxZ88enpRDWo2FkkgCQUFBcHNzgyiKCAkJQcuWLaWOBACYO3cuRFHEhAkTpI5C9F5r166Fq6srunbtCnd3d4iiiP3798PQ0BAAkJ2djREjRqBOnTro1KkTatWqhWXLlr1zm6tXr4aNjY3ax2effQYAsLCwQEBAAK5evQoXFxdMnToVc+fOVVv/448/xvbt27Fz5044OTlh+fLlqmMyXx0X7eTkhJMnT+L27dto2bIlXFxc4OfnB1tb26L+FhGVGN56kaiEbd68GV9//TXc3d2xc+dOWFlZSR1JTaVKlZCWlobk5GSpo1AR4a0XpTVr1iysWLECsbGxUkchyoW3XiTSMqIoYvr06ejfvz/69euHw4cPa1yZBIChQ4ciJSUFGzZskDoKkVZatmwZLly4gLt372Ljxo2YP39+sdycgEiTcISSqARkZGRg8ODB2LRpE2bOnIkpU6aU2JncBaVQKGBmZoYaNWrg9u3bUsehIsARypI1duxYbN26FQkJCbCzs8MXX3yByZMnF+gMdaKSUlTvDyyURMXs2bNn6NGjB86fP49169ahX79+Ukd6rw4dOuDo0aO4e/cuPv74Y6njUCGxUBLR23DKm0gL3L59G25uboiIiMDx48e1okwCwMKFCwEA48aNkzgJERFpAxZKomJy8uRJuLm5wcDAQO1aedrAyckJ1apVw/79+6FUKqWOQ0WEE1JE9Kaiel9goSQqBhs2bECHDh3g4uKCM2fO5Osewppm/PjxUCgUWLRokdRRqJD09fUB5BwfS0T0utTUVABQXW7rQ/EYSqIiJIoi/Pz8MHPmTAwcOFB1yzhtpFQqYWZmhooVK+LBgwdSx6FCEEUR9+/fR2ZmJmxtbaGnx7EEotJOFEWkpqbiyZMnKFeuHGxsbAq1PRZKoiKSnp6Or7/+Glu2bMEvv/yCCRMmaOyZ3PnVu3dvbN++HZcuXULDhg2ljkOFoFAoEB0dzUMYiEhNuXLlYG1tXei/VyyUREXg6dOnEAQBoaGh2LhxI7y9vaWOVCTu378Pe3t7tG3bFsePH5c6DhWSUqnktDcRqRgaGqoOiSksFkqiQrp58ya6dOmC5ORk+Pv7o2nTplJHKlJ16tTB7du3kZKSwkvOEBFRnnggDVEhHD9+HO7u7jAxMcG5c+d0rkwCgJ+fH5RKJX766SepoxARkYbiCCXRB/rrr7/g6+uLtm3bYtu2bbC0tJQ6UrEpU6YMjIyM8OzZM6mjEBGRBuIIJVEBKZVKTJ48GYMGDcKgQYOwb98+nS6TAPDZZ58hISEBR44ckToKERFpII5QEhVAWloafHx8sH37dsyfPx/jxo3T+jO58yMxMRFWVlZo2LAhLl68KHUcIiLSMCyURPn0+PFjCIKA8PBwbN68Gd27d5c6Uolq3LgxLl26hPj4eFhZWUkdh4iINAinvIny4fr162jatCliYmJw6tSpUlcmAeCXX36BKIqYOHGi1FGIiEjDcISS6D2OHDkCb29v2NvbY+/evbCzs5M6kmQqVqyI9PR0JCcnSx2FiIg0CEcoid5h1apV8PLyQrNmzRAUFFSqyyQADBkyBCkpKfj777+ljkJERBqEI5REeVAqlZg4cSIWLFiA4cOHY/HixTAwMJA6luTS09NhYWGBmjVr4ubNm1LHISIiDcERSqI3pKamwtvbGwsXLsRvv/2GP/74g2Xyf0xMTNC6dWvcunULMTExUschIiINwRFKotfExcVBLpcjIiIC//zzD7p16yZ1JI0TFhYGFxcX9OzZEzt27JA6DhERaQAWSqL/CQ8PR9euXZGdnY29e/fCxcVF6kgaq1q1anjy5AnS0tKgp8eJDiKi0o5/CYgAHDhwAC1atECFChVw7tw5lsn3GDt2LBQKBZYsWSJ1FCIi0gAcoaRSb9myZRg1ahQ6d+6Mf/75BxYWFlJH0nhKpRKmpqaoUqUK7t+/L3UcIiKSGEcoqdTKzs7G2LFjMWLECIwaNQq7d+9mmcwnPT09dOnSBbGxsQgLC5M6DhERSYwjlFQqJScn4/PPP8e+ffuwZMkSjBgxQupIWicmJgbVq1dH+/btcfToUanjEBGRhFgoqdR5+PAhunXrhsjISGzduhWdO3eWOpLWcnR0RFRUFJKTk2FiYiJ1HCIikginvKlUCQsLQ9OmTfH06VMEBQWxTBbSjz/+iOzsbMyYMUPqKEREJCGOUFKpsXfvXvTr1w+1a9dGQEAAbGxspI6kEywsLGBiYoL4+HipoxDpnLt37+L8+fO4fPky4uLikJ2djbJly6J+/fpwdXWFq6srjIyMpI5JBN7+g0qFJUuWYOzYsZDL5di0aRPMzc2ljqQz+vXrhzVr1uD48eNo166d1HGItJ5CocC///6LZcuW4ezZs+9ctlKlShg8eDC++eYb2NnZlVBCotw4Qkk6LSsrC2PHjsUff/yB8ePHY+7cudDX15c6lk5JSEhAxYoV0ahRI5w/f17qOERa7eLFi/jqq69w/fr1Aq1nYmKCmTNn4ttvv+V7HEmChZJ0VlJSEvr164dDhw5h6dKl8PX1lTqSzmrUqBEuX76MhIQElCtXTuo4RFppwYIFmDRpErKzs1WPOTg4oEOHDqhXrx6qVasGAwMDJCQk4ObNmzh//jyOHz+OrKws1fItWrTA7t27UaFCBSleApViLJSkk2JjY9G1a1fcu3cP27ZtQ8eOHaWOpNOOHDmCjh07wtfXFytWrJA6DpHWmTZtGqZPn676uk6dOpgwYQJcXV0hk8neul58fDz+/PNPbN68Ga/+nNevXx8nTpxgqaQSxUJJOufSpUvo1q0bjIyMsHfvXtSvX1/qSKVChQoVoFAokJSUJHUUIq2yceNGfPnll6qvhwwZgmHDhsHQ0DDf27h8+TLGjx+vOjmuVatWOH78OKe/qcTwskGkU/bs2YNWrVqhWrVqCAkJYZksQYMHD0ZycjK2bt0qdRQirfHw4UOMGjVK9fX333+P0aNHF6hMAkDDhg2xfv16VKxYEQBw6tQp/P7770WalehdOEJJOkEURSxatAjfffcdevbsiQ0bNsDMzEzqWKVKeno6zM3NUatWLUREREgdh0gr9OnTB9u2bQMAdOvWDbNnz861TGpqKtauXYurV6/i6tWrePnyJX7++Wd0794917IXLlzAwIEDAQCmpqa4c+cObG1ti/U1EAEcoSQdkJWVhWHDhmH8+PGYOHEi/v33X5ZJCZiYmKBVq1a4efMm7t+/L3UcIo13//597NixA0DOISMTJ07Mc7nnz59jxYoVuHv3LhwdHd+5zcaNG6Nfv34AgLS0NKxevbpoQxO9BQslabUXL16gS5cuWLNmDf7880/MmTMHenr8tZbK/PnzAQDjx4+XOAmR5lu9ejWUSiWAnOu5Wl
pa5rlcpUqVEBgYiMOHD+fr/61Bgwapjp1cuXKl2lngRMWFf3lJa8XExKB58+Y4d+4cDh48iEGDBkkdqdRr1KgRqlatioCAANUfSiLK28GDBwEAMpkMvXr1eutyRkZGqmMj88Pa2hotW7YEAMTFxeHatWuFC0qUDyyUpJXOnz+Ppk2bIjU1FWfPnkX79u2ljkT/M2bMGGRkZGDp0qVSRyHSWAqFAuHh4QCA6tWro1KlSkW6/UaNGqk+v3jxYpFumygvLJSkdXbs2IHWrVujRo0aCAkJQZ06daSORK8ZP348DA0NsWDBAqmjEGmsO3fuQKFQAABq165d5NuvW7eu6nOOUFJJYKEkrSGKIubNmwdvb28IgoDjx4+jcuXKUseiN+jp6aFz5864f/8+/5ARvUVycrLq8+K4u9Trx2OmpKQU+faJ3sRCSVohMzMTQ4YMwcSJEzF16lRs3rwZJiYmUseit1i0aBEAYNy4cRInIdJMBgYGqs9fv9ViUXl9m6/vi6i48LeMNF5iYiK8vb1x6tQprFu3Dj4+PlJHovf4+OOP4eDggOPHj0OhUMDIyEjqSEQaxcbGRvV5TExMkW//9W1aW1sX+faJ3sQRStJo0dHRaNasGS5fvozDhw+zTGqRqVOnIjs7Gz///LPUUYg0jo2NjapU3rhxo8ivinD9+nXV56+foENUXFgoSWOdPXsWTZs2hUKhwNmzZ9GmTRupI1EB+Pj4wMzMDCtXrpQ6CpFGatq0KQAgKSkJISEhRbbd7OxsHDlyRPU1CyWVBBZK0khbt25F27Zt4ejoiJCQkPfeHYI0U58+ffD06VOcPHlS6ihEGueLL75Qff7PP/+8c9nNmzdj5cqV2LVrFwDg5MmTWLlyJVauXImkpCS1ZYOCgvDw4UMAQKdOnVClSpUiTk6UG+/lTRpFFEXMmTMHU6dORf/+/bFmzRoYGxtLHYs+UHx8PCpXrowmTZoU6QgMkS7IyspC9erVVeVv6dKlaNWqVZ7Lenp64r///svzuYMHD6Jq1aoAcm636O3trbr96Z49eyCXy4shPZE6FkrSGAqFAr6+vli3bh2mTZsGPz8/yGQyqWNRITVs2BBhYWFITExE2bJlpY5DpFH+/PNPDBkyBABQuXJlbN26tUB3xXmdKIqYNWsWtm7dCgBwc3NDUFCQ6jaMRMWJU96kERISEuDp6YnNmzdj06ZN+Omnn1gmdcSsWbMgiiImTZokdRQijTNo0CB07NgRAPDkyRMMGTIE8fHxBd6OKIr4/fffVWXS2NgYa9euZZmkEsMRSpLcnTt30KVLFzx79gy7du1S3YOWdIeVlRWysrLw8uVLqaMQaZyHDx/C3d0dsbGxAIBKlSrhp59+QuvWrfO1/pMnTzBjxgy1Y5X//PNPDBo0qFjyEuWFI5QkqaCgILi5uQEAQkJCWCZ11MCBA5GUlITt27dLHYVI45QrV051xjcAPH36FCNHjsQ333yDkydPIisrK8/1YmJisGDBAnTv3l2tTP7+++8sk1TiOEJJktm8eTO+/vpruLu7Y+fOnbCyspI6EhWT1NRUlClTBrVr11a7Ph5RaXbz5k0sW7YMq1evRnp6OgCgY8eOOHz4sNpypqamcHR0hJ2dHfT19fH8+XPcuHEDT548UVuuSpUqWL16Nbp161Zir4HoFRZKKnGiKGLGjBmYNm0afHx8sGrVKt5JpRRo3bo1Tp06hdjYWHz00UdSxyGSzP79+zFv3jycPHkS+vr6qtskdu7cGXv37sX69esxbdq0fN9Bx8jICF9++SXmzp3Lf5iTZFgoqURlZGRg8ODB2LRpE2bOnIkpU6bw5JtS4sKFC2jSpAn69u2LLVu2SB2HSBLp6ekoV64cMjIycj23evVqDB48GEDOxckPHDiAjRs34sKFC4iOjlZbtmzZsrC0tER8fDwuXryIunXrlkh+ordhoaQS8+zZM/To0QPnz5/HunXr0K9fP6kjUQmztbVFQkICUlNToafHQ7ipdDpw4AB69eqFtLQ0tcejoqJQo0aNPNdJSEjAkydPkJ2dDQsLC1SrVg1t27bFqVOn4ODggHPnzqF8+fIlEZ8oT3xHpxJx+/ZtuLm5ISIiAsePH2eZLKVGjx6NjIwMrFixQuooRJLx8vJC/fr1AUA1Q2NjY4OPP/74retYWVmhdu3aqFevHuzt7aGnp6c6hvLOnTvw9PTMdcccopLEQknF7uTJk3Bzc4OBgQHOnTuHZs2aSR2JJPLdd9/B0NAQ8+bNkzoKkWSGDh2KCxcuoEWLFqhTpw6AnJNxCnL4T3Z2NqKiogDkHJd++fJldOnSJdeoJ1FJYaGkYrVhwwZ06NABLi4uOHv27Func6h0MDAwQKdOnRATE8OzvalU+vXXX7F69Wo4ODjg5MmTCAkJwYgRIzBy5MgCbefu3bvIzMxUfZ2dnY2goCD06NEDPJKNpMBjKKlYiKIIPz8/zJw5E4MGDcLy5cthaGgodSzSAHfu3IGDgwM8PT1x8OBBqeMQlRh/f38IggArKyvExsbCzMzsg7e1e/du9OjRQ/W1np4elEolKlWqhKioKJQpU6YoIhPlG0coqcilp6fj888/x8yZM/HLL79g9erVLJOkUrNmTXzyySc4duwYFAqF1HGISkR4eDh69uwJY2NjhIaGFqpMAsCtW7dUnxsaGqJ69eo4evQoHjx4wDJJkmChpCL19OlTtGvXDrt378a2bdswceJEXhaIcpkyZQqysrIwe/ZsqaMQFbv4+Hg0a9YMoigiMDAQdnZ2hd5m9+7dMW/ePISFhWHGjBl49OgRmjVrxmv6kmQ45U1F5ubNm+jSpQuSk5Ph7++vdisxotcplUpYWFigTJkyePz4sdRxiIqNQqFA9erVERcXh02bNqF///5Fvo+IiAjUrVsXAQEB6Nq1a5Fvnyg/OEJJReL48eNwd3eHqakpzp07xzJJ76Snp4fevXvjyZMnCAoKkjoOUbFxc3NDXFwc/Pz8iqVMAkDt2rXh4OCAPXv2FMv2ifKDhZIK7a+//oKnpyeaNGmC4OBgVK9eXepIpAXmz58PAJgwYYLESYiKR69evRAaGoq+ffti+vTpxbYfmUwGQRAQEBAApVJZbPshehcWSvpgSqUSkydPxqBBgzBo0CDs3bsXlpaWUsciLVG5cmU0aNAAISEhePnypdRxiIrUlClTsHPnTri6upbIrUblcjkeP36M8+fPF/u+iPLCQkkfJC0tDf369cPcuXOxYMECXhaIPsjMmTMhiiKmTp0qdRSiIrN+/XrMmTMHVatWRUhISInss1mzZqhYsSKnvUkyPCmHCuzx48cQBAHh4eHYvHkzunfvLnUk0mLly5eHUqnEixcvpI5CVGhBQUFo3bo1zMzMEBMTAysrqxLb99dff43z58/zpgEkCY5QUoFcv34dTZs2RUxMDE6dOsUySYX21Vdf4eXLl9ixY4fUUYgKJSYmBu3bt4eenh5CQkJKtEwCOdPeN27cwJ07d0p0v0QACyUVwJEjR9CsWTOULVsW586dQ6NGjaSORDpg5syZ0NPTw7Rp06SOQ
vTBkpOT4eLigszMTPj7+6NevXolnqFjx44wMTHhtDdJgoWS8mXVqlXw8vJC8+bNERQUVCQX5iUCAHNzczRr1gzXrl3Df//9J3UcogJTKpVwdnbG8+fPsXjxYnh5eUmSw9zcHB4eHvD395dk/1S6sVDSOymVSnz//ffw9fXFN998A39/f5QtW1bqWKRjFixYAAD47rvvJE5CVHAeHh6IiorC8OHDMWrUKEmzyOVyBAUFIT4+XtIcVPrwpBx6q9TUVAwYMAC7d+/GokWLMHr0aN5GkYqNjY0NEhMTkZKSAj09/luXtMPQoUOxevVqeHh44MiRI1LHwaNHj2Bra4u1a9fCx8dH6jhUivBdm/IUFxeH1q1b4/Dhw9izZw/GjBnDMknFauTIkUhPT8fq1auljkKUL7/++itWr14NBwcHHDp0SOo4AABra2s0bdqU095U4jhCSbmEh4eja9euyM7Oxt69e+Hi4iJ1JCoFsrKyYGpqio8++gjR0dFSxyF6J39/fwiCACsrK8TGxsLMzEzqSCpz5szBrFmzEB8fDxMTE6njUCnBEUpSc+DAAbRo0QIVKlTA+fPnWSapxBgYGMDT0xP37t1DRESE1HGI3urq1avo2bMnjI2NERoaqlFlEgAEQUBKSgqOHTsmdRQqRVgoSWXZsmXo2rUr2rRpg9OnT6Nq1apSR6JS5tdffwUAjB8/XuIkRHmLj4+Hu7s7RFFEYGCgRl7xok6dOqhZsyanvalEsVASsrOzMXbsWIwYMQKjRo3Crl27YGFhIXUsKoVq1aqFGjVq4MiRI8jKypI6DpEahUIBJycnpKSkYMOGDXB3d5c6Up5kMhnkcjn8/f2hVCqljkOlBAtlKZecnIwePXpgyZIl+OOPP/Dbb79BX19f6lhUik2aNAlZWVmYPXu21FGI1Li5uSEuLg5+fn7o37+/1HHeSRAEPHr0CBcuXJA6CpUSPCmnFHv48CG6deuGyMhI/Pvvv5JdjJfodUqlEhYWFihbtiwePXokdRwiAECvXr2wc+dO9O3bF1u2bJE6zntlZWXB2toavr6+mDVrltRxqBTgCGUpFRYWhqZNmyI+Ph7BwcEsk6Qx9PT00LNnTzx+/BhnzpyROg4RpkyZgp07d8LV1VUryiSQc5Jbly5deBtGKjEslKXQ3r170aJFC1hbW+PcuXNwcnKSOhKRmld3zvn+++8lTkKl3fr16zFnzhxUrVoVISEhUscpEEEQcP36dURFRUkdhUoBFspSZsmSJRAEAR06dMDJkydhY2MjdSSiXKytrfHpp5/i7NmzSE5OljoOlVJBQUEYOHAgLCwsEB4eDgMDA6kjFUjHjh1hbGzMs72pRLBQlhJZWVkYNWoUxowZg3HjxmHHjh0wNzeXOhbRW/38888QRRFTp06VOgqVQjExMWjfvj309PQQEhICKysrqSMVmIWFBdq3b89pbyoRPCmnFEhKSkK/fv1w6NAhLF26FL6+vlJHIsqXcuXKAQASExMlzUGlS3JyMuzs7JCYmIh9+/Zp9THmq1atwvDhw/H48WNUqFBB6jikwzhCqeNiY2PRokULBAUFYf/+/SyTpFV8fHzw4sULjrBQiVEqlXB2dsbz58+xePFirS6TAFS30d2/f7/UUUjHcYRSh126dAndunWDkZER9u7di/r160sdiahAkpOTYWlpiXr16iE8PFzqOFQKtGvXDoGBgRg+fDiWLl0qdZwi0bRpU1SrVg3bt2+XOgrpMI5Q6qg9e/agVatWqFatGs6dO8cySVrJwsICbm5uuHr1Kq9JScVu6NChCAwMhIeHh86USSDnbO+DBw8iPT1d6iikw1godYwoivj111/Ro0cPdO7cGYGBgahSpYrUsYg+2Lx58wDwEkJUvH799VesXr0aDg4OOHTokNRxipRcLkdKSgoCAwOljkI6jFPeOiQrKwsjR47EypUrMWnSJMyaNQt6evw3A2k/a2trvHz5EsnJyfydpiLn7+8PQRBgZWWF2NhYmJmZSR2pSImiiJo1a6JDhw5YsWKF1HFIR/GdWUe8ePECXbp0wZo1a/Dnn39izpw5/MNLOmP48OFIS0vDX3/9JXUU0jHXrl1Dr169YGxsjNDQUJ0rkwAgk8kgCAICAgKgVCqljkM6iiOUOiAmJgZdunTBgwcPsGPHDrRv317qSERFKisrC6amprCzs+NdP6jIxMfHo3r16khLS0NQUBDc3d2ljlRsTpw4gbZt2+L8+fNo3Lix1HFIB3EIS8udP38eTZs2RWpqKs6ePcsySTrJwMAAHTp0wN27dxEZGSl1HNIBCoUCTk5OSElJwYYNG3S6TAJAixYtYGVlxUtwUbFhodRiO3bsQOvWrVGjRg2EhISgTp06UkciKjYLFy4EAIwdO1biJKQL3NzcEBcXBz8/P/Tv31/qOMXOwMAAXbp04W0YqdiwUGohURQxb948eHt7QxAEHD9+HJUrV5Y6FlGxqlOnDqpXr45Dhw4hKytL6jikxXr16oXQ0FD07dsX06dPlzpOiZHL5bh69Sqio6OljkI6iIVSy2RmZmLIkCGYOHEifvjhB2zevBkmJiZSxyIqERMnTkRWVhbmzp0rdRTSUlOmTMHOnTvh6uqKLVu2SB2nRHl6esLIyIjT3lQseFKOFklMTIS3tzdOnTqF1atXw8fHR+pIRCVKqVTC3Nwc5cqVQ1xcnNRxSMusX78eX331FapWrYp79+7BwMBA6kglrnPnzkhPT8fx48eljkI6hiOUWiI6OhrNmjXD5cuXcfjwYZZJKpX09PTQo0cPPHr0COfOnZM6DmmRoKAgDBw4EBYWFggPDy+VZRLImfY+deoUEhISpI5COoaFUgucPXsWTZs2hUKhwNmzZ9GmTRupIxFJZsGCBQCA7777TuIkpC1iYmLg4eEBPT09hISEwMrKSupIkpHL5cjOzsb+/fuljkI6hoVSw23duhVt27aFo6MjQkJC4OjoKHUkIknZ2tqifv36OHPmDFJTU6WOQxouOTkZLi4uUCgU8Pf3R7169aSOJClbW1s0btyYZ3tTkWOh1FCiKGL27Nno168fvL29cfToUVSsWFHqWEQaYcaMGVAqlfjhhx+kjkIaTKlUwtnZGc+fP8fixYvh5eUldSSNIJfLceDAAWRkZEgdhXQIT8rRQAqFAr6+vli3bh2mTZsGPz8/yGQyqWMRaRRLS0vo6enh+fPnUkchDdWuXTsEBgZi+PDhWLp0qdRxNMbVq1fh5OSEAwcOoFOnTlLHIR3BEUoNk5CQAE9PT2zevBmbNm3CTz/9xDJJlIcvv/wSiYmJ2Lt3r9RRSAMNHToUgYGB8PDwYJl8Q/369fHxxx9z2puKFEcoNcidO3fQpUsXPHv2DLt27ULLli2ljkSksV6+fIly5crByckJYWFhUschDfLrr79i/PjxcHBwwM2bN6Gnx7GTN3377bfYvn07YmNjOWhBRYL/l2mIoKAguLm5AQBCQkJYJoneo2zZsmjatCmuXLmCJ0+eSB2HNERAQADGjx8PKysrhIWFsUy+hSAIePjwIS5duiR1FNIR/D9N
A2zevBnt27fHp59+irNnz6JmzZpSRyLSCvPnzwcAfP/99xInIU1w7do19OzZE8bGxggNDYWZmZnUkTRWixYtUL58eU57U5HhlLeERFHEjBkzMG3aNPj4+GDVqlUwMjKSOhaRVqlSpQqSkpKQnJzM0ahSLD4+HtWrV0daWhqCgoLg7u4udSSNN2DAAFy9ehVXrlyROgrpAL77SiQjIwNffvklpk2bhpkzZ2Lt2rUsk0QfYNiwYUhLS8P69euljkISUSgUcHJyQkpKCjZs2MAymU+CICA8PBzR0dFSRyEdwBFKCTx79gw9evTA+fPnsX79evTt21fqSERaS6FQwMzMDNWrV8edO3ekjkMSaNiwIUJDQ+Hn54fp06dLHUdrJCUloWLFipg/fz5Gjx4tdRzSchyhLGG3b9+Gm5sbbt68icDAQJZJokIyMjKCh4cHoqKiEBUVJXUcKmG9evVCaGgo+vbtyzJZQGXKlEHbtm2xZ88eqaOQDmChLEEnT56Em5sbDAwMEBISwmkZoiKycOFCAMC4ceMkTkIlacqUKdi5cydcXV2xZcsWqeNoJUEQcPLkSd4ggAqNhbKEbNiwAR06dEDDhg1x9uxZ1KhRQ+pIRDqjXr16sLe3x4EDB5CVlSV1HCoB69evx5w5c1C1alWEhIRIHUdrdevWDdnZ2Thw4IDUUUjLsVAWM1EU8eOPP8LHxwdffvklDhw4gHLlykkdi0jnTJgwAZmZmViwYIHUUaiYBQUFYeDAgbCwsEB4eDgMDAykjqS1PvroI7i6unLamwqNJ+UUo/T0dHz99dfYsmUL5s6di++//553JCAqJkqlEmZmZrCyssJ///0ndRwqJjExMXB0dER2djbCwsJQr149qSNpvZ9//hnz58/H06dPYWxsLHUc0lIcoSwmT58+Rbt27bB7925s374dEyZMYJkkKkZ6enro3r074uLicOHCBanjUDFITk6Gi4sLFAoF/P39WSaLiFwuR1JSEk6ePCl1FNJiLJTF4ObNm3Bzc8Pdu3dx8uRJ9OrVS+pIRKXCq+nu7777TuIkVNSUSiWcnZ3x/PlzLF68GF5eXlJH0hlOTk6wt7fntDcVCgtlETt+/Djc3d1hamqKc+fOoUmTJlJHIio1PvroI9StWxdBQUFITU2VOg4VoVeXhho+fDhGjRoldRydIpPJIAgC/P39waPg6EOxUBahv/76C56enmjSpAmCg4Nhb28vdSSiUmf69OlQKpXw8/OTOgoVkaFDhyIwMBAeHh5YunSp1HF0klwux4MHDxAaGip1FNJSPCmnCCiVSkydOhW//PILfH198ccff/CsQyIJlS1bFgYGBkhISJA6ChXSokWLMG7cONSqVQsRERG8X3sxyczMROXKlTF69GheIJ4+CP/PLKS0tDT069cPc+fOxYIFC7B8+XKWSSKJDRgwAM+fP+e19bTc3r17MW7cOFhZWSE0NJRlshgZGhqic+fO8Pf3lzoKaalSOUKpVCpx584dXLp0CZGRkcjIyICxsTFq1KgBV1dX1KpVC/r6+u/dzuPHjyEIAq5evYq///4b3bt3L/7wRPReL1++RLly5eDs7IzLly9LHYc+wLVr1+Di4gJ9fX3cvn0bdnZ2UkfSeVu3bkW/fv1w7949HrJFBVaqhtIePHiA1atXY/Xq1YiLi3vrchUrVsTAgQPh6+v71jvaXL9+HV26dEFGRgZOnToFV1fX4opNRAVUtmxZNGnSBOfPn0d8fDwqVqwodSQqgPj4eLi5uUGpVOLUqVMskyXEy8sLhoaG8Pf354lPVGClYv5AoVBg+vTpqFGjBmbMmPHOMgnkvJnNmzcPDg4O+O6775CWlqb2/JEjR9CsWTNYWlri/PnzLJNEGmju3LkQRRHff/+91FGoABQKBZycnJCSkoINGzbA3d1d6kilRtmyZdG2bVtOe9MH0fkp7+joaPTs2RNhYWGqx/T19dGsWTM4OTnBwcEB5ubmSEtLQ1RUFMLDw3H69Gm1+wE7Ojpi586dqFu3LlatWoXhw4ejY8eO2Lp1K8qUKSPBqyKi/KhcuTJSUlKQkpIidRTKp4YNGyI0NBR+fn48OUQCS5cuxbfffounT5/yNsFUIDpdKO/cuYPWrVurbsOmr6+PL774Av3794e1tfVb13v27Bm2bt2KNWvWQKFQAAAqVKiArl27Yv369RgxYgR+++03nnxDpOF+/PFHzJw5E+vWrYOPj4/Uceg9vL29sWPHDvTt2xdbtmyROk6pFBsbCzs7O2zevBmfffaZ1HFIi+hsoUxMTETDhg0RHR0NAKhevTrmzp2LunXr5nsbUVFRmDRpEm7evKl67Oeff8YPP/xQ5HmJqOgpFAqYmZmhRo0auH37ttRx6B2mTp2K2bNnw9XVFRcvXpQ6Tqn26uTUf/75R+oopEV09hjKcePGqcpkzZo1sX79+gKVSQD45JNPsHbtWjg5Oakeu3jxIu8kQKQljIyM0K5dO0RGRqreD0jzbNiwAbNnz4atrS1CQkKkjlPqyeVy7N+/XzVDR5QfOjlCeezYMXh4eAAAzM3NsWvXLtjY2KgtM3Xq1HceeHz06FFUqVIFAPD8+XN0795ddZHkf//9F7179y6m9ERUlK5du4ZPP/0UgiBg9+7dUsehNwQHB6NVq1YwMzNDTEwMrKyspI5U6oWFhcHFxQWHDx9Ghw4dpI5DWkInC6WXlxcOHjwIAPjpp5/g7e2da5mwsDDExsbmevznn3+Gra1trj88R48exdixYwEATZo0wblz54o+OBEVC3t7e8TFxSE9PZ0Xx9YgMTExcHR0RHZ2NsLCwlCvXj2pIxEAURRRvXp1yOVy/P7771LHIS2hc++sUVFRqjJZtWpV9OjRI8/lnJ2d0a1bN7WPqlWrIi0tDV26dMm1fPv27VG7dm0AwPnz53mMD5EW+e6775CZmYmFCxdKHYX+Jzk5GS4uLlAoFPD392eZ1CAymQxyuRx79uzhIV6UbzpXKPft26f63NvbO193vHll//79kMlk6Ny5c67nZDKZ2jR3QEBA4YISUYkZMWIEjI2NsXjxYqmjEHLuVubs7Iznz59j8eLF8PLykjoSvUEQBMTGxqpdco/oXXSuUL4+cujm5pbv9TIzM3Ho0CE4OzujatWqeS7z+gV2L1269OEhiahE6enpQS6X4+HDh5xd0AAeHh6IiorC8OHDeUcWDdW6dWtYWlryIueUbzpXKMPDwwEABgYGcHBwyPd6Z86cQWJiYp7T3a989NFHqguZv9oPEWmHV9PdvHOOtIYOHYrAwEB4eHhg6dKlUsehtzA0NISXlxf27NkjdRTSEjpXKF++fAkg5xZSxsbG+V5v3759MDAwgKen51uXkclkqnsCv3jxonBBiahEVatWDbVr18apU6eQmpoqdZxSadGiRVi9ejVq1aqFQ4cOSR2H3kMQBISGhuL+/ftSRyEtoHOFUiaTAUCBDiROTU3FiRMn0Lx58/feaurVdnmmKJH2mTZtGpRKJaZNmyZ1lFJn7969GDduHKysrBAaGsr3UC3g5eUFAwMDnjNA+aJ
z/0dXqlQJQM61I5OTk/O1zrFjx956dvfrsrKyEBcXBwCqkUoi0h59+/aFhYUF1qxZI3WUUuXatWvo0aMHjI2NERoaCjMzM6kjUT5YWlqiTZs2nPamfNG5Quni4qL6PCIiIl/r7Nu3D2ZmZmjTps07l4uKikJGRgYAoGHDhh+ckYik079/fyQkJODw4cNSRykV4uPj4ebmBqVSicDAQNjZ2UkdiQpAEAScOHGCh3nRe+lcoWzcuLHq88DAwPcun5CQgHPnzqF9+/YwNTV957InTpxQfd6oUaMPzkhE0vnll18gk8kwefJkqaPoPIVCAScnJ6SkpGDDhg1qV8og7SCXy5GZmam6vjPR2+hcoRQEQXUyzu7du9978P3BgweRlZWVr+nubdu2Acg5TrNPnz5FE5iISlS5cuXQqFEjhIaGqm6nSsXDzc0NcXFx8PPzQ//+/aWOQx/Azs4Ozs7OnPam99K5QlmhQgX069cPAJCUlIQVK1a8c/l9+/bBysrqvdesXL9+PR4/fgwA6Nq1K+zt7YsmMBGVuF9++QWiKGLChAlSR9FZvXr1QmhoKPr27Yvp06dLHYcKQRAE7N+/H5mZmVJHIQ2mk/fyjoiIgLOzMxQKBfT09LBu3Tq1YysL6vbt2+jXrx8yMzOhp6eHM2fOoGnTpkWYmIhKWsWKFZGenp7vk/co/6ZOnYrZs2fD1dWVF5LXAZcvX4arqyuOHj2K9u3bSx2HNJTOjVACQJ06dVSXBVEqlRg5cmS+T9B5U3R0NIYNG6b6l9m4ceNYJol0gK+vL1JSUrBp0yapo+iUDRs2YPbs2bC1tUVISIjUcagIuLi4oFq1apz2pnfSyRFKIOeYx44dO6pOzDEzM8P333+PXr16qa5V+S6iKOLAgQOYPXu26uy2Ro0a4dSpU+89eYeINF9GRgbMzc3xySef4NatW1LH0QnBwcFo1aoVzMzMcO/ePVSoUEHqSFRERo4ciYCAANy7dy9ff0Op9NHZQgnkHEPp5eWF4OBg1WOurq748ssv0apVKxgYGORaR6lU4uzZs9i4caPaes7Ozjhy5AivP0mkQzw8PHDs2DHcu3ePx0UXUkxMDBwdHZGdnY2wsDDUq1dP6khUhA4fPgxPT0+EhYWhQYMGUschDaTThRLIuQvOqFGj8Ndff6k9Xr58edSvXx+1atWCmZkZ0tPTERkZiWvXriE+Pl5t2T59+mDlypXvvYsOEWmX8PBwNGjQAD169MDOnTuljqO1kpOTYWdnh8TEROzbtw9eXl5SR6IiplAoUKlSJYwfPx5+fn5SxyENpPOF8pUDBw5g1KhRiIqKyvc6H330ERYtWgRvb+9iTEZEUrKzs8Pjx4+RlpbG2wF+AKVSiVq1aiEqKgpLlizBqFGjpI5ExaRfv36IjIzEpUuXpI5CGqjUvHt6eXnh1q1b2LdvHwRBQPny5fNcztLSEl5eXti5cyeio6NZJol03Lhx46BQKPDbb79JHUUreXh4ICoqCsOHD2eZ1HFyuRyXL1/GgwcPpI5CGqjUjFC+SRRF3Lt3D5GRkcjIyICRkRE++eQT1KhRg6MURKWIUqmEqakpKlWqxD+UBeTr64tVq1bBw8MDR44ckToOFbPExERUqlQJixcvxvDhw6WOQxqm1BZKIqJXvL29sWPHDoSGhsLZ2VnqOFrht99+w9ixY+Hg4ICbN2/yH+KlhIeHB/T19XHo0CGpo5CG4TsAEZV6v/76KwBg/PjxEifRDnv37sW4ceNgZWWFsLAwlslSRC6XIzAwEC9fvpQ6CmkYvgsQUalnZ2cHR0dHnDx5Eunp6VLH0WjXrl1Djx49YGRkhNDQUJiZmUkdiUqQIAjIzMzEwYMHpY5CGoaFkogIwI8//ojs7Gzed/od4uPj4ebmBqVSicDAQNjZ2UkdiUqYvb09GjRoAH9/f6mjkIbhMZRERP9jYWEBExOTXNeipZzrEFavXh1xcXHYtGkT+vfvL3Ukkoifnx9+//13PHnyBIaGhlLHIQ3BEUoiov/57LPP8OzZMxw7dkzqKBrHzc0NcXFx+PHHH1kmSzlBEJCYmIjTp09LHYU0CEcoiYj+JyEhARUrVkTDhg1x8eJFqeNojFdnwfft2xdbtmyROg5JTBRFVKtWDd7e3rx+K6lwhJKI6H+srKzQsGFDXL58GQkJCVLH0QhTp07Fjh074OrqyjJJAACZTAa5XI49e/aAY1L0CgslEdFr5syZA1EUMWnSJKmjSG7Dhg2YPXs2bG1tERISInUc0iCCIODevXu4evWq1FFIQ3DKm4joDRUqVEBGRgaSk5OljiKZ4OBgtGrVCmZmZrh37x4qVKggdSTSIBkZGahUqRImTJiAH374Qeo4pAE4QklE9IahQ4ciJSUF//zzj9RRJBETE4P27dtDT08PISEhLJOUi7GxMTp16oQ9e/ZIHYU0BEcoiYjekJ6eDgsLCzg4OCAiIkLqOCUqOTkZdnZ2SExMxL59++Dl5SV1JNJQf//9NwYMGIAHDx6gatWqUschiXGEkojoDSYmJmjVqhVu3ryJ+/fvSx2nxCiVSjg7O+P58+dYvHgxyyS9U+fOnaGvr4+AgACpo5AGYKEkIsrDggULAADjxo2TOEnJ8fDwQFRUFIYPH45Ro0ZJHYc0XPny5dGqVStOexMAFkoiojw1bNgQVatWRUBAAJRKpdRxit0333yDwMBAeHh4YOnSpVLHIS0hCAKOHz+OpKQkqaOQxFgoiYjeYuzYsVAoFPj999+ljlKsfvvtN6xcuRIODg44dOiQ1HFIi8jlcigUCv7eEE/KISJ6G6VSCVNTU1SpUkVnj6Xcu3cv5HI5ypcvj9jYWJiZmUkdibSMk5MTGjRogI0bN0odhSTEEUoiorfQ09NDly5dEBsbi/DwcKnjFLlr166hR48eMDIyQmhoKMskfRBBELBv3z5kZmZKHYUkxEJJRPQOixYtAgCMHz9e4iRFKz4+Hm5ublAqlQgMDISdnZ3UkUhLCYKA58+fIzg4WOooJCEWSiKid7C3t4eDgwMCAwOhUCikjlMkFAoFnJyckJKSgg0bNsDd3V3qSKTFGjZsCFtbW57tXcqxUBIRvYefnx+ys7Mxffp0qaMUCTc3N8TFxeHHH39E//79pY5DWk5PTw9yuRx79uwBT8sovXhSDhFRPlhYWMDU1BRPnz6VOkqheHt7Y8eOHejbty+2bNkidRzSEQcOHEDnzp1x9epV1K9fX+o4JAGOUBIR5UOfPn0QHx+P48ePSx3lg02dOhU7duyAq6sryyQVqXbt2sHCwoLT3qUYRyiJiPIhPj4elStXRqNGjXD+/Hmp4xTYhg0b4OPjA1tbW8TExMDAwEDqSKRjevfujZiYGK38/4MKjyOURET5ULFiRbi4uODixYtITEyUOk6BBAcH4+uvv4a5uTnCw8NZJqlYyOVyXLhwAf/995/UUUgCLJRERPk0e/ZsiKKIyZMnSx0l32JiYtC+fXvo6enh3LlzqFChgtSRSEd16dIF+vr6CAgIkDoKSYBT3kREBVChQgUoFAqtuHdxcnIy7OzskJ
iYiL1796Jz585SRyId17ZtW5iammL//v1SR6ESxhFKIqICGDRoEJKTk/Hvv/9KHeWdlEolnJ2d8fz5cyxevJhlkkqEXC7HsWPHkJycLHUUKmEcoSQiKoD09HSYm5vD0dERN27ckDrOW7Vr1w6BgYEYPnw4li5dKnUcKiXu3r2LTz75BNu3b0evXr2kjkMliCOUREQFYGJigpYtWyIiIgIPHjyQOk6evvnmGwQGBqJ9+/Ysk1SiatSogfr16/PyQaUQCyURUQEtWLAAADBu3DiJk+T222+/YeXKlXBwcMDhw4eljkOlkFwux759+5CVlSV1FCpBnPImIvoAVatWxbNnz5Camgo9Pc34t/nevXshl8tRvnx5xMbGwszMTOpIVAqdP38eTZs2xYkTJ9C6dWup41AJ0Yx3QSIiLTNmzBhkZGRozJTytWvX0KNHDxgZGSE0NJRlkiTTqFEj2NjYcNq7lOEIJRHRB8jKyoKZmRlsbGwQExMjaZb4+HhUr14daWlpCAoKgru7u6R5iHx9fXHs2DFERkZCJpNJHYdKAEcoiYg+gIGBATp37oz79+/j2rVrkuVQKBRwcnJCSkoKNmzYwDJJGkEQBERFRWn0lRCoaLFQEhF9oIULFwKQ9uQcNzc3xMXF4ccff0T//v0ly0H0unbt2sHc3JzT3qUIp7yJiAqhVq1auHv3LlJTU2FkZFSi+/b29saOHTvQp08fbN26tUT3TfQ+vXr1wsOHDxESEiJ1FCoBHKEkIiqEqVOnIjs7GzNnzizx/e7YsQOurq4sk6SRBEHAuXPnEBcXJ3UUKgEcoSQiKiRzc3OYm5vjyZMnJbK/DRs2wMfHB7a2toiJiYGBgUGJ7JeoIJ49e4bKlStjxYoVGDJkiNRxqJhxhJKIqJD69OmDp0+f4tSpU8W+r+DgYHz99dcwNzfHlStXWCZJY1WoUAEtWrTgcZSlBAslEVEhzZ8/HzKZDBMmTCjW/cTExKB9+/bQ09PDuXPnULFixWLdH1FhCYKAo0ePIjk5WeooVMxYKImICqlixYpo0KABzp8/j5cvXyI+Ph7+/v4oyiOKkpOT4eLiAoVCgT179qBevXpFtm2i4iIIAjIyMnDkyBGpo1AxY6EkIioCM2fOhCiKqruECIJQZNfgUyqVcHZ2xvPnz7F48WJ07ty5SLZLVNw++eQT1K1bl9PepQALJRFRIYiiiFWrVqmmuyMjI5GVlQUg52SdouDh4YGoqCgMHz4co0aNKpJtEpUUQRCwd+9e1f8XpJtYKImICuHq1avw9fXNczSyQoUKBd5eRkYGmjVrht9//x0AMGzYMAQGBqJ9+/Yac99wooIQBAHPnj3D2bNnpY5CxYiXDSIiKqQlS5bg22+/VTtmUl9fH5mZmQW+j/HJkyfRpk0bAEDLli1x+vRpODg44ObNm9DT4xgAaR+lUomqVauif//+WLBggdRxqJjw3YmIqJBGjx6NnTt3wtjYWPVYmTJlClwmAeD48ePQ19cHAJw+fRqGhoYIDg5mmSStpaenh27dumHPnj1FeqIaaRaOUBIRFZFz586hQ4cOSEpKgoWFBZKSkpCYmIjQ0FD8999/yM7OhoWFBerVqwcHB4c8S2KzZs3UpgZlMhkaNGiAgwcPokqVKiX5coiKzL59+9C1a1fcuHEDderUkToOFQMWSiKiInT37l04ODgAABwcHHDr1q08lytTpgw8PT0xbNgwtG3bFjKZDKmpqShbtiyys7NVy+np6UGpVGL58uX45ptvSuQ1EBW1tLQ0VKxYET/++CMmTZokdRwqBpxDISIqIqmpqVi6dClkMhmUSuVbyyQAJCUlYfv27Wjfvj2cnZ1x4cIFBAYGqsrkq+ny9u3bw9/fn7euI61mamoKT09PXj5Ih3GEkoioCFy6dAmfffYZIiMjVY/JZDJ8+umnqFu3Luzt7WFgYIDnz5/j5s2bCAsLQ0JCgmpZfX19lClTBomJiTA2NsaoUaPg6+uLmjVrSvFyiIrcunXrMHDgQPz333+wtraWOg4VMRZKIqJCOnXqFLp06aK6vZyRkREGDBiAPn36oGrVqnmuk5mZiWPHjmHNmjW4efOm6nFbW1vcunULFhYWJZKdqKTEx8ejSpUqWLlyJQYPHix1HCpiLJRERIVw7do1NGvWDElJSQCATz/9FDNnzkSNGjXytX5mZib++usvLF++XDXdPXLkSNV1KIl0SatWrWBpaYmAgACpo1AR4zGUREQfKDMzE1988YWqTLZo0QJr167Nd5kEAENDQ/j6+uLXX3+FgYEBAOCPP/7AwYMHiyUzkZTkcjmOHj2KlJQUqaNQEWOhJCL6QPPmzUNYWBgAoGbNmli0aJHatSgvXLiATz/9NM+PK1euqG2rXbt2mDx5surrwYMHq6bQiXSFIAhIT0/HkSNHpI5CRcxA6gBERNooLS0NCxcuBJBzQs3MmTNhYmKS57L9+/dHvXr11B6zs7PLtVzv3r1x9OhRnD17Fg8fPsSGDRswfPjwog9PJBEHBwfUqVMHe/bsQffu3aWOQ0WIhZKI6ANs3boVz58/BwB4eXnlKoyva9iwITp27PjebcpkMowfPx7e3t4AgGXLlmHYsGEfdMcdIk0ll8uxZs0aZGdnq+4KRdqPU95ERB9g27Ztqs/79u373uVTUlKQlZX13uUcHR3h4uICALh+/braGeBEukAQBMTHx6vdEYq0HwslEVEBiaKIixcvAgAsLS3RoEGDdy7/448/ws3NDY0aNcLAgQNx/fr1dy7fqlUr1eev9kOkK5o2bYrKlSvzIuc6hoWSiKiAHj16hCdPngAA6tat+9YpaUNDQ3To0AGTJk3CkiVLMHLkSERGRsLHxwcRERFv3X7dunVVn4eGhhZteCKJ6enpoVu3bvD395c6ChUhFkoiogJ69uyZ6nMbG5u3Lufs7Ixff/0VPXr0QNu2bTF48GD8/fffkMlkWLx48VvXe/0uIq/vi0hXCIKA27dv85AOHcJCSURUQK/fD6KgJ8zY2dmhbdu2OH/+vOpC5m96fZu89wTpIg8PD5iamnLaW4ewUBIRFVC5cuVUnz9+/LjA61tbWyMzMxNpaWl5Pv9qOh0AypcvX+DtE2k6U1NTdOzYkdPeOoSFkoiogD766CNYWVkBAG7cuFHgUcQHDx7A2NgYZmZmeT7/+vGVzs7OH5yTSJMJgoCzZ89+0D/KSPOwUBIRFZBMJkOjRo0AAAkJCW89DiwhISHXY7du3UJgYCDc3d2hp5f3W3BQUJDqc1dX1yJITKR5unbtCgDYu3evxEmoKMhEHqBDRFRgy5cvV93FxtvbGz/99FOuZQYNGgRjY2M4OzvDysoKUVFR2LFjBwwMDLBp06Y87/kdHR0NuVwOAPjkk09w+/bttxZPIm3XokULVKhQgcdS6gC+SxERfYABAwbAwsICAODv74+7d+/mWqZdu3ZITEzEhg0bMGvWLBw6dAjt27fHli1b8iyToiji999/V309bNgwlknSaYIg4MiRI0hNTZU6ChUSRyiJiD7Q5MmT8csvvwAAnJycsG7dOhgaG
n7w9g4cOIAJEyYAACpWrIjbt2/zpBzSabdv34ajoyP27NmjGpkn7cR/+hIRfSA/Pz/UqlULABAeHo4ffvghX7dXzMuFCxfg5+en+nrZsmUsk6TzatWqpSqUpN1YKImIPpCpqSk2bNgAY2NjAMD+/fsxYsSIAp21Kooi/v33XwwbNgzp6ekAgP79+6N3797FkplI0wiCgICAgLdel5W0A6e8iYgKKSAgAN7e3lAoFACAMmXKYOjQoejRowcsLS3zXEcURVy4cAGrVq3CuXPnVI97eXlh165dqpJKpOvOnDmD5s2bIzg4GM2aNZM6Dn0gFkoioiJw/Phx9O/fH48ePVI9ZmxsDHd3d9StWxd2dnbQ19fHixcvEBERgUuXLuHevXtq2xg6dCh+//13GBkZlXB6IulkZ2fDxsYGX3/9NebOnSt1HPpALJREREUkISEB3377LTZu3Fig9ezs7LBq1Sp4enoWUzIizTZo0CAEBwfz3t5ajMdQEhEVESsrK2zYsAEREREYM2aM6m46edHT00OrVq2wZcsWREZGskxSqSYIAm7duoVbt25JHYU+EEcoiYiKiSiKuHv3Li5duoS4uDhkZWXBwsIC9evXh7OzM8zNzaWOSKQRUlNTUbFiRUyfPh3ff/+91HHoA7BQEhERkeQEQcCzZ8/Ubj1K2oNT3kRERCQ5uVyOM2fO4OnTp1JHoQ/AQklERESS69q1KwBg7969EiehD8FCSURERJKrUqUK3N3dedccLcVCSURERBpBLpfj8OHDSEtLkzoKFRALJREREWkEQRCQlpaGo0ePSh2FCoiFkoiIiDRC7dq1UatWLU57ayEWSiIiItIYcrkcAQEBUCqVUkehAmChJCIiIo0hCAKePHmCc+fOSR2FCoCFkoiIiDSGu7s7KlasyGlvLcNCSURERBpDX18fXbt2hb+/v9RRqABYKImIiEijCIKAiIgIREZGSh2F8omFkoiIiDRKhw4dYGJiwmlvLSITRVGUOgQRERHR67p164YXL17g1KlTUkehfOAIJREREWkcQRAQHByM+Ph4qaNQPrBQEhERkcbp1q0bRFHE3r17pY5C+cBCSURERBqnSpUqaNq0Kc/21hIslERERKSRBEHAoUOHkJaWJnUUeg8WSiIiItJIgiAgNTUVx48flzoKvQcLJREREWmk2rVro2bNmrx8kBZgoSQiIiKNJJPJIAgCAgICoFQqpY5D78BCSURERBpLEAQ8evQIFy5ckDoKvQMLJREREWksd3d3VKhQgdPeGo6FkoiIiDSWgYEBunbtykKp4VgoiYiISKMJgoAbN27gzp07Ukeht2ChJCIiIo3WoUMHGBsb8yLnGkwmiqIodQgiIiKid+natSuSkpJw8uRJqaNQHjhCSURERBpPEAQEBQXh2bNnUkehPLBQEhERkcbr2rUrlEol9u3bJ3UUygMLJREREWk8GxsbNG3alGd7aygWSiIiItIKcrkchw4dQnp6utRR6A0slERERKQVBEFASkoKjh8/LnUUegMLJREREWmFunXr4pNPPuG0twZioSQiIiKtIJPJIJfLERAQAKVSKXUceg0LJREREWkNQRAQFxeHixcvSh2FXsNCSURERFqjefPmsLKy4rS3hmGhJCIiIq1hYGCALl268DaMGoaFkoiIiLSKIAi4du0a7t69K3UU+h8WSiIiItIqnp6eMDIy4rS3BpGJoihKHYKIiIioIDp37oy0tDQEBgZKHYXAEUoiIiLSQoIg4PTp00hISJA6CoGFkoiIiLRQt27dkJ2djf3790sdhcBCSURERFrI1tYWjRs35nGUGoKFkoiIiLSSIAg4ePAgMjIypI5S6rFQEhERkVYSBAHJyck8MUcDsFASERGRVqpXrx4+/vhjTntrABZKIiIi0koymQyCIMDf3x9KpVLqOKUaCyURERFpLUEQ8N9//+Hy5ctSRynVWCiJiIhIa7Vo0QLly5fntLfEWCiJiIhIaxkYGKBLly4slBJjoSQiIiKtJggCrl69iujoaKmjlFoslERERKTVPD09YWRkBH9/f6mjlFoyURRFqUMQERERFYaXlxcyMjJw/PhxqaOUShyhJCIiIq0nCAJOnTqF58+fSx2lVGKhJCIiIq3XrVs3ZGdnY//+/VJHKZVYKImIiEjrVa1aFY0aNeLZ3hJhoSQiIiKdIJfLcfDgQWRkZEgdpdRhoSQiIiKdIAgCkpKScOLECamjlDoslERERKQTPv30U1SvXp3T3hJgoSQiIiKdIJPJIJfL4e/vD14VsWSxUBIREZHOEAQBDx8+xOXLl6WOUqqwUBIREZHOaNmyJcqVK8dp7xLGQklEREQ6w9DQEJ07d+ZtGEsYCyURERHpFEEQcOXKFdy7d0/qKKUGCyURERHplE6dOsHQ0BABAQFSRyk1ZCJPgyIiIiId4+npiezsbBw9elTqKKUCRyiJiIhI5wiCgJMnTyIxMVHqKKUCCyURERHpHLlcjqysLBw4cEDqKKUCCyURERHpnI8++ggNGzbk5YNKCAslERER6SRBEHDgwAEoFAqpo+g8FkoiIiLSSYIg4OXLlzh58qTUUXQeCyURERHpJCcnJ9jb23PauwSwUBIREZFOkslkkMvl8Pf3B6+SWLxYKImIiEhnCYKA2NhYhIWFSR1Fp7FQEhERkc5q1aoVLC0tOe1dzFgoiYiISGcZGhqic+fOLJTFjIWSiIiIdJogCAgLC8P9+/eljqKzWCiJiIhIp3Xq1AmGhobw9/eXOorOkok87YmIiIh0XMeOHSGKIo4cOSJ1FJ3EEUoiIiLSeYIg4MSJE3jx4oXUUXQSCyURERHpvG7duiErKwsHDhyQOopOYqEkIiIinWdnZwcXFxee7V1MWCiJiIioVBAEAQcOHIBCoZA6is5hoSQiIqJSQS6X48WLFzh16pTUUXQOCyURERGVCs7OzqhWrRqnvYsBCyURERGVCjKZDHK5HP7+/uBVE4sWCyURERGVGoIg4P79+7hy5YrUUXQKCyURERGVGq1bt0bZsmV515wixkJJREREpYaRkRG8vLx4HGURY6EkIiKiUkUQBFy+fBmxsbFSR9EZLJRERERUqnh5ecHAwAABAQFSR9EZMpGnOREREVEp4+HhAX19fRw6dEjqKDqBI5RERERU6giCgMDAQLx48ULqKDqBhZKIiIhKHblcjszMTI5QFhEWSiIiIip17O3t0aBBA57tXURYKImIiKhUEgQB+/fvR2ZmptRRtB4LJREREZVKgiAgMTERp0+fljqK1mOhJCIiolLJxcUFH330Eae9iwALJREREZVKMpkMcrkce/bsAa+iWDgslERERFRqCYKAmJgYXL16VeooWo2FkoiIiEqt1q1bo0yZMpz2LiQWSiIiIiq1jI2N4eXlxUJZSCyUREREVKoJgoBLly7hwYMHUkfRWiyUREREVKp5eXlBX18fAQEBUkfRWjKRpzURERFRKde+fXsYGhri4MGDUkfRShyhJCIiolJPEAQcP34cL1++lDqKVmKhJCIiolJPLpcjMzMThw4dkjqKVmKhJCIi
olKvevXqcHJy4tneH4iFkoiIiAg509779u1DZmam1FG0DgslEREREXKmvRMTExEUFCR1FK3DQklEREQEwNXVFba2tvD395c6itZhoSQiIiICIJPJIJfLsWfPHvCqigXDQklERET0P4IgIDo6GteuXZM6ilZhoSQiIiL6n7Zt28LCwoLT3gXEQklERET0P8bGxujUqRMvH1RALJRERERErxEEARcuXMB///0ndRStwUJJRERE9JrOnTtDX18fAQEBUkfRGjKRpzERERERqWnbti1MTU2xf/9+qaNoBY5QEhEREb1BEAQcO3YMSUlJUkfRCiyURERERG+Qy+VQKBQ4fPiw1FG0AgslERER0Rtq1KiB+vXr82zvfGKhJCIiIsqDIAjYt28fsrKypI6i8VgoiYiIiPIgCAISEhIQHBwsdRSNx0JJRERElAdXV1fY2Nhw2jsfWCiJiIiI8qCnpwe5XI49e/aAV1l8NxZKIiIiorcQBAF3797FjRs3pI6i0VgoiYiIiN6ibdu2MDc357T3e7BQEhEREb2FiYkJOnXqxEL5HiyURERERO8gCALOnz+PuLg4qaNoLBZKIiIionfo3Lkz9PX1ERAQIHUUjSUTedoSERER0Tu1adMG5ubm2Ldvn9RRNBJHKImIiIjeQxAEHDt2DMnJyVJH0UgslERERETvIZfLkZGRgcOHD0sdRSOxUBIRERG9xyeffIJ69erxbO+3YKEkIiIiygdBELBv3z5kZWVBFEWkpqbi+fPnSEtLkzqa5FgoiYiIiPLB3d0dz549Q8uWLWFtbQ1zc3NYWVnBzMwMVatWhVwux8KFCxEfHy911BLHs7yJiIiI3iEqKgpTpkzBzp07kZWV9d7ljY2N0bdvX8yePRtVq1YtgYTSY6EkIiIiyoNSqcTSpUsxadIkpKamqj1XtmxZ1KxZE6ampkhNTcWdO3eQlJSktoylpSV+++03+Pj4QCaTlWT0EsdCSURERPSGrKwsDBo0CBs2bFA9ZmVlBW9vb8jlctjZ2amVRKVSiejoaOzevRs7d+7Ey5cvVc+NGTMGixYt0ulSyUJJRERE9BpRFDFw4ECsW7dO9Vjfvn0xbtw4mJmZvXf9Fy9eYO7cuWp31hk/fjwWLFhQHHE1AgslERER0WvWrFmDwYMHAwAMDAwwb948dOjQocDb2bVrF6ZNmwalUqn6unv37kUZVWOwUBIRERH9T2xsLOrXr6+asp4/fz46deqktoxCocAff/yBvXv34uXLl6hVqxZGjhyJZs2a5dre1q1bMXPmTABAlSpVcP36dVSoUKH4X0gJ42WDiIiIiP5n5syZqjIpCEKuMgkAP/zwAzZu3IguXbpg4sSJ0NPTw4gRI3D58uVcy/bp0wetW7cGADx+/BgLFy4s3hcgEY5QEhERESHn2EdbW1ukpqbCzMwMhw8fhqWlpdoyV69exeeff47x48fjq6++AgBkZGSgR48esLKywqZNm3Jt99GjR/Dy8kJWVhYqV66M+/fvw9jYuCReUonhCCURERERgG3btqkuD9StW7dcZRIAjhw5An19fXh7e6seMzY2Rs+ePXHlyhU8evQo1zrW1tZo3749AODJkyfYv39/Mb0C6bBQEhEREQE4e/as6vMuXbrkuUxERATs7e1hYWGh9nj9+vUBADdv3sxzvc6dO6s+DwkJKWxUjcNCSURERATg0qVLAAA9PT3Url07z2Xi4+NRqVKlXI+/euzJkyd5rveqcL6+H13CQklEREQE4P79+wAAW1tbmJqa5rlMeno6DA0Ncz1uZGQEIOd4yrxUrlwZZcqUUduPLmGhJCIiIgKQmZkJ4P/LYV5MTExUy71OoVAAwDtPtnlVRPNaX9uxUBIREREBqrvgJCcnv3WZihUr4unTp7kef/VY5cqV81wvOzsbKSkpAPDW0U9txkJJREREBKiOm3zy5AmePXv21mViYmJylc6rV6+qbeNNMTExqunwOnXqFFVkjcFCSURERATA1dVV9XlYWFiey3To0AHZ2dnYvn276jGFQoHdu3fDyckJ1tbWea73+kXPX9+PrmChJCIiIgJU14oEgB07duS5jJOTEzp27IjFixfj119/xbZt2zBo0CD8999/GDt27Fu3vXPnTtXn7dq1K7rQGoJ3yiEiIiJCznGONWrUwP379yGTybBt2zY4OjrmWi4jIyPPe3k3b948z+1euHABAwcOBAA4Ozvj8uXLkMlkxfpaShoLJREREdH/zJ8/HxMmTACQc6zj33//nedlgvIrNTUVvXr1woMHDwAAa9asUZVLXcIpbyIiIqL/GT16tOoi5BEREZgzZw4+dOwtOzsbP/74o6pMtmjRQnX/b13DEUoiIiKi11y6dAlubm7IysoCAHTv3h1Tpkwp0OV+kpKS4Ofnh6NHjwLIuSRRWFgYHBwciiWz1DhCSURERPQaV1dXrF+/XnWc4+7du9G7d2+EhIS8d7RSqVTixIkT6NGjh6pMGhoaYtu2bTpbJgGOUBIRERHlaevWrfDx8VG7naKDgwMEQUD9+vXh4OAAExMTpKWl4fbt27hy5Qr27NmDe/fuqZa3sLDAtm3b0KlTJwleQclhoSQiIiJ6i5s3b2LgwIE4e/Zsgdf18PDAn3/+CXt7+2JIplk45U1ERET0FrVr18bp06exadMmuLu752uddu3aYceOHTh8+HCpKJMARyiJiIiI8u3GjRsICgrCpUuXcPfuXWRkZMDExAQODg5wdXVFy5YtdfpYybdhoSQiIiKiQuGUNxEREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVCgslERERERUKCyURERERFQoLJREREREVyv8BJIs403Ek9VUAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAApQAAAHzCAYAAACe1o1DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABltElEQVR4nO3dd1gU1/4G8Hdh6dUaRFHBgoJSpInGGo2aa2I0lhi7ibG3mGiMiVFvjLH9ElOsMdFEjSnGbmy5xC5NmoKAgAV7pbdlz+8PLnsdAenM7vJ+nmefsDNnZr+7EHyZM+cchRBCgIiIiIioggzkLoCIiIiIdBsDJRERERFVCgMlEREREVUKAyURERERVQoDJRERERFVCgMlEREREVUKAyURERERVQoDJRERERFVCgMlEREREVUKAyURERERVQoDJRERERFVCgMlEREREVUKAyURERERVQoDJRERERFVilLuAoiIqGao1Wrk5ubKXQYRaQkjIyMYGhpWybkYKImIaoHc3FwkJSVBrVbLXQoRaRFbW1vY2dlBoVBU6jwMlEREek4Igdu3b8PQ0BAODg4wMODdTkS1nRACmZmZuHfvHgCgUaNGlTofAyURkZ5TqVTIzMyEvb09zM3N5S6HiLSEmZkZAODevXto2LBhpbq/+WcqEZGey8/PBwAYGxvLXAkRaZvCPzLz8vIqdR5eoSQiqiUqc49URASwYQMQHg6kpQFWVoCHBzBxIuDuXmUlElENq+y9k4UYKImIqETBwcCsWcDZs0X3nTsHrFsHdO4MfPkl4ONT4+URkZZglzcRERXr4EGga9eiYdLUVPr8zJmCdgcP1lxt+kihUGDPnj1ylwEA2LJlC2xtbct1zNixY/H6668/t03z5s3x1VdfVbiuyvjnn3+gUCjw5MmTGn/tRYsWwcPDo8ZftyYxUBIRURHBwcDgwUB2dsFzZ2dg7VogJQXIygKePCl47uxcsD87u6B9cHDV1TB27FgoFAooFAoYGRnB0dERc+fORXZhUVWke/fumDVrVpnaKRQK7Ny5U7L9q6++QvPmzau0poq4evUqFAoFGjZsiLS0NMk+Dw8PLFq0qMznGjZsGOLi4qq4QtJnDJRERFTErFn/C5NDhwKRkcDkyYC1dcE2G5uC5xERwJAhBduys4HZs6u2jr59++L27dtITEzEl19+iQ0bNuDTTz+t2hcpB1NTU3z88ceVHsBQndLS0rBq1apKncPMzAwNGzasoorkxwn9qx8DJRERSYSH/6+b29kZ+PlnoKQB4iYmwLZt/7tSeeZMQcisKiYmJrCzs4ODgwNef/119OrVC8eOHdPsz8nJwYwZM9CwYUOYmprixRdfRPAzl0lPnDgBX19fmJiYoFGjRvjwww+hUqkAFFwFPXHiBNasWaO5Gnr16tUS6xk+fDiePHmCTZs2PbfudevWoUWLFjA2NoazszN+/vlnyf74+Hh07doVpqamcHFxkbynQjdu3MDQoUNha2uLunXrYsCAAc+trdD06dPxf//3f5r5BYuTk5OD999/H40bN4aFhQX8/Pzwzz//aPYX1+X92WefoWHDhrCyssI777yDDz/8sNhu3FWrVqFRo0aoV68epk6dWiR8p6WlYfjw4bCwsEDjxo3x3XffSfZfv34dAwYMgKWlJaytrTF06FDcvXtXs7+4rvVZs2ahe/fumufdu3fHtGnTMGvWLNSvXx99+vTR7AsNDYW3tzfMzc3RqVMnxMbGSs5V2veutPoA4IsvvsALL7wAKysrvP3221V+VV0bMVASEZHExo3/+3rmzJLDZCFjY2DGjOKPr0oXL17E2bNnJdMfzZ07F7t27cLWrVtx4cIFtGzZEn369MGjR48AADdv3sQrr7wCHx8fREREYN26ddi8eTM+++wzAMCaNWvg7++PCRMm4Pbt27h9+zYcHBxKrMHa2hoLFizAkiVLkJGRUWyb3bt3Y+bMmZgzZw4uXryIiRMnYty4cQgICABQsATmoEGDYGxsjMDAQKxfvx7z5s2TnCMvLw99+vSBlZUVTp06hTNnzsDS0hJ9+/Yt9Wrb8OHD0bJlSyxZsqTENtOmTcO5c+ewc+dOREZGYsiQIejbty/i4+OLbb99+3YsXboUy5cvR2hoKJo2bYp169YVaRcQEICEhAQEBARg69at2LJlC7Zs2SJps3LlSri7uyMsLAwffvghZs6cqQnUarUaAwYMwKNHj3DixAkcO3YMiYmJGDZs2HPfc3G2bt0KY2NjnDlzBuvXr9dsX7BgAVavXo2QkBAolUqMHz9es68s37vS6vvtt9+waNEifP755wgJCUGjRo2wdu3actevcwQREem1rKwsER0dLbKyssrU3t9fCKDg8eRJ2V7jyZP/HdOpUyWKfcqYMWOEoaGhsLCwECYmJgKAMDAwEH/88YcQQoj09HRhZGQktm/frjkmNzdX2NvbixUrVgghhPjoo4+Es7OzUKvVmjbfffedsLS0FPn5+UIIIbp16yZmzpxZaj2F7bKzs0WzZs3EkiVLhBBCfPnll6JZs2aadp06dRITJkyQHDtkyBDxyiuvCCGEOHLkiFAqleLmzZua/X/99ZcAIHbv3i2EEOLnn38uUndOTo4wMzMTR44cKba+pKQkAUCEhYWJw4cPCyMjI3HlyhUhhBDu7u7i008/FUIIce3aNWFoaCh5fSGEeOmll8T8+fOFEEL8+OOPwsbGRrPPz89PTJ06VdK+c+fOwt3dXfN8zJgxolmzZkKlUkne97BhwzTPmzVrJvr27Ss5z7Bhw0S/fv2EEEIcPXpUGBoaiuvXr2v2X7p0SQAQQUFBmtcZMGCA5BwzZ84U3bp10zzv1q2b8PT0lLQJCAgQAMTx48c12w4ePCgAaP7fKO17V5b6/P39xZQpUyTn8PPzk3xW2qS8vx9KwiuUREQkUTiew9S04F7JsrCxKej+fvr4qtCjRw+Eh4cjMDAQY8aMwbhx4/DGG28AABISEpCXl4fOnTtr2hsZGcHX1xcxMTEAgJiYGPj7+0vm2uvcuTPS09ORnJxcoZpMTEywZMkSrFq1Cg8ePCiyPyYmRlJT4Ws+XZODgwPs7e01+/39/SXtIyIicOXKFVhZWcHS0hKWlpaoW7cusrOzkZCQUGqNffr0wYsvvohPPvmkyL6oqCjk5+ejdevWmnNbWlrixIkTJZ47NjYWvr6+km3PPgcAV1dXyWorjRo1KtL1/ux79ff3L/LZPH2V2MXFBba2tpo2ZeXl5VXsdjc3N0l9ADQ1lvV797z6YmJi4OfnV+Q96jvOQ0lERBJWVgX/zc4uGNVdllCZkgLk5EiPrwoWFhZo2bIlAOCHH36Au7s7Nm/ejLfffrvqXqQCRo4ciVWrVuGzzz6rlhHe6enp8PLywvbt24vsa9CgQZnO8cUXX8Df3x8ffPBBkXMbGhoiNDS0yFJ7lpaWFS8aBYH+aQqFA
mq1ulLnfJaBgQGEEJJtxQ2SsrCwKPb4p2ss/EOjqmusjXiFkoiIJJ4eZ7FjR9mOeTr3VNd0ewYGBvjoo4/w8ccfIysrSzNw4syZM5o2eXl5CA4OhouLCwCgbdu2OHfunCSAnDlzBlZWVmjSpAmAgiUpC5enLE8ty5Ytw7p164oMlGnbtq2kpsLXfLqmGzdu4Pbt25r958+fl7Tv0KED4uPj0bBhQ7Rs2VLysCnjZWNfX18MGjQIH374oWS7p6cn8vPzce/evSLntrOzK/Zczs7ORQY7Pfu8rJ59r+fPn0fbtm0B/O+zuXHjhmZ/dHQ0njx5ovn8GjRoIPnsACA8PLxCtTyrrN+759XXtm1bBAYGFnmPeq9KOuCJiEhrlfceqbCw/90P6ewsRE7O89tnZxe0KzwmPLzyNQtR/L1yeXl5onHjxmLlypVCiIJ75+zt7cVff/0lLl26JMaMGSPq1KkjHj16JIQQIjk5WZibm4upU6eKmJgYsWfPHlG/fn3N/YRCCDFhwgTh4+MjkpKSxP379zX3Vj6ruHstu3TpIkxNTSX3UO7evVsYGRmJtWvXiri4OLF69WphaGgoAgIChBBC5OfnCxcXF9G7d28RHh4uTp48Kby8vCT3UGZkZIhWrVqJ7t27i5MnT4rExEQREBAgpk+fLm7cuFFsfU/fQ1koNjZWKJVKYWpqKnnPI0aMEM2bNxe7du0SiYmJIjAwUHz++efiwIEDQoii91Bu27ZNmJmZiS1btoi4uDjx73//W1hbWwsPD4/nfr+evbexWbNmwtraWixfvlzExsaKb7/9VhgaGorDhw8LIYRQq9XCw8NDdOnSRYSGhorAwEDh5eUlOcfhw4eFQqEQW7duFXFxcWLhwoXC2tq6yD2Uz36vCu+hfPz4sWZbWFiYACCSkpLK9L0rS307d+4Upqam4ocffhCxsbFi4cKFwsrKSu/voWSgJCLScxX5B6NTp/8FxKFDSw6V2dlCDBnyv7adO1dR0aL4gCKEEMuWLRMNGjQQ6enpIisrS0yfPl3Ur19fmJiYiM6dO2sGRxT6559/hI+PjzA2NhZ2dnZi3rx5Ii8vT7M/NjZWdOzYUZiZmUnCxbOKCylnz54VACSBUggh1q5dK5ycnISRkZFo3bq1+OmnnyT7Y2NjxYsvviiMjY1F69atxeHDhyWBUgghbt++LUaPHq15b05OTmLChAkiJSWl2PqKC5RCCPHuu+8KAJJAmZubKxYuXCiaN28ujIyMRKNGjcTAgQNFZGSkEKJooBRCiCVLloj69esLS0tLMX78eDFjxgzRsWNHzf6yBsrFixeLIUOGCHNzc2FnZyfWrFkjOebatWvitddeExYWFsLKykoMGTJE3LlzR9Jm4cKF4oUXXhA2NjZi9uzZYtq0aVUSKIUo/XtXlvqWLl2q+azGjBkj5s6dq/eBUiHEMzciEBGRXsnOzkZSUhIcHR1h+uy6iSUIDi5YTvHplXJmzABGjCi4pzIlpaCb++uvgcJp/MzMgBMnuKZ3bdG7d2/Y2dkVmaeRdEtFfj8Uh4NyiIioCB8f4I8//rf8YmwsMHVqwcPE5H8DcAqZmQG//84wqa8yMzOxfv169OnTB4aGhvjll19w/PjxYidkp9qJg3KIiKhY//oXcPIk8MwsKkXCZOfOBVcm//WvmquNapZCocChQ4fQtWtXeHl5Yf/+/di1axd69eold2mkJXiFkoiISuTjA5w+XbCc4saNBcsypqUVTA3k4QG8+y7g7i53lVTdzMzMcPz4cbnLIC3GQEmkQ4QQCAoKwvnz5xEaGork5GSoVCqYm5vDxcUFXl5e6NGjh2TCZKKq4O4OPLPkMhGRBgMlkQ7IzMzE5s2bsXbtWly+fLnYNkeOHAEAGBoaYsCAAZgxYwa6detWk2USEVEtxXsoibTcqVOn4O7ujhkzZpQYJp+Wn5+PP//8E927d8eYMWPw+PHjGqiSiIhqM16hJNJSQggsX74cH330kWSVD29vb/Tr1w+urq5wdHSEUqlEamoqLl++jAsXLmDPnj24f/8+AOCnn37C8ePH8ddff0nWryUiIqpKnIeSSEstWrQIixcv1jx3d3fHJ598Amdn5+cel5eXh/3792PVqlVIS0sDANSpUwf//PMPQ2UtVVXzzBGR/qmq3w8MlERa6Ndff8Wbb76peT516lRMmDABhoaGZT7HnTt3MGfOHERGRgIAmjRpgqioKNja2lZ1uaTlquofjJupN3Es8RhSc1JhbWKN3k690di6cRVWSkQ1rap+P/AeSiItc/fuXUydOlXz/IMPPsCkSZPKFSYBwM7ODhs3bkT79u0BAMnJyZgzZ06V1kq1Q8itEAz6dRCar2mOcXvHYebhmRi3dxyar2mOQb8OQsitELlLlM3Vq1ehUCgQHh4udymVMnbsWLz++utyl6GhUCiwZ8+eMrf/559/oFAo8OTJkxLbLFq0CB4eHpWuraKaN2+Or776qsZft6Z+RhkoibTMRx99hIcPHwIAXn75ZYwePbrYdtHR0Zg+fTo6d+4MHx8fDBw4ENu3b5e0sbCwwOrVq2FhYQEA+OGHH3D27NnqfQOkV3bH7MaLP7yI3Zd3Q6VWSfap1Crsvlywf8/lPVX+2mPHjoVCoYBCoYCxsTFatmyJJUuWQKVSlX5wBXTv3l3zek8/Jk2aVC2vVx5btmyBQqFA3759JdufPHkChUKBf/75R57CnlL4+e3cuVOy/auvvkLz5s3Lda7bt2+jX79+VVgdVTcGSiIt8uDBA00otLKywoIFC4ptd/bsWYwcORKPHj3CxIkTMW/ePHTt2hV3794t0rZRo0Z47733NM+//fbb6ime9E7IrRAM3zUcOfk5z22Xk5+DN/94s1quVPbt2xe3b99GfHw85syZg0WLFmHlypVV/jqFJkyYgNu3b0seK1asqLbXKw+lUonjx48jICBA7lJKZGpqio8//hh5eXmVOo+dnR1MTEyqqCp55efnQ61Wy11GtWOgJNIiW7duRc5/17UbNGgQ6tatW6RNeno6PvroI3Tt2hU///wzRo8ejcGDB2P27NmS4Pi0119/HXXq1AEA/PHHH5pR4ETP8/mpz0sNk4Vy8nOw7PSyKq/BxMQEdnZ2aNasGSZPnoxevXph3759AIDHjx9j9OjRqFOnDszNzdGvXz/Ex8drjr127RpeffVV1KlTBxYWFnB1dcWhQ4ee+3rm5uaws7OTPKytrTX7g4KC4OnpCVNTU3h7eyMsLKzIOfbt24dWrVrB1NQUPXr0wNatW4t0x54+fRpdunSBmZkZHBwcMGPGDGRkZDy3NgsLC4wfPx4ffvjhc9tFRUWhZ8+eMDMzQ7169fDuu+8iPT1dsz8/Px/vvfcebG1tUa9ePcydOxfPDqdQq9VYtmwZHB0dYWZmBnd3d/zxxx/PfV0AGD58OJ48eYJNmzY9t93evXvRoUMHmJqawsnJCYsXL5ZceX62y/vs2bPw8PDQfO579uwpths3NDQU3t7eMDc3R6dOnRAbG1vktTds2AAHBweYm5tj6NChSElJkbzvJUuW
oEmTJjAxMYGHhwcOHz6s2V9c13p4eDgUCgWuXr0KoOBqsq2tLfbt2wcXFxeYmJjg+vXrAArmFB4/fjysrKzQtGlTbNy4UVJbad+70uoDyvYzWh0YKIm0yN9//635etCgQcW2OXToEB4+fIgZM2bAwMAAmZmZpf71a2xsjFdffRVAwSjw06dPV13RpJdupt7Evth95TpmX+w+3Ey9WU0VFTAzM0Nubi6Agi7xkJAQ7Nu3D+fOnYMQAq+88orm6tjUqVORk5ODkydPIioqCsuXL4elpWWFXzs9PR39+/eHi4sLQkNDsWjRIrz//vuSNklJSRg8eDBef/11REREYOLEiUV6GhISEtC3b1+88cYbiIyMxK+//orTp09j2rRppdawaNEiREVFlRjuMjIy0KdPH9SpUwfBwcH4/fffcfz4ccm5V69ejS1btuCHH37A6dOn8ejRI+zevVtynmXLluGnn37C+vXrcenSJcyePRsjR47EiRMnnluftbU1FixYgCVLlpQYkE+dOoXRo0dj5syZiI6OxoYNG7BlyxYsXbq02Papqal49dVX0b59e1y4cAH//ve/MW/evGLbLliwAKtXr0ZISAiUSiXGjx8v2X/lyhX89ttv2L9/Pw4fPoywsDBMmTJFs3/NmjVYvXo1Vq1ahcjISPTp0wevvfaa5A+VssjMzMTy5cvx/fff49KlS2jYsCGAgs++MORNmTIFkydP1oTesnzvSquvLD+j1UYQkVZQq9WiYcOGAoCwtbUVkZGRIioqqsijd+/ewtLSUmzcuFE0b95cABBmZmZi6NChIiQkpNhjoqKixFdffSUACABiwYIFcr9dqkFZWVkiOjpaZGVllfmYH8N+FFiEcj+2hG2psrrHjBkjBgwYIIQo+P/j2LFjwsTERLz//vsiLi5OABBnzpzRtH/w4IEwMzMTv/32mxBCiPbt24tFixaV+fW6desmjIyMhIWFheSxbds2IYQQGzZsEPXq1ZN8juvWrRMARFhYmBBCiHnz5ol27dpJzrtgwQIBQDx+/FgIIcTbb78t3n33XUmbU6dOCQMDgxK/Rz/++KOwsbERQgjx4YcfitatW4u8vDzx+PFjAUAEBAQIIYTYuHGjqFOnjkhPT9cce/DgQWFgYCDu3LkjhBCiUaNGYsWKFZr9eXl5okmTJprPOjs7W5ibm4uzZ89Kanj77bfF8OHDn/v5zZw5U2RnZ4tmzZqJJUuWCCGE+PLLL0WzZs007V566SXx+eefS479+eefRaNGjTTPAYjdu3cLIQo+42c/902bNkk+94CAAAFAHD9+XPK+AWiO+/TTT4WhoaFITk7WtPnrr7+EgYGBuH37thBCCHt7e7F06VJJbT4+PmLKlCmS1yn8XgohRFhYmAAgkpKShBAF3ysAIjw8XHKeZs2aiZEjR2qeF/7OX7dunRCibN+70uory8/osyry+6E4nNicSEs8fvwY9+7dAwA4OztDoVAU2+7atWvIz8/HzJkzMXDgQMycORPBwcHYsWMH0tLSSrzf6+n5K2NiYqr+DZBeSc1JrdHjSnLgwAFYWloiLy8ParUab731FhYtWoS///4bSqUSfn5+mrb16tWDs7Oz5ud7xowZmDx5Mo4ePYpevXrhjTfeKHUu1hEjRhS5ovjCCy8AKPj/xs3NTTK1ir+/v6RtbGwsfHx8JNt8fX0lzyMiIhAZGSkZRCeEgFqtRlJSEtq2bfvcGufNm4cNGzbghx9+wNChQyX7YmJi4O7urhmIBwCdO3eGWq1GbGwsTE1Ncfv2bcnnplQq4e3tren2vnLlCjIzM9G7d2/JuXNzc+Hp6fnc2oCC2xSWLFmC6dOnY/LkyUX2R0RE4MyZM5Irkvn5+cjOzkZmZibMzc0l7WNjY4t87s9+poWe/v42atQIAHDv3j00bdoUANC0aVM0bvy/qa78/f01n425uTlu3bqFzp07S87ZuXNnRERElPq+n2ZsbFzsz9rT2xQKBezs7DS/90v73pmZmZVaX1l+RqsLAyWRlsjKytJ8bWVlVWK7zMxMZGVlYejQoZg/fz4AoFevXsjLy8Pvv/+OqVOnolmzZkWOe/qcmZmZVVg56SNrE+vSG1XhcSXp0aMH1q1bB2NjY9jb20OpLPs/W++88w769OmDgwcP4ujRo1i2bBlWr16N6dOnl3iMjY0NWrZsWRWllyg9PR0TJ07EjBkziuwrDD7PY2tri/nz52Px4sXo379/tdQHAAcPHpSELwBlHigzcuRIrFq1Cp999lmREd7p6elYvHhxsbf1VHbifSMjI83XhX+UV+WAGAODgjsFxVP3nBY3AMnMzKzYiwJP11dYo74M2OE9lERa4ulfNIX3iBWn8Bfus1NqvPLKKwBQ4l/ST//SMzY2rnCdVDv0duoNpUH5rjkoDZTo5dSrSuuwsLBAy5Yt0bRpU0mYbNu2LVQqFQIDAzXbHj58iNjYWLi4uGi2OTg4YNKkSfjzzz8xZ86cUgeLPE/btm0RGRmJ7Oxszbbz589L2jg7OyMkRDraPTg4WPK8Q4cOiI6ORsuWLYs8yvr/5vTp02FgYIA1a9YUqTEiIkJy/+KZM2dgYGAAZ2dn2NjYoFGjRpLPTaVSITQ0VPP86YEkz9bn4OBQpvoMDAywbNkyrFu3TjNY5en3HxsbW+z7LwxsT3N2dkZUVJRmwCJQ9DMtq+vXr+PWrVua5+fPn9d8NtbW1rC3t8eZM2ckx5w5c0bzM9WgQQMABdMaFaqq+R1L+96Vpb6y/IxWFwZKIi1Rt25dmJmZAQASExNLbFf4C61evXpFjgcKbmAvTkJCguZrY2NjyS9nomc1tm6MV1u/Wq5jXnN+rcZWzmnVqhUGDBiACRMm4PTp04iIiMDIkSPRuHFjDBgwAAAwa9YsHDlyBElJSbhw4QICAgJK7U7OzMzEnTt3JI/Hjx8DAN566y0oFApMmDAB0dHROHToEFatWiU5fuLEibh8+TLmzZuHuLg4/Pbbb9iyZQuA/10xmzdvHs6ePYtp06YhPDwc8fHx2Lt3b5kG5RQyNTXF4sWL8fXXX0u2jxgxAqamphgzZgwuXryIgIAATJ8+HaNGjdJ03c+cORNffPEF9uzZg8uXL2PKlCmSUctWVlZ4//33MXv2bGzduhUJCQm4cOECvvnmG2zdurXMNf7rX/+Cn58fNmzYINm+cOFC/PTTT1i8eDEuXbqEmJgY7Ny5Ex9//HGx53nrrbegVqvx7rvvIiYmBkeOHNF87iXdGlSSws8mIiICp06dwowZMzB06FDY2dkBKFhIYvny5fj1118RGxuLDz/8EOHh4Zg5cyYAaEL1okWLEB8fj4MHD2L16tXlqqEkZfnelVZfWX5GqwsDJZGWUCqVmlUckpOTS1zxofAv0cL7bgoVTgVUOD3Qsy5evKj5+o8//oC1tTU6duyIWbNm4ZdffkFiYmKRqUOodvuoy0cwMSxbF6ep0hTzX5xfzRVJ/fjjj/Dy8kL//v3h7+8PIQQOHTqkudqfn5+PqVOnom3btujbty9at26NtWvXPvecmzZtQqNGjSSP4cO
HAwAsLS2xf/9+REVFwdPTEwsWLMDy5cslxzs6OuKPP/7An3/+CTc3N6xbt05zT2Zhd7GbmxtOnDiBuLg4dOnSBZ6enli4cCHs7e3L9f7HjBkDJycnyTZzc3McOXIEjx49go+PDwYPHoyXXnpJMv/snDlzMGrUKIwZMwb+/v6wsrLCwIEDJef597//jU8++QTLli3TfH4HDx6Eo6NjuWpcvny55GoZAPTp0wcHDhzA0aNH4ePjg44dO+LLL78s9lYdoGDk+P79+xEeHg4PDw8sWLAACxcuBFD+LvKWLVti0KBBeOWVV/Dyyy/Dzc1N8jMxY8YMvPfee5gzZw7at2+Pw4cPa6aBAgp6kn755RdcvnwZbm5uWL58OT777LNy1VCSsnzvSquvLD+j1YVreRNpkffff1/z1+78+fPx1ltvFWkTExODoUOH4pVXXpH8opg7dy6OHTuGI0eOaKaoKCSEwODBgxEXFwcA2LlzJ+7evYvAwEAEBgZqrl42aNAAfn5+moevry9sbGyq6+1SDanMWr17Lu/Bm3+8+dz5KE0MTbBz8E683ub1Slaqn5YuXYr169fjxo0bcpeiN7Zv345x48YhJSVF07NDFVNVa3lzUA6RFhk7dqwmUO7cuRNDhw4tMgihbdu2GDhwIHbv3o38/Hx4e3sjODgYR48exTvvvFMkTAJASEiIJkz6+/tj2LBhkv33799HUFAQAgMDcf78eaxatQopKSlQKBRo06YN/Pz80LFjR/j5+aFdu3blGhhBuu31Nq/j9PjTWHZ6GfbF7pMsv6g0UOI159cw/8X58Lb3lrFK7bJ27Vr4+PigXr16OHPmDFauXFmu7mwq6qeffoKTkxMaN26MiIgIzJs3D0OHDmWY1CK8QkmkZbp164aTJ08CAGbPnl1kYl6gYIDN999/jz179uDevXuwt7fHm2++iVGjRhVpm5ubi2HDhuHKlSsACn4xF9fuaWq1GnFxcTh//rzmKmZkZCTy8/Nhbm4OLy8vTcD08/NDkyZNquCdU3WpqisQN1Nv4njicaTmpMLaxBq9nHrV2D2TumT27Nn49ddf8ejRIzRt2hSjRo3C/Pnz+YdYJaxYsQJr167FnTt30KhRI7z++utYunRpkSmGqPyq6vcDAyWRljl37hw6d+4MIQSMjY2xdetWtGvXrkLnEkLgiy++wI4dOwAAnp6eCAwMLDJ1RVlkZmYiNDRUEzDPnz+P5ORkAIC9vb0kYHp7e0vmUiN5VdU/GESkfxgoifTY0/dS2tjYYN26dWjfvn25zqFWq/HNN9/g+++/B1BwM3lISEipEzuXx61btyQBMyQkBBkZGTA0NES7du00AbNjx45o06ZNsVOCUPVjoCSikjBQEumx7Oxs9O3bV7NurlKpxOTJkzF+/PgydZvdvHkTn376qWSuuU2bNuGdd96ptpqBgvnsoqOjJV3l0dHREELA2tpaM6KzMGgWd78nVT0GSiIqCQMlkZ5LS0vDa6+9hn/++UezzcHBAcOGDUPfvn0185IVUqlUuHTpEnbt2oVDhw5J5pn87rvvMGXKlJoqXSI1NRXBwcGSK5mFUx45OjpKRpV7enoy8FQDBkoiKgkDJVEtkJOTg08//RQrV64ssjxXgwYN4OjoCCMjI6SkpCA+Pr7IZOUODg74/vvv8fLLL9dk2c8lhMC1a9ckAfPChQvIycmBkZERPDw8JF3lLVq0KPfkxSTFQElEJWGgJKpFgoKC8PHHH+PYsWNlam9jY4N33nkHCxcuhLV11a6tXB1yc3MRGRkp6SqPj48HULAikK+vr6ar3NfXt8TJ26l4DJREVBIGSqJaKC4uDlu3btVc1Xt6NZ0WLVrAy8sLvXv3xvDhw3V+lPXDhw81c2MWPgqXwGvdurXkXkw3N7cKjVyvLRgoiagkDJREtZwQAhkZGVCpVDAzM9Ms66avhBCIj4+XdJVHRERApVLB1NQUXl5ekq5yBwcHdpX/FwNl9bl69SocHR0RFhamWTqVSJdU1e8HzuFBpKMUCgUsLS1ha2ur92ESKHi/rVu3xqhRo/Dtt98iJCQEqampOHPmDD7//HM0btwYu3btwrBhw9CsWTPY29vj9ddfxxdffIGAgACkpaXJ/RaonMaOHQuFQgGFQgFjY2O0bNkSS5YsgUqlKv3gCujevbvm9Z5+TJo0qVpej0ifcNp+ItJZZmZm6NSpEzp16qTZdufOHUk3+dKlS5Geng4DAwO4urpKRpW7uLjA0NBQxndApenbty9+/PFH5OTk4NChQ5g6dSqMjIwwf/78anm9CRMmYMmSJZJtXI2FqHS8QklEesXOzg4DBgzA559/jr///htPnjxBVFQUNm7ciI4dOyIwMBATJ06Em5sbbG1t0bNnT8yfPx979+7FnTt35C6fnmFiYgI7Ozs0a9YMkydPRq9evbBv3z4AwOPHjzF69GjUqVMH5ubm6Nevn2YwFwBcu3YNr776KurUqQMLCwu4urri0KFDz309c3Nz2NnZSR5PD2wLCgrSTG/l7e2NsLCwIufYt28fWrVqBVNTU/To0QNbt26FQqGQ3PN8+vRpdOnSBWZmZnBwcMCMGTOQkZFRyU+LSD68QklEeq1w1Z527drh7bffBgCkp6cjJCREM6p869at+OKLLwAATZs21dyH6efnhw4dOsDMzEzOt0BPMTMzw8OHDwEUdInHx8dj3759sLa2xrx58/DKK68gOjoaRkZGmDp1KnJzc3Hy5ElYWFggOjoalpaWFX7t9PR09O/fH71798a2bduQlJSEmTNnStokJSVh8ODBmDlzJt555x2EhYXh/fffl7RJSEhA37598dlnn+GHH37A/fv3MW3aNEybNg0//vhjhesjkhMDJRHVOpaWlujevTu6d+8OoGDAT3JysmTaoo8//hhZWVlQKpVwc3OTjCpv1aqV7i8j6e0NyHFF1s4OCAkp92FCCPz99984cuQIpk+frgmSZ86c0dzysH37djg4OGDPnj0YMmQIrl+/jjfeeEOzbKmTk1Opr7N27VrNcqWFNmzYgBEjRmDHjh1Qq9XYvHkzTE1N4erqiuTkZEyePFnS1tnZGStXrgQAODs74+LFi1i6dKmmzbJlyzBixAjMmjULANCqVSt8/fXX6NatG9atW8eBU6STGCiJqNZTKBRwcHCAg4MDhgwZAgDIy8tDVFSUJmD+/fffWLt2LQCgTp068PX11VzJ9PX1Rb169eR8C+V35w5w86bcVZTqwIEDsLS0RF5eHtRqNd566y0sWrQIf//9N5RKJfz8/DRt69WrB2dnZ8TExAAAZsyYgcmTJ+Po0aPo1asX3njjjVLXsh8xYgQWLFgg2Va4KlVMTAzc3Nwkgc/f31/SNjY2Fj4+PpJtvr6+kucRERGIjIzE9u3bNduEEFCr1UhKSkLbtm1L+1iItA4DJRFRMYyMjNChQwd06NBBcwXq8ePHCA4O1lzJ/O677zQDOFq2bCnpKnd3d4exsbGcb+H57Ox04nV79OiBdevWwdjYGPb29mVay77QO++8gz59+uDgwYM4evQoli1bhtWrV2
P69OklHmNjY4OWLVuWq8bySk9Px8SJEzFjxowi+5o2bVqtr01UXRgoiYjKqE6dOnj55Zc1S1kKIZCYmCjpKv/tt9+Ql5cHExMTeHp6SrrKmzdvrj1zY1ag21kOFhYWxQa8tm3bQqVSITAwUNPl/fDhQ8TGxsLFxUXTzsHBAZMmTcKkSZMwf/58bNq06bmB8nnatm2Ln3/+GdnZ2ZqrlOfPn5e0cXZ2LjLwJzg4WPK8Q4cOiI6OrvbgSlSTdPwmICIi+SgUCrRo0QIjRozA119/jcDAQKSmpuLcuXNYvnw5HB0dsXfvXgwfPhxOTk6ws7PDa6+9hqVLl+L48eNISUmR+y3orFatWmHAgAGYMGECTp8+jYiICIwcORKNGzfGgAEDAACzZs3CkSNHkJSUhAsXLiAgIKDU7uTMzEzcuXNH8ihcoemtt96CQqHAhAkTEB0djUOHDmHVqlWS4ydOnIjLly9j3rx5iIuLw2+//YYtW7YAgOaPiXnz5uHs2bOYNm0awsPDER8fj71792LatGlV/CkR1SBBRETV6u7du2L//v1iwYIFolevXsLa2loAEAqFQri4uIhx48aJDRs2iPDwcJGXl1flr5+VlSWio6NFVlZWlZ+7Oo0ZM0YMGDCgxP2PHj0So0aNEjY2NsLMzEz06dNHxMXFafZPmzZNtGjRQpiYmIgGDRqIUaNGiQcPHpR4vm7dugkARR59+vTRtDl37pxwd3cXxsbGwsPDQ+zatUsAEGFhYZo2e/fuFS1bthQmJiaie/fuYt26dQKA5PMPCgoSvXv3FpaWlsLCwkK4ubmJpUuXVuyDIqqEqvr9wKUXiYhqmFqtRmxsrKSrPDIyEmq1Gubm5vDx8ZFMwN64ceNKvR6XXpTX0qVLsX79ety4cUPuUoiK4FreRER6JCMjA6GhoZK1ym/+dxR2kyZNJAHTy8sLFhYWZT43A2XNWrt2LXx8fFCvXj2cOXMG06dPx7Rp0/DZZ5/JXRpREQyURER67ubNm5pwGRgYiJCQEGRmZsLQ0BDt27eXjCp3dnYucW5MBsqaNXv2bPz666949OgRmjZtilGjRmH+/PnlGqFOVFMYKImIahmVSoVLly5JuspjYmIghICNjQ18fHwko8obNGgAgIGSiErGQElEREhJSUFwcLCkq/z+/fsAClaG8fPzQ/fu3eHr64vWrVvD3Nxc5oqJSJswUBIRURFCCFy9elXSVX7//n188803aNiwIczNzWFhYaF5mJiYaM/cmERU47KysnD16lUGSiIier7MzEzExsaibt26yM/PR0ZGBnJycgAASqVSEjAtLCx4rx9RLfLw4UPcu3cPrVu3hqGhYYXPw98aRER6zszMDHXr1kVeXh7s7e1hYGCAvLw8ZGdnIzMzUzOZt1qtBgAYGxvD3NwcZmZmMDc3h6mpKa9iEukZIQQyMzNx79492NraVipMArxCSURUK+Tm5iIpKUkTGp8lhIBKpUJOTg5ycnKQm5uL3NxcAAUrvBgbG8PExAQmJiYwNjbmVUwiPWFraws7O7tK/9HIQElEVEuo1WpNSCyL7OxsREdHIyIiAhEREYiMjMStW7cAAA0bNoSbmxvc3d3h7u4OV1fXcs2NSUTyMzIyqvSVyUIMlEREVGa3b9/WjCgPDAxEUFAQMjIyYGBggHbt2mmmLOrYsSPatGlTZf9YEZF2Y6AkIqIKy8/PR3R0tGRU+aVLlyCEgJWVlWYZycL5MV944QW5SyaiasBASUREVSotLQ0hISGSCdjv3LkDAGjWrJlk8vUOHTpwsnUiPcBASURE1UoIgevXr0u6ykNDQ5GdnQ2lUgkPDw/JWuWtWrXiqHIiHcNASURENS4vLw+RkZGSrvK4uDgAQN26deHr66vpKvf19UXdunVlrpiInoeBkoiItMKjR48QHBws6Sp/9OgRAKBVq1aSrnI3NzcYGxvLXDERFWKgJCIirSSEwJUrVyRd5eHh4cjLy4OJiQm8vLwkXeXNmjVjVzmRTBgoiYhIZ2RnZyMsLEzSVX716lUAwAsvvCCZtsjHxwdWVlbyFkxUSzBQEhGRTrt79y6CgoI0ATMoKAhpaWlQKBRwcXGRdJW7urpybkyiasBASUREeiU/Px+XL1+WdJVHRUVBrVbDwsJCMzdm4cPe3l7ukol0HgMlERHpvfT0dISGhkq6yguXkXRwcJB0lXfo0AHm5uYyV0ykWxgoiYioVkpOTpYEzJCQEGRlZcHQ0BBubm6SrvLWrVvDwMBA7pKJtBYDJREREQrmxrx48aKkqzwmJgYAYGtrq5kbs/BRv359mSsm0h4MlERERCV48uQJgoODJVcyHzx4AABo0aKFpKvc3d0dJiYmMldMJA8GSiIiojISQiApKUky+XpYWBhyc3NhbGwMT09PSVe5o6Mj58akWoGBkoiIqBJycnIQHh4u6SpPSEgAADRo0EDSTe7r6wsbGxuZKyaqegyUREREVez+/fsICgrSdJUHBQUhJSUFCoUCbdq0kXSVt2vXDkqlUu6SiSqFgZKIiKiaqdVqxMXFSe7FjIyMRH5+PszNzeHl5SXpKm/SpIncJROVCwMlERGRDDIzMzVzYxY+bty4AQCwt7eXBExvb29YWFjIXDFRyRgoiYiItMStW7c04fL8+fMICQlBRkYGDA0N0a5dO0lXeZs2bTg3JmkNBkoiIiItpVKpEB0dLRlVHh0dDSEErK2t4ePjI7mS2bBhQ7lLplqKgZKIiEiHpKamaubGLLySee/ePQBA8+bNJQHT09MTpqamMldMtQEDJRERkQ4TQuDatWuSgHnhwgXk5OTAyMgIHh4ekq7yFi1acG5MqnIMlERERHomNzcXkZGRklHl8fHxAIB69erB19dXcyXT19cXderUkbli0nUMlERERLXAw4cPNXNjFj4eP34MAGjdurWkq9zNzQ1GRkYyV0y6hIGSiIioFhJCID4+XtJVHhERAZVKBVNTU3h5eUm6yh0cHNhVTiVioCQiIiIAQFZWFsLCwiRd5deuXQMA2NnZacJl4dyYVlZWMldM2oKBkoiIiEp0584dSTd5cHAw0tLSYGBgABcXF0lXuYuLCwwNDeUumWTAQElERERllp+fj5iYGElX+aVLl6BWq2FpaQkfHx/JlUw7Ozu5S65SQggkJSUhNDQUiYmJyMnJgampKVq2bAlvb+9ae2sAAyURERFVSnp6OkJCQiQTsN++fRsA0LRpU0nA7NChA8zMzGSuuPySk5OxceNGbN68Gbdu3SqxXbNmzfDuu+/i7bffxgsvvFCDFcqLgZKIiIiqlBACycnJkoAZGhqKrKwsKJVKuLm5SbrKW7VqpbXLSObk5GDJkiVYsWIFVCpVmY8zMTHBokWL8P7770OpVFZjhdqBgZKIiIiqXV5eHqKioiRd5bGxsQCAOnXqwNfXVxMw/fz8UK9ePZkrBuLi4jBo0CBcunRJs83Q0BAdO3ZE+/bt0bJlS5iZmSEzMxNxcXGIjIxEUFAQno5WPj4+2LVrFxwcHOR4CzWGgZKIiIhk8fjxY80ykoVXMx8+fAgAaNmyp
aSr3N3dHcbGxjVW26VLl9CjRw/cv38fAKBUKjFmzBgMHz78uV3ZycnJ+Pnnn7Fz506o1WoABd3+J06cQPPmzWuidFkwUBIREZFWEEIgMTFR0lUeFhaGvLw8mJiYwNPTU9JV3rx582oZAHPv3j14enpq7pVs2bIlvvjiCzg7O5f5HBEREfjwww+RnJysOceFCxf0dqolBkoiIiLSWtnZ2QgPD5d0lSclJQEAGjZsKOkm9/HxgY2NTannTElJwc6dOzF27FiYmJhI9gkhMGTIEOzatQsA4Orqio0bN8La2rrctT948ADjx4/X1Dtx4kSsX7++3OfRBQyUREREpFPu3buHoKAgzZXMoKAgpKamQqFQoG3btpKucldX1yKDYr766ivMnj0bPj4+2LNnD+zt7TX7du/ejUGDBgEouLfzzz//RP369TX7g4ODMX78+GLr2rZtG9zd3SXbbty4gTfeeANZWVkAgBMnTqBr165V8jloE/0fdkRERER6pWHDhujfvz/69+8PAFCr1YiNjZV0lW/duhVqtRrm5ubw9vaWdJWfO3cOBgYGCAsLg4eHB/bu3Qt/f38AwMqVKzWv89FHH0nC5NNGjBgBV1dXybamTZsWaefg4ID33nsPS5cuBQCsWrVKLwMlr1ASERGR3snIyEBoaKikq/zmzZsAAAMDA82AGQMDAygUCqxfvx7e3t7w9PQEALRq1Qq7du0qco9m4RXK1atX4+WXXy5TLSqVCn379sXdu3ehUCiQmJiodwN0tHPSJyIiIqJKsLCwQNeuXfHBBx/gjz/+QHJyMpKTk/HDDz9owiRQcHUzPz8fEyZMwODBgzXbhw0bVuqAn4yMjDLNTalUKjXnFkLgwIEDFXxX2ouBkoiIiGqFxo0bS7qwFQqFZkJ1pVKJx48fa/Z16tTpuef65JNP0LFjR3h7e2P8+PGSuSqL8/T5QkNDK1K+VuM9lERERFRrXLlyBQBgamqKrl27olevXujRowc8PT3h6OiIR48ewcrKCk2aNCn2eCMjI/Tu3RtdunSBra0tEhISsHXrVowZMwY///wz2rZtW+xxrVu3hqGhIfLz8xEREVFt708uvIeSiIiIao309HTExsbCzc0NRkZGkn02NjZITU2Fo6Mj9u3bV+ZzXr9+HW+88Qa8vLyeOy1Q165d8fjxYzg6OiIxMbHC70EbscubiIiIag1LS0t4eXkVCZMANPdMlvdaW9OmTdGjRw8EBQUhPz+/xHaF59XWdcsrQ//eEREREVEFFN5feffu3ecGw+LY2dkhLy9PM9/ks9LT0/HkyRMA0Ip1yqsaAyURERERoJkyKCsrS7O6TVklJyfDxMQE5ubmxe6PiYnRfN2hQ4eKF6mlGCiJiIiIAHh7e2u+/ueff4pt8+jRoyLbYmNjERAQAH9//xK7s0+cOFHs6+gLDsohIiIiApCQkIBWrVpBCAF7e3scOnQIhoaGkjZvv/02TExM4OHhgbp16yIhIQG7du2CUqnEtm3b4OTkVOS82dnZeOmll5CamgoTExMkJyeXuAKPruIVSiIiIiIALVq0QN++fQEAt27dwvbt24u06dmzJ548eYKffvoJS5cuxZEjR/DSSy9h586dxYZJANi4cSNSU1MBAEOHDtW7MAnwCiURERGRxpkzZ9ClSxcIIWBiYoLffvutxKBYFpGRkRg9ejTy8/NhZGSECxcuoF27dlVYsXbgFUoiIiKi/+rcuTOmT58OAMjJycHEiRNx48aNCp0rLi4O06ZN04wY/+STT/QyTAK8QklEREQkkZGRgc6dO2tWtKlbty4WLlyIl156qUzHCyGwf/9+fPHFF0hLSwMAdOnSBX///Xex81/qAwZKIiIiomfcvXsXL730kmSN7u7du2P06NHw9vbWTIL+NLVajbNnz2Lr1q04f/68Zruvry+OHDkCW1vbmihdFgyURERERMV4+PAhxo0bh/3790u229vbo127dmjZsiXMzMyQmZmJuLg4REVF4d69e5K2I0aMwLp162BlZVWTpdc4BkoiIiKiEgghsGPHDsyZMwd3794t83EODg745ptvMGDAgGqsTnswUBIRERGVIjc3F3/++Sc2bdqEc+fOFbvEoqWlJV588UVMnDgR/fv3h1KplKFSeTBQEhEREZVDWFgYOnTogMaNG+PLL7+EqakpWrVqhdatW5e4Uo6+Y6AkIiIiKiOVSgV3d3dER0fD3NwcaWlptTZEPo2fABEREVEZrVy5EtHR0QCAzMxMnDp1SuaKtAMDJREREVEZXLx4EQsXLtQ8NzQ0LHZ5xtqIXd5EREREpVCpVPDx8UFUVJRm5RsAsLa2xv3792FsbCxjdfLjFUoiIiKiUpw8eRLh4eFQq9WS7ampqThy5IhMVWkPBkoiIiKiUnTv3h1Hjx7Fd999B3Nzc5iYmMDDwwMNGjRATk6O3OXJjl3eREREROVgamqK1q1bIzIyUu5StAavUBIRERGVkVqtRk5ODpo1ayZ3KVqFgZKIiIiojK5cuQIAcHZ2lrkS7cJASURERFRGgYGBAAAPDw95C9EyDJREREREZRQREQEA8PPzk7kS7cJASURERFRGly9fBgC0aNFC5kq0CwMlERERURldv34dJiYmXL/7Gfw0iIiIiMro7t27sLW1lbsMrcNASURERFRGKSkpeOGFF+QuQ+swUBIRERGVAeegLBkDJREREVEZxMXFAQDatGkjcyXah4GSiIiIqAwK56B0d3eXuRLtw0BJREREVAaFc1B27NhR5kq0DwMlERERURnExsYCABwdHWWuRPswUBIRERGVwfXr12Fqaso5KIvBT4SIiIioDO7evQsbGxu5y9BKDJREREREZZCSkgI7Ozu5y9BKDJREREREpVCr1cjNzUXz5s3lLkUrMVASERERlSImJgYA4OzsLHMl2omBkoiIiKgUQUFBAAAPDw95C9FSDJREREREpYiMjAQA+Pv7y1yJdmKgJCIiIipF4RyUXMe7eAyURERERKW4du0aTE1NoVAo5C5FKzFQEhEREZXi3r17sLW1lbsMrcVASURERFSK1NRUzkH5HAyURERERM/BOShLx0BJRERE9BzR0dEAgDZt2shcifZioCQiIiJ6jsDAQACcg/J5GCiJiIiInqNwDsqOHTvKXIn2YqAkIiIieo64uDgoFArOQfkcDJREREREz3H9+nWYmprKXYZWY6AkIiIieo67d+9yDspSMFASERERPUdaWhrnoCwFAyURERFRCTgHZdkwUBIRERGV4OLFiwCAtm3bylyJdmOgJCIiIipB4RyUnp6eMlei3RgoiYiIiEpQOAelr6+vzJVoNwZKIiIiohIUzkHZtGlTuUvRagyURERERCXgHJRlw0BJREREVIL79+9zDsoyYKAkIiIiKkFqaioaNWokdxlaj4GSiIiIqBgqlQp5eXlwdHSUuxStx0BJREREVIyoqCgAnIOyLBgoiYiIiIoRFBQEAPDw8JC3EB3AQElERERUjMI5KP38/GSuRPsxUBIREREVIz4+HgqFAk2aNJG7FK3HQElERERUjBs3bnAO
yjJioCQiIiIqxr1791C3bl25y9AJDJRERERExUhLS4OdnZ3cZegEBkoiIiKiZ3AOyvJhoCQiIiJ6Rnh4OADOQVlWDJREREREzwgODgYAeHp6ylyJbmCgJCIiInpG4So5nIOybBgoiYiIiJ5ROAelvb293KXoBAZKIiIiomfcuHEDZmZmcpehMxgoiYiIiJ5x//591KlTR+4ydAYDJREREdEzUlNT0ahRI7nL0BkMlERERERPyc3NhUql4hyU5cBASURERPSUiIgIAICLi4vMlegOBkoiIiKip3AOyvJjoCQiIiJ6SuEclP7+/jJXojsYKImIiIieUjgHZcOGDeUuRWcwUBIRERE95fr16zA3N5e7DJ3CQElERET0lAcPHnAOynJioCQiIiJ6SlpaGuegLCcGSiIiIqL/KpyD0snJSe5SdAoDJREREdF/hYWFAeAclOXFQElERET0X5yDsmIYKImIiIj+KzIyEgDQsWNHmSvRLQyURERERP915coVKBQKNGjQQO5SdAoDJREREdF/JScncw7KCmCgJCIiIvqv+/fvo27dunKXoXMYKImIiIj+Kz09nXNQVgADJRERERGA7OxsqFQqtGjRQu5SdA4DJRERERE4B2VlMFASERERAQgKCgIAdOjQQeZKdA8DJRERERGAixcvAuAclBXBQElERESEgjkoDQwMOMq7AhgoiYiIiFAwB6WZmZncZegkBkoiIiIiAA8ePEC9evXkLkMnMVASERERoWAOSnt7e7nL0EkMlERERFTrFc5B6eTkJHcpOomBkoiIiGq9kJAQAJyDsqIYKImIiKjWKwyUnIOyYhgoiYiIqNbjHJSVw0BJREREtV7hHJR16tSRuxSdxEBJREREtV5ycjLMzc3lLkNnMVASERFRrcc5KCuHgZKIiIhqPc5BWTkMlERERFSrZWZmIj8/n3NQVgIDJREREdVqhVMGubq6ylyJ7mKgJCIiolqtMFB6eXnJXInuYqAkIiKiWo1zUFYeAyURERHVagkJCTAwMIC1tbXcpegsBkoiIiKq1ZKTk2FhYSF3GTqNgZKIiIhqtQcPHqBu3bpyl6HTGCiJiIioVsvIyEDjxo3lLkOnMVASERFRrZWeno78/Hy0aNFC7lJ0GgMlERER1Vqcg7JqMFASERFRrRUaGgoA8Pb2lrkS3cZASURERLVW4RyUPj4+Mlei2xgoiYiIqNa6cuUK56CsAgyUREREVGvdvHmTc1BWAQZKIiIiqrUePnyIevXqyV2GzmOgJCIiolqLc1BWDQZKIiIiqpUK56Bs2bKl3KXoPAZKIiIiqpWCgoIAcA7KqsBASURERLVS4aTmXl5eMlei+xgoiYiIqFa6dOkSAMDX11fmSnQfAyURERHVSgkJCTA0NISlpaXcpeg8BkoiIiKqlTgHZdVhoCQiIqJa6eHDh6hfv77cZegFBkoiIiKqlTgHZdVhoCQiIqJaJzU1FWq1Gi1atJC7FL3AQElERES1TuEclO3atZO5Ev3AQElERES1DuegrFoMlERERFTrREdHAwB8fHxkrkQ/MFASERFRrVM4ByWnDaoaDJRERERU69y8eZMTmlchBkoiIiKqdR49eoR69erJXYbeYKAkIiKiWicjIwNNmjSRuwy9wUBJREREtUpKSgrnoKxiDJRERERUq5w/fx4A0L59e5kr0R8MlERERFSrXLhwAQDg7e0tcyX6g4GSiIiIapVLly4B4ByUVYmBkoiIiGqVxMREGBoawtTUVO5S9AYDJREREdUqnIOy6jFQEhERUa3y6NEj1K9fX+4y9AoDJREREdUqmZmZnIOyijFQEhERUa3x6NEjqNVqtGzZUu5S9AoDJREREdUagYGBADgHZVVjoCQiIqJag3NQVg8GSiIiIqo1oqOjAQBeXl4yV6JfGCiJiIio1khMTIRSqeQclFWMgZKIiIhqjVu3bnEOymqglLsAIiIioup05coVnDt3Ds2aNcODBw/QqFEjuUvSOwohhJC7CCIiIqLqMnPmTHz99deSbU2aNEH79u2xY8cO2NraylOYHmGXNxEREem1fv36FdmWnJyMs2fPwsCAUagq8FMkIiIivfbSSy+hTp06RbavW7cO1tbWMlSkfxgoiYiISK8ZGRlh+PDhmquRBgYGGDBgAN58802ZK9MfvIeSiIiI9N65c+fQqVMnAIC1tTXi4uLwwgsvyFyV/uAVSiIiItJ7HTt2hJGREQBgw4YNDJNVjNMGERERkV4RQuDs2bM4c+YMQkNDce3aNeTm5sLQ0BAA8ODBA0RHR8PFxUXmSvUHu7yJiIhIL2RlZWHTpk1Yu3YtYmNjS23fpUsXTJs2DUOGDIFCoaiBCvUXAyURERHpvHPnzmHcuHElBkmlUgmVSlXsvpdeegmbN29Gs2bNqrNEvcZASURERDptzZo1eO+996BWqzXbfHx80L9/f7Rr1w5OTk5QKpXIzs5GbGwswsPDsXv3biQkJGjaW1lZYdeuXejdu7ccb0HnMVASERGRzlq9ejXef/99zfP27dvj008/hbOz83OPE0Lg1KlT+Pe//407d+4AAIyNjXHw4EH06tWrWmvWRwyUREREpJMOHz4sWQVnwoQJmDJlCpTKso85TktLw4IFCxAQEACg4EplZGQkmjdvXtXl6jUGSiIiItI5KSkpcHV1xc2bNwEAU6ZMweTJkyt0LpVKhTlz5uA///kPgIJ7Ko8dO8aBOuXAeSiJiIhI53z22WeaMOnv749JkyYVaZOZmYnvvvsOkyZNQufOndG+fXvs2bOnSDulUomlS5fCzs4OAPD333/j999/r9b69Q0DJREREemUzMxMfP/99wAK7ntcvHhxsVcTHz9+jPXr1yMxMbHUeyotLS3xySefaJ5/8803VVu0nmOgJCIiIp3y22+/4cmTJwCAfv36oVGjRsW2a9CgAQICAnD06FHMmTOn1PN26dIFTk5OAIDTp0/j0qVLVVazvmOgJCIiIp1SOIAGAAYNGlRiO2NjY9SvX7/M51UoFJLz/fPPPxWqrzZioCQiIiKdEhoaCqDg3kdXV9cqPbeHh0eR16HSMVASERGRzhBCaFbDad68OUxMTKr0/E/fa3n58uUqPbc+Y6AkIiIinZGfn69ZQtHS0rLKz29qaqqZxzI7O7vKz6+vGCiJiIhIZxgaGmpGdOfk5FT5+VUqlSawGhkZVfn59RUDJREREekMhUKBZs2aAQCuXr2K/Pz8Kj1/YmKi5muullN2DJRERESkU7y8vAAAWVlZkgBYFZ6eKqjwdah0DJRERESkUzp16qT5+sCBA1V67v3792u+9vf3r9Jz6zOu5U1EREQ65e7du3BwcEBeXh5sbW1x5MgRmJubF9t2x44dSEtLw/379/Hrr7+iV69eaNOmDQDgrbfegpWVlaZtbGwsBg8eDABo3bo1YmJiYGDAa29lwUBJREREOmfEiBHYsWMHAGDYsGH4+OOPi23Xp08f3Lp1q9h9hw8fRuPGjQEUjB4fPXo0IiMjAQBffvklZs2aVfWF6ykGSiIiItI5iYmJaN++PTIzMwEAa9a
sQc+ePSt8vm+++QYbN24EUHB1Mjw8HGZmZlVSa23A67hERESkc5ycnLB8+XLN8w8++AD/+c9/yn0eIQQ2bNigCZMKhQJbtmxhmCwnXqEkIiIinaRWqzFixAjs3LlTs23YsGF47733Sryn8mm3b9/GokWLcPbsWc22//u//8Ps2bOrpV59xkBJREREOufOnTvYsmULPvnkE81E5IXq1KmDgQMHon///nBycoKhoaFmX25uLi5duoRdu3bh8OHDksnRV6xYgQ8++KDG3oM+YaAkIiIinZCXl4dDhw5h06ZN+Ouvv6BWqwEATZo0wdy5c/Hhhx9q7qksZGZmBkdHR5iYmCA9PR1JSUlFAmjjxo2xceNGvPLKKzX2XvQNAyURERFpvVu3bsHT0xP37t2DoaGhZIWc7777DlOmTEFCQgI++eQT/PHHH8jLyyv1nDY2Nnj77bfxySefwNbWthqr138MlERERKT1MjMz0bFjR1y6dElzZbJQYmIiHB0dNc/v3r2L7du34+zZswgJCcG1a9cAFKwD7uzsDG9vb3Tv3h1Dhw6FhYVFjb4PfcVASURERDohNTUV7du3x/Xr1zXbnJyckJCQ8NzjhBBQqVRQKpVQKBTVXWatxGmDiIiISCfs3LkT169fh4mJCQDAwMAAr732WqnHKRQKGBkZMUxWIwZKIiIi0np79+7FpEmTYGNjgxs3buCdd96BWq3Gq6++KndpBHZ5ExERkZY7c+YMunbtCmNjY8TGxqJp06YQQiAmJgYuLi5yl0dgoCQiIiItFhMTA3d3dwghEBoaCjc3N7lLomIo5S6AiIiIqDi3bt2Ct7c38vPzcezYMYZJLcZASURERFonNTUV7dq1Q2ZmJnbu3ImePXvKXRI9BwflEBERkVbJzc2Fi4sLHj9+jDVr1mDYsGFyl0SlYKAkIiIiraFWq+Hp6YmbN29i/vz5mDFjhtwlURlwUA4RERFpjW7duuHkyZMYO3YsfvzxR7nLoTLiFUoiIiLSCkOGDMHJkyfRr18/hkkdw0BJREREsps+fTr++OMPeHl54cCBA3KXQ+XEQElERESyWrp0Kb799ls4OTnh/PnzMDBgPNE1vIeSiIiIZLN582a88847aNiwIZKSkmBubi53SVQBDJREREQkiwMHDuC1116DlZUVEhISUL9+fblLogpioCQiIqIad/78ebz44oswMjJCdHQ0HB0d5S6JKoGBkoiIiGpUbGws3NzcoFarERgYiA4dOshdElUSl14kIiKiGnPnzh14eXkhLy8PR44cYZjUEwyUREREVCPS09PRrl07ZGRkYNu2bejdu7fcJVEV4bh8IiIiqnYqlQqurq54+PAhVq9ejREjRshdElUhBkoiIiKqVmq1Gh06dMD169fxwQcf4L333pO7JKpiHJRDRERE1apnz54ICAjAqFGj8NNPP8ldDlUDXqEkIiKiajN8+HAEBASgd+/eDJN6jIGSiIiIqsXs2bOxc+dOeHh44PDhw3KXQ9WIXd5ERERU5VasWIF58+ahefPmiI+Ph1LJiWX0GQMlERERVamtW7di7NixaNCgAa5evcr1uWsBBkoiIiKqMn/99Rf+9a9/wcLCAgkJCWjYsKHcJVENYKAkIiKiKhEcHAx/f38olUpcunQJLVq0kLskqiEclENERESVlpCQgC5dugAATp06xTBZy/AOWSIiIqqUe/fuwcPDA7m5uTh48CB8fHzkLolqGAMlERERVVhmZiZcXV2Rnp6OrVu3ol+/fnKXRDJglzcRERFVSOH63A8ePMDy5csxevRouUsimTBQEhERUbmp1Wr4+Pjg6tWrmD17NubOnSt3SSQjjvImIiKicnv55Zdx7NgxvPnmm/jll1/kLodkxiuUREREVC6jRo3CsWPH0KNHD4ZJAsBASUREROXwwQcfYNu2bWjfvj2OHz8udzmkJdjlTURERGXyf//3f5gzZw6aNm2KhIQErs9NGgyUREREVKrt27dj5MiRqFevHq5evQpLS0u5SyItwkBJREREz3Xs2DH06dMH5ubmuHLlCuzs7OQuibQMAyURERGV6MKFC/Dz84OBgQEiIyPh7Owsd0mkhTgoh4iIiIqVlJSETp06QQiBEydOMExSiXg3LRERERXx4MEDuLu7Izc3F/v27UPHjh3lLom0GAMlERERSRSuz52WlobNmzejf//+cpdEWo5d3kRERKShUqnQvn173Lt3D0uXLsX48ePlLol0AAMlERERAShYn7tjx45ITEzEtGnT8NFHH8ldEukIjvImIiIiAMArr7yCv/76C4MHD8bvv/8udzmkQ3iFkoiIiDBu3Dj89ddf6Nq1K8MklRsDJRERUS03f/58bNmyBS4uLggICJC7HNJB7PImIiKqxb7++mvMnDkTTZo0QUJCAoyNjeUuiXQQAyUREVEt9euvv+LNN99E3bp1kZSUBGtra7lLIh3FQElERFQL/ec//0Hv3r1hamqK+Ph42Nvby10S6TAGSiIiolomPDwcPj4+UCgUiIiIQNu2beUuiXQcV8ohIiKqRa5duwZ/f3+o1WqcPHmSYZKqBAMlERFRLfHo0SO4ubkhJycHu3fvRufOneUuifQEAyUREVEtkJ2dDVdXV6SmpmLDhg0YMGCA3CWRHuE8lERERHpOrVbDzc0Nd+7cweLFi/Huu+/KXRLpGQ7KISIi0nMdO3ZEYGAgJk+ejLVr18pdDukhBkoiIiI99uqrr+LAgQMYOHAg/vzzT7nLIT3FLm8iIiI9NWHCBBw4cACdOnVimKRqxUBJRESkhxYuXIjvv/8ezs7OOHXqlNzlkJ5jlzcREZGeWbt2LaZOnQp7e3skJSVxfW6qdgyUREREeuSPP/7AkCFDYGtri6SkJNja2spdEtUCDJRERER64uTJk+jRowdMTEwQFxeHJk2ayF0S1RIMlERERHrg4sWL6NChAwDgwoULaNeuncwVUW3ClXKIiIh03I0bN+Dr64v8/HwEBAQwTFKNY6AkIiLSYU+ePIGbmxuys7Px+++/o2vXrnKXRLUQAyUREZGOysnJgYuLC548eYLvvvsOb7zxhtwlUS3FeSiJiIh0kFqthru7O27fvo1PPvkEU6ZMkbskqsU4KIeIiEgHde7cGWfPnsWECROwceNGucuhWo6BkoiISMcMHDgQe/bsQf/+/bF//365yyFilzcREZEumTx5Mvbs2QM/Pz+GSdIaDJREREQ6YvHixVi/fj1atWqFs2fPyl0OkQa7vImIiHTAxo0bMXHiRNjZ2SEpKQmmpqZyl0SkwUBJRESk5fbs2YNBgwbB2toaiYmJqFu3rtwlEUkwUBIREWmx06dPo1u3bjA2NkZsbCyaNm0qd0lERTBQEhERaano6Gh4eHhACIHQ0FC4ubnJXRJRsbhSDhERkRa6desWfHx8kJ+fj2PHjjFMklZjoCQiItIyqampaNeuHTIzM7Fz50707NlT7pKInovTBhEREWmR3NxcuLi44PHjx1izZg2GDRsmd0lEpWKgJCIi0hJqtRoeHh64efMm5s+fjxkzZshdElGZcFAOER
GRlujatStOnTqFsWPH4scff5S7HKIy4xVKIiIiLTB48GCcOnUK/fr1Y5gkncNASUREJLPp06dj165d8PLywoEDB+Quh6jcGCiJiIhktHTpUnz77bdwcnLC+fPnYWDAf5pJ9/AeSiIiIpls3rwZ77zzDho2bIikpCSYm5vLXRJRhTBQEhERyWD//v0YMGAALC0tkZiYiPr168tdElGFMVASERHVsHPnzqFLly4wMjJCdHQ0HB0d5S6JqFIYKImIiGpQbGws3NzcoFarERgYiA4dOshdElGlcelFIiKiGnLnzh14eXkhLy8PR44cYZgkvcFASUREVAPS09PRrl07ZGRkYNu2bejdu7fcJRFVGc5NQEREVM1UKhVcXFzw8OFDrF69GiNGjJC7JKIqxUBJRERUjdRqNTp06IAbN27ggw8+wHvvvSd3SURVjoNyiIiIqlHPnj0REBCAUaNG4aeffpK7HKJqwSuURERE1WT48OEICAhA7969GSZJrzFQEhERVYPZs2dj586d8PDwwOHDh+Uuh6hascubiIioiq1YsQLz5s1D8+bNER8fD6WSk6qQfmOgJCIiqkJbt27F2LFj0aBBA1y9epXrc1OtwEBJRERURQ4dOoT+/fvDwsICCQkJaNiwodwlEdUIBkoiIqIqEBwcDH9/fyiVSly6dAktWrSQuySiGsNBOURERJV05coVdOnSBQBw6tQphkmqdXiXMBERUSXcu3cPnp6eyM3NxcGDB+Hj4yN3SUQ1joGSiIiogjIyMuDq6or09HRs3boV/fr1k7skIlmwy5uIiKgCVCoVXF1d8eDBAyxfvhyjR4+WuyQi2TBQEhERlZNarYa3tzeuXbuG2bNnY+7cuXKXRCQrjvImIiIqp969e+P48eN488038csvv8hdDpHseIWSiIioHEaOHInjx4+jR48eDJNE/8VASUREVEYffPABtm/fjvbt2+P48eNyl0OkNWp1l3dqaioSExORnZ0NExMTODo6wtbWVu6yiIhIC61evRrvv/8+mjZtioSEBK7PTfSUWhcoQ0NDsWHDBpw4cQLx8fF49u23aNECL774IiZMmIBOnTpBoVDIVCkREWmL7du3Y+TIkahXrx6uXr0KS0tLuUsi0iq1JlAGBQVhxowZCAwMLPMx7u7u+Oqrr9C9e/fqK4yIiLTasWPH0KdPH5ibm+PKlSuws7OTuyQiraP3gTIvLw8LFy7EihUroFarNduNjIzg7OyMVq1awcLCAllZWYiPj0dsbCxycnIk55g6dSpWrlwJMzOzmi6fiIhkdOHCBfj5+cHAwACRkZFwdnaWuyQiraTXgTI7OxtDhgzBgQMHNNtatmyJkSNHol+/fjA3Ny9yTE5ODo4ePYpt27YhOjpas71r1644cOAArKysaqR2IiKSV1JSEtq2bQuVSoXTp0+jY8eOcpdEpLX0NlCq1WoMGTIEf/75JwBAqVRi8uTJGDduHIyMjMp0/I4dO7BmzRpkZ2cDAHr27InDhw+X6XgiItJdDx48gJOTE9LT07Fv3z70799f7pKItJreThu0fv16TZg0MzPDhg0b8O6775Y5DBoYGGDkyJH48ccfYW1tDQD4z3/+g+XLl1dbzUREJL/MzEy4uroiLS0N33//PcMkURno5RXKq1evol27dsjIyAAAfPfdd+jataukzZUrV7B27VpER0fj4cOHMDU1hZOTE8aNG1dkEE5YWBjGjRuH/Px8GBkZITQ0FO3bt6+pt0NERNUoOjoajRs3ho2NDVQqFZydnZGYmIilS5fio48+krs8Ip2gl1coV6xYoQmTQ4YMKRImAeDWrVvIzMzEgAEDMG/ePEycOBEAMH36dPz++++Stp6enhg3bhyAgkE+S5cureZ3QERENSEzMxNeXl7w8fHBtWvX0LFjRyQmJmLatGkMk0TloHdXKFNTU9G4cWOkp6fDzMwMx48f13RZlyY/Px/Dhg1DTk4O9u/fL9mXk5ODl19+GY8ePYJSqcT169fRqFGj6ngLRERUQw4ePIj+/fvDwMAARkZGyMnJweDBg4tcWCCi59O7K5T79u1Deno6AKB///5lDpMAYGhoCDs7O6SlpRXZZ2JigkGDBgEAVCoVfvvtt6opmIiIqlReXh5SUlKQmpqK/Pz857b966+/oFQqoVarkZOTA0NDQ8ydO7eGKiXSH3oXKIOCgjRf9+rVq9T2mZmZePz4MW7cuIGffvoJp0+fhp+fX7Fte/furfk6ODi48sUSEVGlqVQq7N69G2PHjkX79u1hamoKW1tb2NjYwMrKCv7+/pgxYwbOnTsnWR1NCIF9+/ZBpVJJtnXt2pW/44nKSe8WIr1w4YLma1dX11Lbr1q1StO1YWBggJdeeqnE+2ZatWoFIyMj5OXlITQ0tGoKJiKiClGpVFizZg2+/PJL3Lx5s9g2WVlZOH/+PM6fP49vvvkGHh4eWLhwIQYOHIgrV67gxo0bmrYGBgZQq9VQKBS4f/9+Tb0NIr2gd4Hyzp07AAAbGxvY2NiU2n7kyJHo3bs37t+/jyNHjkCtViMvL6/YtkZGRrC3t8e1a9dw+/ZtXL16FSYmJpKHUql3HykRkda5dOkSxowZU+SPe0NDQ7Ro0QL169eHWq3GzZs3JaExPDwcgwYNwtChQ6FQKCTH+vv7Y8KECRg8eDAsLCxq5H0Q6Qu9G5Tj6OiIq1evom7dujhx4kS5j3/33XeRlpaGHTt2FPllAwCvvfYakpKSSjzewMCgSMgs7mFsbFymdlVxDkNDw3J/DkRE2ur48eMYMGAAMjMzART83u3WrRuGDh0Kb29vmJqaStqnpKTgxIkT2LlzJ6Kiooqcb/r06Zg+fTpatWpVI/UT6SO9C5Rubm6IioqCUqlEUFBQuVe1+f3337FkyRLs27cPjo6Okn2F99Y8efIE9evXx44dO5CTk4Pc3Fzk5ORU6lHaOXJzcyv8mRgaGmpFsH36YWCgd7fvElENOHXqFF5++WXNCmZOTk747LPPyjQ3sBAChw4dwrJly5CSkgIAaNq0Kc6dOwd7e/tqrZtI3+ld/2y7du0QFRUFlUqFhIQEtGnTplzH5+TkAIBmpPjT7ty5gydPngAAvLy8JIN0qpsQokyhsyqCbUZGRpnalnRrQFkolUqtCLZPH8uQS6TdHj16hKFDh0qWw12xYgVMTEzKdLxCocC//vUveHl5YeLEiUhMTMT169cxevRoHDt2rNheKSIqG70LlF5eXvjll18AFIz4LilQPnz4EPXq1ZNsy8vLw759+2BqaooWLVoUOebpEeReXl5VWHXpFAqFJvxoC7VaXWyIrWywLe74tLS0Mh379GjN8jIyMpI92D57Dv4DR/Q/s2bN0twn7+fnh1WrVhXphcrNzcW3336LAwcOIDU1Fa1bt8a0adPQqVMnTRs7Ozts2rQJb731Fu7evYu///4bGzdu1CxwQUTlp3dd3tHR0ZrR3c2bN8e+ffuK/Ud55syZyMjIgJeXFxo2bIgHDx7g4MGDSEpKwvvvv48xY8YUOWbkyJGIiIgAUNDt8uKLL1bvm6FyK5xLr
iZuQyjrOSobcuUKtcWdx9jYmCGXZBEVFQU3NzcAgJWVFXbv3o0XXnihSLu5c+fi2LFjGDlyJJo2bYq9e/fi0qVL2Lx5Mzp06CBpe+bMGUyaNAkAUL9+fSQnJ2vVH+1EukTvAiUAdO/eXTMgZ+XKlejbt2+RNn/99Rf+/PNPxMfHIyUlBebm5nBxccFbb72FHj16FGl/9uxZzV+v7du3R0REBP9hpTLJz8+vVDitjnBc2mTPz1PesFpd4bbwYWRkxP8Xa4EpU6Zg3bp1AIB58+Zh5MiRRdpERUXhrbfewpw5czB27FgABbcxDRw4EHXr1sW2bduKHDNnzhwcPXoUALBt2zaMGDGi+t4EkR7Ty0C5b98+DBgwAABQp04d7N69u0j3dnlkZGRg4MCBuH37NgDghx9+0KztTaSL8vPzZQm2zzuPWq2u0HtRKBRlDq3VHW4LH0qlkiG3An788UcoFAqMHDlSMgVbXl4e6tatq1lS9z//+Q8sLS2LHP9///d/mgUqnt7//fffY82aNTh27Bjs7Owkx1y4cEHTI9WzZ0/8/fff1fTuiPSb3t1DCQCvvvoq3njjDezatQuPHz/GrFmzsGHDBpibm5f7XDk5OZgzZ44mTPbs2bPY7nAiXWJoaAhzc/MK/T9RXVQqVY2E27S0NDx48KBM56no39tP3/Msd7h9OuRqu48//hi3bt3C559/juXLl+P111+HQqHAxYsXNQMlu3fvXmyYBICYmBg0a9asyP527doBAC5fvlwkUHp6eqJRo0a4ffs2AgMDkZ+fz6nWiCpA+3/DVIBCocDatWtx+vRp3L17F+Hh4Rg/fjxWrlwJBweHMp/n7t27mD9/vmYJLhsbG3z//fccDUxUDZRKJZRKpdZMKC2EKHPIrezV25SUlDKfo6KeN0duTYfbwsezwa3w/SUkJGDQoEHw8fHBypUrER8fr2nzvBXQHjx4gAYNGhTZXrjt3r17RfYpFAq4urri9u3byMjIQFxcHNq2bVuhz5ioNtPLQAkADRs2xOHDh9GzZ088fvwYly5dwhtvvIEpU6Zg8ODBJf6FCxQs1bV37158/fXXSEtLAwBYWFjgwIEDReamJCL9pFAoYGRkBCMjo+f+vqhJQgjk5eXVyD23T548KXPbinp2jtxHjx4BgOb2h+DgYHTv3l1yy5KTk1OJ58vOzi527mFjY2MA/wusz3r6nDdu3GCgJKoAvQ2UAODh4YETJ07gtddew9WrV5GVlYXVq1dj7dq16N27N9zd3dG6dWuYmZkhJycH8fHxiIqKwtGjRzVBEiiYYmL37t3o2LGjjO+GiGq7wvtFjY2NYWVlJXc5AEqeI7ciwXbx4sWSAWMKhQJCCElILAyHxTE1NS12ftzC0GtiUvwI7qfPX5mATFSb6XWgBApGZEdGRmLevHmaEYJZWVnYt28f9u3bV+rxo0aNwldffYW6detWd6lERDqnKufIXbRoEYCCK5cGBgYYP348PvjgA+zYsQMLFy4EAMkf+8+qX79+sd3a9+/fB1DQc1Wcpxey0Kb7iol0Sa24GdDKygpr165FZGQkJk2aVOo9Wqamphg3bhyCg4Px008/MUwSEVUzIQSMjY1haWmJuXPn4saNG1i/fj1atGgh6YKOjY0t8Rxt2rTBtWvXiqx0Vrh+d0kLXVy+fFlyDiIqP72cNqg0mZmZCAsLQ2hoKOLj45GdnQ0TExM4OTnBy8sLXl5eWnPPFBFRbRETE4PGjRvD2tpasj0pKUlzn6Ofnx++//77Yo+PjIzEiBEjJPNQ5ubmYuDAgbC1tcX27duLHJOTk4Pu3bsjPT0djRo1wq1bt6r2TRHVEnrf5V0cc3NzdO7cGZ07d5a7FCIi+q+SBsM0b94cjo6OSEpKQmBgIG7cuFHsjB1ubm54+eWXsWbNGjx69AgODg7Yt28fbt26hcWLFxd77qNHj2quaPbs2bPq3gxRLVMruryJiEh3KRQKzRKJQMHiEiX5/PPPMXLkSOzfvx9ffPEFVCoVvv32W3h7exdpq1KpsHXrVs3zyZMnV23hRLVIrezyJiIi3fLgwQM0a9YMmZmZAIDNmzfD19e3UufctGkTvv76awAFE5yHhoZyhSOiCuIVSiIi0nr169fHF198oXk+f/58JCcnV/h8Z8+exdq1awEUTPq+fv16hkmiSmCgJCIinTB16lR069YNQMGqN2PHjkV0dHS5z3P48GFMnz4dKpUKADB37txKX+0kqu3Y5U1ERDrj/v376NatG2JiYgAULNn59ttvY9y4caVOCXfnzh2sXr0ahw8f1mwbNGgQfv31V51Y65xImzFQEhGRTrl37x7+9a9/ISQkRLPN3Nwc/fv3h7+/P1xcXFCvXj0IIZCcnIzo6GgEBAQgICBAshLPqFGjsHnz5mKXaySi8mGgJCIinZObm4vPP/8cS5cu1XRdl1XdunXx7bff4s033+R9k0RVhIGSiIh0VnR0NL7++mts27YNGRkZz23bqFEjvPvuu5gyZUqJyzASUcUwUBIRkc5LSUlBQEAAQkNDERERgZSUFBgaGqJ+/fro0KEDfHx80LVrV3ZvE1UTBkoiIiIiqhROG0RERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElcJASURERESVwkBJRERERJXCQElERERElfL/kZKx9GRB/2kAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAApQAAAHzCAYAAACe1o1DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAACJQ0lEQVR4nOzdd1QU198G8GfpvVgQUEAQRUFRpCiKvWHv3dgbtpiqiYkxphgT/aUKdrHFEntBUBQLViwgIgIiYkNRaSJSd94/9mV1BRWlzLI8n3M42Z25M/vsQvDL3Dv3SgRBEEBERERE9IHUxA5ARERERJUbC0oiIiIiKhUWlERERERUKiwoiYiIiKhUWFASERERUamwoCQiIiKiUmFBSURERESlwoKSiIiIiEqFBSURERERlQoLSiIiIiIqFRaURERERFQqLCiJiIiIqFRYUBIRERFRqbCgJCIiIqJS0RA7ABERVQypVIrc3FyxYxCRktDU1IS6unqZnIsFJRFRFZCbm4uEhARIpVKxoxCREjExMYG5uTkkEkmpzsOCkohIxQmCgKSkJKirq8PKygpqahztRFTVCYKArKwsJCcnAwAsLCxKdT4WlEREKi4/Px9ZWVmwtLSEnp6e2HGISEno6uoCAJKTk2FmZlaq7m/+mUpEpOIKCgoAAFpaWiInISJlU/hHZl5eXqnOwyuURERVRGnGSEVEACtWAOHhwLNngKEh0KwZMGUK0LRpmUUkogpW2rGThVhQEhHRG4WFAbNnA2fOFN139izg5we0bg38/jvg7l7h8YhISbDLm4iIinXwINC2bdFiUkdH8fnp07J2Bw9WXDZVJJFIsGfPHrFjAAD8/f1hYmLyXseMHTsW/fr1e2ubunXr4o8//vjgXKVx/PhxSCQSpKWlVfhrL1iwAM2aNavw161ILCiJiKiIsDBg0CAgO1v23MEB8PUF0tOBFy+AtDTZcwcH2f7sbFn7sLCyyzB27FhIJBJIJBJoamrC1tYWX375JbILQ5WR9u3bY/bs2SVqJ5FIsHXrVoXtf/zxB+rWrVummT7E7du3IZFIYGZmhmfPninsa9asGRYsWFDicw0dOhSxsbFlnJBUGQtKIiIqYvbsl8XkkCHA1auAjw9gZCTbZmwsex4RAQweLNuWnQ188knZ5vD29kZSUhJu3bqF33//HStWrMB3331Xti/yHnR0dPDNN9+U+gaG8vTs2TMsWbKkVOfQ1dWFmZlZGSUSHyf0L38sKImISEF4+MtubgcHYONG4E03iGtrA5s2vbxSefq0rMgsK9ra2jA3N4eVlRX69euHzp0748iRI/L9OTk5mDVrFszMzKCjowMvLy+EvXaZ9MSJE/Dw8IC2tjYsLCwwd+5c5OfnA5BdBT1x4gT+/PNP+dXQ27dvvzHP8OHDkZaWhlWrVr01t5+fH+rVqwctLS04ODhg48aNCvvj4uLQtm1b6OjowNHRUeE9Fbp79y6GDBkCExMTVKtWDX379n1rtkIzZ87E//73P/n8gsXJycnB559/jtq1a0NfXx8tWrTA8ePH5fuL6/L+8ccfYWZmBkNDQ0ycOBFz584ttht3yZIlsLCwQPXq1TF9+vQixfezZ88wfPhw6Ovro3bt2li2bJnC/jt37qBv374wMDCAkZERhgwZgkePHsn3F9e1Pnv2bLRv317+vH379pgxYwZmz56NGjVqoFu3bvJ9ly5dgpubG/T09NCqVSvExMQonOtd37t35QOAX375BbVq1YKhoSEmTJhQ5lfVlRELSiIiUrBy5cvHH3/85mKykJYWMGtW8ceXpWvXruHMmTMK0x99+eWX2LlzJ9avX4/Lly/D3t4e3bp1Q0pKCgDg/v376NGjB9zd3REREQE/Pz+sWbMGP/74IwDgzz//hKenJyZNmoSkpCQkJSXBysrqjRmMjIwwb948LFy4EM+fPy+2ze7du/Hxxx/js88+w7Vr1zBlyhSMGzcOISEhAGRLYA4YMABaWlo4f/48li9fjjlz5iicIy8vD926dYOhoSFOnTqF06dPw8DAAN7e3u+82jZ8+HDY29tj4cKFb2wzY8YMnD17Flu3bsXVq1cxePBgeHt7Iy4urtj2mzdvxk8//YTFixfj0qVLsLa2hp+fX5F2ISEhiI+PR0hICNavXw9/f3/4+/srtPntt9/QtGlTXLlyBXPnzsXHH38sL6ilUin69u2LlJQUnDhxAkeOHMGtW7cwdOjQt77n4qxfvx5aWlo4ffo0li9fLt8+b948LF26FBcvXoSGhgbGjx8v31eS79278m3fvh0LFizAzz//jIsXL8LCwgK+vr7vnb/SEYiISKW9ePFCuH79uvDixYsStff0FARA9pWWVrLXSEt7eUyrVqUI+4oxY8YI6urqgr6+vqCtrS0AENTU1IQdO3YIgiAImZmZgqamprB582b5Mbm5uYKlpaXw66+/CoIgCF9//bXg4OAgSKVSeZtly5YJBgYGQkFBgSAIgtCuXTvh448/fmeewnbZ2dmCjY2NsHDhQkEQBOH3338XbGxs5O1atWolTJo0SeHYwYMHCz169BAEQRCCgoIEDQ0N4f79+/L9hw4dEgAIu3fvFgRBEDZu3Fgkd05OjqCrqysEBQUVmy8hIUEAIFy5ckUIDAwUNDU1hZs3bwqCIAhNmzYVvvvuO0EQBCExMVFQV1dXeH1BEIROnToJX331lSAIgrBu3TrB2NhYvq9FixbC9OnTFdq3bt1aaNq0qfz5mDFjBBsbGyE/P1/hfQ8dOlT+3MbGRvD29lY4z9ChQ4Xu3bsLgiAIhw8fFtTV1YU7d+7I90dFRQkAhAsXLshfp2/fvgrn+Pjjj4V27drJn7dr105wcXFRaBMSEiIAEIKDg+XbDh48KACQ/7/xru9dSfJ5enoK06ZNUzhHixYtFD4rZfK+vx/ehFcoiYhIQeH9HDo6srGSJWFsLOv+fvX4stChQweEh4fj/PnzGDNmDMaNG4eBAwcCAOLj45GXl4fWrVvL22tqasLDwwPR0dEAgOjoaHh6eirMtde6dWtkZmbi3r17H5RJW1sbCxcuxJIlS/DkyZMi+6OjoxUyFb7mq5msrKxgaWkp3+/p6anQPiIiAjdv3oShoSEMDAxgYGCAatWqITs7G/Hx8e/M2K1bN3h5eeHbb78tsi8yMhIFBQVo0KCB/NwGBgY4ceLEG88dExMDDw8PhW2vPwcAJycnhdVWLCwsinS9v/5ePT09i3w2r14ldnR0hImJibxNSbm6uha73dnZWSEfAHnGkn7v3pYvOjoaLVq0KPIeVR3noSQiIgWGhrL/ZmfL7uouSVGZng7k5CgeXxb09fVhb28PAFi7di2aNm2KNWvWYMKECWX3Ih9g1KhRWLJkCX788cdyucM7MzMTrq6u2Lx5c5F9NWvWLNE5fvnlF3h6euKLL74ocm51dXVcunSpyFJ7BgYGHx4asoL+V
RKJBFKptFTnfJ2amhoEQVDYVtxNUvr6+sUe/2rGwj80yjpjVcQrlEREpODV+yz+/bdkx7xa95TXdHtqamr4+uuv8c033+DFixfyGydOnz4tb5OXl4ewsDA4OjoCABo1aoSzZ88qFCCnT5+GoaEh6tSpA0C2JGXh8pTvk2XRokXw8/MrcqNMo0aNFDIVvuarme7evYukpCT5/nPnzim0b968OeLi4mBmZgZ7e3uFL+MSXjb28PDAgAEDMHfuXIXtLi4uKCgoQHJycpFzm5ubF3suBweHIjc7vf68pF5/r+fOnUOjRo0AvPxs7t69K99//fp1pKWlyT+/mjVrKnx2ABAeHv5BWV5X0u/d2/I1atQI58+fL/IeVV6ZdMATEZHSet8xUleuvBwP6eAgCDk5b2+fnS1rV3hMeHjpMwtC8WPl8vLyhNq1awu//fabIAiysXOWlpbCoUOHhKioKGHMmDGCqampkJKSIgiCINy7d0/Q09MTpk+fLkRHRwt79uwRatSoIR9PKAiCMGnSJMHd3V1ISEgQHj9+LB9b+brixlq2adNG0NHRURhDuXv3bkFTU1Pw9fUVYmNjhaVLlwrq6upCSEiIIAiCUFBQIDg6OgpdunQRwsPDhZMnTwqurq4KYyifP38u1K9fX2jfvr1w8uRJ4datW0JISIgwc+ZM4e7du8Xme3UMZaGYmBhBQ0ND0NHRUXjPI0eOFOrWrSvs3LlTuHXrlnD+/Hnh559/Fg4cOCAIQtExlJs2bRJ0dXUFf39/ITY2Vvjhhx8EIyMjoVmzZm/9fr0+ttHGxkYwMjISFi9eLMTExAj//POPoK6uLgQGBgqCIAhSqVRo1qyZ0KZNG+HSpUvC+fPnBVdXV4VzBAYGChKJRFi/fr0QGxsrzJ8/XzAyMioyhvL171XhGMrU1FT5titXrggAhISEhBJ970qSb+vWrYKOjo6wdu1aISYmRpg/f75gaGio8mMoWVASEam4D/kHo1WrlwXikCFvLiqzswVh8OCXbVu3LqPQQvEFiiAIwqJFi4SaNWsKmZmZwosXL4SZM2cKNWrUELS1tYXWrVvLb44odPz4ccHd3V3Q0tISzM3NhTlz5gh5eXny/TExMULLli0FXV1dheLidcUVKWfOnBEAKBSUgiAIvr6+gp2dnaCpqSk0aNBA2LBhg8L+mJgYwcvLS9DS0hIaNGggBAYGKhSUgiAISUlJwujRo+Xvzc7OTpg0aZKQnp5ebL7iCkpBEITJkycLABQKytzcXGH+/PlC3bp1BU1NTcHCwkLo37+/cPXqVUEQihaUgiAICxcuFGrUqCEYGBgI48ePF2bNmiW0bNlSvr+kBeX3338vDB48WNDT0xPMzc2FP//8U+GYxMREoU+fPoK+vr5gaGgoDB48WHj48KFCm/nz5wu1atUSjI2NhU8++USYMWNGmRSUgvDu711J8v3000/yz2rMmDHCl19+qfIFpUQQXhuIQEREKiU7OxsJCQmwtbWFzuvrJr5BWJhsOcVXV8qZNQsYOVI2pjI9XdbN/ddfQOE0frq6wIkTXNO7qujSpQvMzc2LzNNIlcuH/H4oDm/KISKiItzdgR07Xi6/GBMDTJ8u+9LWfnkDTiFdXeC//1hMqqqsrCwsX74c3bp1g7q6OrZs2YLg4OBiJ2Snqok35RARUbF69gROngRem0WlSDHZurXsymTPnhWXjSqWRCJBQEAA2rZtC1dXV+zfvx87d+5E586dxY5GSoJXKImI6I3c3YHQUNlyiitXypZlfPZMNjVQs2bA5MlA06Zip6Typquri+DgYLFjkBKrkgVlXJzsr+5Ll2SPc3JkS4fZ2QGurkCbNsD/3/1PRESQFY2vLblMRCRXZQpKQQB27wb+/hs4frz4NkePAqtWyR57esrGCg0bBrw27ysRERERvaJKjKG8fRvo0gUYOPDNxeTrzp4FRo0CvLyAGzfKMx0RERFR5abyVygDA4HBg4HMzJfbatqnosWI66jrngQLxyfQ1s9D7gsNPIyugcSLtXBhqyOSrtcAAJw7JxsntGEDMGSIOO+BiIiISJmpdEEZEAD06wcULvFpUvsZBv0WAqduCfj/5TvlNLRyYdfyAexaPkD76VcQd9IK2z/riCe3TJCTI+v6LigAhg+v8LdBREREpNRUdmLz2FjAxQXIypI9d+51E8P/OQJdo9wSnyM3SwM7vuiAC1tkd+hoaMiuWLq6lkdiIqLyUVYTF9/PuI8jt44gIycDRtpG6GLXBbWNapdhUiKqaGX1+0Elx1AWFADjx78sJpv1i8XYdQHvVUwCgJZePob/cwStxkYCAPLzgbFjgdz3Ow0RUaV28cFFDNg2AHX/rItxe8fh48CPMW7vONT9sy4GbBuAiw8uih1RNLdv34ZEIkF4eLjYUUpl7Nix6Nevn9gx5CQSCfbs2VPi9sePH4dEIkFaWtob2yxYsADNmjUrdbYPVbduXfzxxx8V/roV9TOqkgXl+vXA6dOyxzVs0zD87yNQU1e8ELt5+mbMrjb7jV9pD9IAABIJMHDxcdRukgwAuHYN+PPPinw3RETi2R29G15rvbD7xm7kS/MV9uVL87H7hmz/nht7yvy1x44dC4lEAolEAi0tLdjb22PhwoXIz89/98EfoH379vLXe/Vr6tSp5fJ678Pf3x8SiQTe3t4K29PS0iCRSHC8pHeclqPCz2/r1q0K2//44w/UrVv3vc6VlJSE7t27l2E6Km8qN4ZSEBQLvqF/BkNbv+gvn1ZjWsGhncNrxwr477P/UM2qGkwsTeTb1TWlGLHsCJa0GwFBkGDZMuDTTzmdEBGptosPLmL4zuHIKch5a7ucghwM2zEMoeND4WbpVqYZvL29sW7dOuTk5CAgIADTp0+HpqYmvvrqqzJ9nUKTJk3CwoULFbbp6emVy2u9Lw0NDQQHByMkJAQdOnQQO06xdHR08M0332DgwIHQ1NT84POYm5uXYSpxFRQUQPL6jRsqSOWuUJ4/D1y9Knts45qE+l73i21n62ELtyFuCl/VbaojNysXroOLDpKs3fgJGnZKBAAkJsruHiciUmU/n/r5ncVkoZyCHCwKXVTmGbS1tWFubg4bGxv4+Pigc+fO2LdvHwAgNTUVo0ePhqmpKfT09NC9e3fExcXJj01MTETv3r1hamoKfX19ODk5ISAg4K2vp6enB3Nzc4UvIyMj+f4LFy7AxcUFOjo6cHNzw5UrV4qcY9++fahfvz50dHTQoUMHrF+/vkh3bGhoKNq0aQNdXV1YWVlh1qxZeP78+Vuz6evrY/z48Zg7d+5b20VGRqJjx47Q1dVF9erVMXnyZGS+MtVJQUEBPv30U5iYmKB69er48ssv8frtFFKpFIsWLYKtrS10dXXRtGlT7Nix462vCwDDhw9HWloaVhVO6vwGe/fuRfPmzaGjowM7Ozt8//33CleeX+/yPnPmDJo1ayb/3Pfs2VNsN+6lS5fg5uYGPT09tGrVCjExMUVee8WKFbCysoKenh6GDBmC9PR0hfe9cOFC1KlTB9ra2mjWrBkCX/kHv7iu9fDwcEgk
Ety+fRuA7GqyiYkJ9u3bB0dHR2hra+POnTsAZGuijx8/HoaGhrC2tsbKlSsVsr3re/eufEDJfkbLg8oVlEePvnzc8qOo9zr20o5LkEgkaD6oebH7W350Tf742LEPikdEVCncz7iPfTH73uuYfTH7cD+j+D/iy4quri5y/38g+9ixY3Hx4kXs27cPZ8+ehSAI6NGjB/L+f2qP6dOnIycnBydPnkRkZCQWL14MAwODD37tzMxM9OrVC46Ojrh06RIWLFiAzz//XKFNQkICBg0ahH79+iEiIgJTpkzBvHnzFNrEx8fD29sbAwcOxNWrV7Ft2zaEhoZixowZ78ywYMECREZGvrG4e/78Obp16wZTU1OEhYXhv//+Q3BwsMK5ly5dCn9/f6xduxahoaFISUnB7t27Fc6zaNEibNiwAcuXL0dUVBQ++eQTjBo1CidOnHhrPiMjI8ybNw8LFy58Y4F86tQpjB49Gh9//DGuX7+OFStWwN/fHz/99FOx7TMyMtC7d280adIEly9fxg8//IA5c+YU23bevHlYunQpLl68CA0NDYwfP15h/82bN7F9+3bs378fgYGBuHLlCqZNmybf/+eff2Lp0qVYsmQJrl69im7duqFPnz4Kf6iURFZWFhYvXozVq1cjKioKZmZmAGSffWGRN23aNPj4+MiL3pJ8796VryQ/o+VGUDH9+wuCrONbEL46t174I+WPEn0tfbRU0K+mL9i2sH1jmx9urJSfu107sd8pEVHJvHjxQrh+/brw4sWLEh+z7so6AQvw3l/+V/zLLPeYMWOEvn37CoIgCFKpVDhy5Iigra0tfP7550JsbKwAQDh9+rS8/ZMnTwRdXV1h+/btgiAIQpMmTYQFCxaU+PXatWsnaGpqCvr6+gpfmzZtEgRBEFasWCFUr15d4XP08/MTAAhXrlwRBEEQ5syZIzRu3FjhvPPmzRMACKmpqYIgCMKECROEyZMnK7Q5deqUoKam9sbv0bp16wRjY2NBEARh7ty5QoMGDYS8vDwhNTVVACCEhIQIgiAIK1euFExNTYXMzEz5sQcPHhTU1NSEhw8fCoIgCBYWFsKvv/4q35+XlyfUqVNH/llnZ2cLenp6wpkzZxQyTJgwQRg+fPhbP7+PP/5YyM7OFmxsbISFCxcKgiAIv//+u2BjYyNv16lTJ+Hnn39WOHbjxo2ChYWF/DkAYffu3YIgyD7j1z/3VatWKXzuISEhAgAhODhY4X0DkB/33XffCerq6sK9e/fkbQ4dOiSoqakJSUlJgiAIgqWlpfDTTz8pZHN3dxemTZum8DqF30tBEIQrV64IAISEhARBEGTfKwBCeHi4wnlsbGyEUaNGyZ9LpVLBzMxM8PPzEwShZN+7d+Uryc/o6z7k90NxVG4MZeGqNpq6eahpn1ry447dwPOU58V2dxcyNMuCkXkmMh4aIDq6tEmJiJRXRk5GhR73JgcOHICBgQHy8vIglUoxYsQILFiwAEePHoWGhgZatGghb1u9enU4ODgg+v9/Qc+aNQs+Pj44fPgwOnfujIEDB8LZ2fmtrzdy5MgiVxRr1aoFAIiOjoazs7PC1Cqenp4KbWNiYuDu7q6wzcPDQ+F5REQErl69is2bN8u3CYIAqVSKhIQENGrU6K0Z58yZgxUrVmDt2rUY8tqKG9HR0WjatCn09fXl21q3bg2pVIqYmBjo6OggKSlJ4XPT0NCAm5ubvNv75s2byMrKQpcuXRTOnZubCxcXl7dmA2TDFBYuXIiZM2fCx8enyP6IiAicPn1a4YpkQUEBsrOzkZWVVWTMakxMTJHP/fXPtNCr318LCwsAQHJyMqytrQEA1tbWqF375VRXnp6e8s9GT08PDx48QOvWrRXO2bp1a0RERLzzfb9KS0ur2J+1V7dJJBKYm5sjOVl20++7vne6urrvzFeSn9HyonIFZeFUQdoGeVB7jw79SzsuQV1THS793v4/i65RDjIeGuDFi1KEJCJSckbaRu9uVIbHvUmHDh3g5+cHLS0tWFpaQkOj5P9sTZw4Ed26dcPBgwdx+PBhLFq0CEuXLsXMmTPfeIyxsTHs7e3LIvobZWZmYsqUKZg1a1aRfYWFz9uYmJjgq6++wvfff49evXqVSz4AOHjwoELxBciKxZIYNWoUlixZgh9//LHIHd6ZmZn4/vvvMWDAgCLHlWYeRAAKNwIV3ggjlUpLdc5Xqf1/YSG8Mua0cIjFq3R1dYu9Eef1G5UkEkmZ5hOTyo2hLPxe5eeU/BbsnMwcXDt0DQ07NoR+Nf23ts3PVVd4HSIiVdTFrgs01N7vmoOGmgY623Uu0xz6+vqwt7eHtbW1QjHZqFEj5Ofn4/z58/JtT58+RUxMDBwdHeXbrKysMHXqVOzatQufffbZO28WeZtGjRrh6tWryM7Olm87d+6cQhsHBwdcvKg4L2dYWJjC8+bNm+P69euwt7cv8qWlpVWiLDNnzoSamhr+fG0eu0aNGiEiIkJh/OLp06ehpqYGBwcHGBsbw8LCQuFzy8/Px6VLl+TPX72R5PV8VlZWJcqnpqaGRYsWwc/PT36zyqvvPyYmptj3r1bMlSAHBwdERkYiJ+flDWKvf6YldefOHTx48ED+/Ny5c/LPxsjICJaWljhdOO/g/zt9+rT8Z6pmzZoAZNMaFSqr+R3f9b0rSb6S/IyWF5UrKAv/uMvO0Eb6w5JN9RAZECm7u3vQ25fAyXmugZQ7RgqvQ0Skimob1UbvBr3f65g+Dn0qbOWc+vXro2/fvpg0aRJCQ0MRERGBUaNGoXbt2ujbty8AYPbs2QgKCkJCQgIuX76MkJCQd3YnZ2Vl4eHDhwpfqamy4VMjRoyARCLBpEmTcP36dQQEBGDJkiUKx0+ZMgU3btzAnDlzEBsbi+3bt8Pf3x/Ayytmc+bMwZkzZzBjxgyEh4cjLi4Oe/fuLdFNOYV0dHTw/fff46+//lLYPnLkSOjo6GDMmDG4du0aQkJCMHPmTHz00UfyrvuPP/4Yv/zyC/bs2YMbN25g2rRpCnctGxoa4vPPP8cnn3yC9evXIz4+HpcvX8bff/+N9evXlzhjz5490aJFC6xYsUJh+/z587FhwwZ8//33iIqKQnR0NLZu3Ypvvvmm2POMGDECUqkUkydPRnR0NIKCguSf+/tOx1P42URERODUqVOYNWsWhgwZIp+m6IsvvsDixYuxbds2xMTEYO7cuQgPD8fHH38MAPKiesGCBYiLi8PBgwexdOnS98rwJiX53r0rX0l+RsuLyhWUry6LePdKrRIdc/G/i9A20Ebj7o3f2u5+pBkEqVqR1yEiUkVft/ka2uol6+LU0dDBV17lMzfkm6xbtw6urq7o1asXPD09IQgCAgIC5N2KBQUFmD59Oho1agRvb280aNAAvr6+bz3nqlWrYGFhofA1fPhwAICBgQH279+PyMhIuLi4YN68eVi8eLHC8ba2ttixYwd27doFZ2dn+Pn5ycdkFnYXOzs748SJE4iNjUWbNm3g4uKC+fPnw9LS8r3e/5gxY2BnZ6ewTU9PD0FBQUhJSYG7uzsGDRqETp0
64Z9//pG3+eyzz/DRRx9hzJgx8PT0hKGhIfr3769wnh9++AHffvstFi1aJP/8Dh48CFtb2/fKuHjxYoWrZQDQrVs3HDhwAIcPH4a7uztatmyJ33//HTY2NsWew8jICPv370d4eDiaNWuGefPmYf78+QDev4vc3t4eAwYMQI8ePdC1a1c4Ozsr/EzMmjULn376KT777DM0adIEgYGB8mmgAFmX9ZYtW3Djxg04Oztj8eLF+PHHH98rw5uU5Hv3rnwl+RktLyq3lvfu3UDhsAyX/jEYs+btE0ZmPsnEfMf5aD6wOUb5jXpr2/8+74DTa2UDaletAiZOLJPIRETlqjRr9e65sQfDdgx763yU2ura2DpoK/o17FfKpKrpp59+wvLly3H37l2xo6iMzZs3Y9y4cUhPT4eurq7YcSo1ruX9Bj16AP8/xAER++2Rdv/tc45d2X0F0nzpO7u7s9K0cXF7QwCAvj4weHCZxCUiUmr9GvZD6PhQDGg0oMiYSg01DQxoNACh40NZTL7C19cXYWFhuHXrFjZu3IjffvsNY8aMETtWpbZhwwaEhoYiISEBe/bswZw5czBkyBAWk0pE5e7y1taWXTlctAiQ5qvjvy86YOLm/XjTMIuL/12EQU0DOLR3KL7B/9s9ry1yMmWDpUeOBIyNyzo5EZFycrN0w84hO3E/4z6CbwUjIycDRtpG6GzXucLGTFYmcXFx+PHHH5GSkgJra2t89tln5bZUZFXx8OFDzJ8/Hw8fPoSFhQUGDx78xonQSRwq1+UNAE+fAk5OwKNHsueDlx5D63GRH3y+K7vrY/2EHgAAIyPg2jWghDe6ERGJrqy6tIhI9bDL+y2qVweWL3/5fMcX7XFuk+ObD3iLiH322DS1m/z577+zmCQiIiJ6lUoWlADQrx/w2Weyx4JUDVtndcGmqV3xPLVkdyxmP9PE9s86YN3YnijIk809qa+/HW3a3CynxERERESVk8qNoXzVb78B+flA4byvF7c3wvXDtmgxMgoeI66jlkOKwmo6ggA8uWWCC1sb4eyGxsh8/HIey4EDnyMy8nu0afMUhw8ffufyXURERERVhUqOoXyVIADr1wOzZwPp6Yr7dAxzYOH4FNr6uch9oYmH0dWRlaY4fkBPD1i8GJg2DXj69DG8vb1x69YtBAQEVNj6mEREpcExlET0JhxDWUISCTB2LBAVBYwZI7sLvFD2M20knLfEjWN1cetsbYViUkMDGDIEuHoVmDEDUFOTLbl07NgxNGnSBJ07d8aRI0cq/g0RERERKRmVLygL1a4N+PsD9+4BS5cCffrItr3K3Bzo2VM25dCdO8C2bUC9eoptjI2NERgYiHbt2qFXr17YvXt3hb0HIiIiImWk8l3e7/LiBZCTA2hpAbq6eON8la/Lzc3F6NGj8d9//2HNmjUYO3ZsueYkIvpQ7PIuP7dv34atrS2uXLmCZs2aiR2H6L2xy7uM6OoCJiaysZLvs8a8lpYWNm/ejIkTJ2LcuHH4s/DOHyIiKhNjx46FRCKBRCKBlpYW7O3tsXDhQuTn55fL67Vv317+eq9+TZ06tVxej0iVqPRd3uVNXV0dy5cvh6mpKWbPno3U1FR89913kLxPZUpERG/k7e2NdevWIScnBwEBAZg+fTo0NTXLbeWZSZMmYeHChQrb9PT03tCaiApV+SuUpSWRSPDLL79g0aJF+P777/HJJ59AKpWKHYuISCVoa2vD3NwcNjY28PHxQefOnbFv3z4AQGpqKkaPHg1TU1Po6emhe/fuiIuLkx+bmJiI3r17w9TUFPr6+nByckJAQMBbX09PTw/m5uYKX0ZGRvL9Fy5cgIuLC3R0dODm5oYrV64UOce+fftQv3596OjooEOHDli/fj0kEgnS0tLkbUJDQ9GmTRvo6urCysoKs2bNwvPnz0v5aRGJhwVlGZk7dy58fX3x119/YcKECeXWJUNEVJXp6uoiNzcXgKxL/OLFi9i3bx/Onj0LQRDQo0cP5OXlAQCmT5+OnJwcnDx5EpGRkVi8eDEMDAw++LUzMzPRq1cvODo64tKlS1iwYAE+//xzhTYJCQkYNGgQ+vXrh4iICEyZMgXz5s1TaBMfHw9vb28MHDgQV69exbZt2xAaGooZM2Z8cDYisbHLuwz5+PjAxMQEo0ePRnp6OrZs2QJt7ZKtzENEVKHc3ICHDyv+dc3NgYsX3/swQRBw9OhRBAUFYebMmYiLi8O+fftw+vRptGrVCgCwefNmWFlZYc+ePRg8eDDu3LmDgQMHokmTJgAAOzu7d76Or68vVq9erbBtxYoVGDlyJP79919IpVKsWbMGOjo6cHJywr179+Dj46PQ1sHBAb/99hsAwMHBAdeuXcNPP/0kb7No0SKMHDkSs2fPBgDUr18ff/31F9q1awc/Pz/eOEWVEgvKMjZ8+HAYGhpi8ODB8mmFSvMXMRFRuXj4ELh/X+wU73TgwAEYGBggLy8PUqkUI0aMwIIFC3D06FFoaGigRYsW8rbVq1eHg4MDoqOjAQCzZs2Cj48PDh8+jM6dO2PgwIHvXOVs5MiRRa4o1qpVCwAQHR0NZ2dnhYLv9QUuYmJi4O7urrDNw8ND4XlERASuXr2KzZs3y7cJggCpVIqEhAQ0atToXR8LkdJhQVkOevXqhcDAQPTu3RtdunRBQEAATE1NxY5FRPSSuXmleN0OHTrAz88PWlpasLS0hIZGyf/ZmjhxIrp164aDBw/i8OHDWLRoEZYuXYqZM2e+8RhjY2PY29u/V8b3lZmZiSlTpmDWrFlF9llbW5fraxOVFxaU5aRdu3Y4duwYvL290a5dOxw+fBjmYv0CJyJ63Qd0O4tBX1+/2AKvUaNGyM/Px/nz5+Vd3k+fPkVMTAwcHR3l7aysrDB16lRMnToVX331FVatWvXWgvJtGjVqhI0bNyI7O1t+lfLcuXMKbRwcHIrc+BMWFqbwvHnz5rh+/Xq5F65EFYk35ZQjNzc3nDx5EikpKfDy8sLt27fFjkREpBLq16+Pvn37YtKkSQgNDUVERARGjRqF2rVro2/fvgCA2bNnIygoCAkJCbh8+TJCQkLe2Z2clZWFhw8fKnylpqYCAEaMGAGJRIJJkybh+vXrCAgIwJIlSxSOnzJlCm7cuIE5c+YgNjYW27dvh7+/PwDIp5SbM2cOzpw5gxkzZiA8PBxxcXHYu3cvb8qhSo0FZTlzdHREaGgoAMDLy0s+toeIiEpn3bp1cHV1Ra9eveDp6QlBEBAQEABNTU0AQEFBAaZPn45GjRrB29sbDRo0gK+v71vPuWrVKlhYWCh8DR8+HABgYGCA/fv3IzIyEi4uLpg3bx4WL16scLytrS127NiBXbt2wdnZGX5+fvIxmYU3aTo7O+PEiROIjY1FmzZt4OLigvnz58PS0rKsPyKiClPll16sKElJSejWrRsePHiAwMBAuLm5iR2JiKoILr0orp9++gnLly/H3bt3xY5CVASXXqxkLCwscPz4cdSvXx8dO3bEiRMnxI5ERETlwNfXF2FhYbh16xY2btyI3377DWPGjBE7FlG5Yk
FZgapVq4YjR46gRYsW8Pb2xsGDB8WOREREZSwuLg59+/aFo6MjfvjhB3z22WdYsGCB2LGIyhW7vEWQk5OD4cOHY//+/diwYYN8fA4RUXlglzcRvQm7vCsxbW1tbN++HSNHjsTIkSOxfPlysSMRERERfTDOQykSDQ0NrF27FiYmJvDx8UFaWhrmzp0rdiwiUmHskCKi15XV7wUWlCJSU1PD77//DlNTU3z11VdIS0vDokWL5HOVERGVBXV1dQBAbm4udHV1RU5DRMokKysLAOTTbX0oFpQik0gk+O6772BiYoLZs2cjLS0Ny5Ytk/8DQERUWhoaGtDT08Pjx4+hqakJNTWOdiKq6gRBQFZWFpKTk2FiYlLquoM35SgRf39/TJgwAYMHD8aGDRugpaUldiQiUhG5ublISEiAVCoVOwoRKRETExOYm5uXuneUBaWS2bVrF4YPH45OnTphx44d0NPTEzsSEakIqVSK3NxcsWMQkZLQ1NQssx5RFpRKKDg4GH379oWrqyv2798PY2NjsSMRERERvRELSiV19uxZ9OjRA3Z2dggMDETNmjXFjkRERERULBaUSuzq1avo2rUrTE1NcfjwYVhZWYkdiYiIiKgIFpRK7ubNm+jcuTMEQUBwcDDq168vdiQiIiIiBZw7QsnZ29sjNDQUenp6aNOmDSIiIsSORERERKSABWUlUKdOHZw8eRJ16tRB+/btcebMGbEjEREREcmxoKwkatasiWPHjsHZ2RldunTB4cOHxY5EREREBIAFZaViZGSEwMBAdOjQAb169cLOnTvFjkRERETEgrKy0dXVxe7duzFo0CAMGTIE69atEzsSERERVXFcy7sS0tTUxMaNG2FsbIzx48cjLS0Nn3zyidixiIiIqIpiQVlJqaurw9fXFyYmJvj000+RlpaGBQsWlHotTiIiIqL3xYKyEpNIJFi0aBFMTEwwd+5cpKam4o8//oCaGkcyEBERUcVhQakC5syZAxMTE/j4+CA9PR1r1qyBhga/tURERFQxWHWoiClTpsDY2BgfffQRMjIysGXLFujo6Igdi4iIiKoALr2oYgICAjBw4EC0atUKe/fuhYGBgdiRiIiISMWxoFRBJ0+eRK9eveDo6IiAgABUq1ZN7EhERESkwlhQqqhLly7B29sb5ubmOHz4MCwsLMSORERERCqKBaUKu3HjBjp37gxtbW0EBwfD1tZW7EhERESkgji/jApr2LAhQkNDoaamBi8vL1y/fl3sSERERKSCWFCquLp16+LUqVOoUaMG2rZti7CwMLEjERERkYphQVkFmJub4/jx42jQoAE6duyI48ePix2JiIiIVAgLyirC1NQUR44cgaenJ7y9vbF//36xIxEREZGKYEFZhejr62P//v3o2bMn+vfvj82bN4sdiYiIiFQAC8oqRltbG9u2bcPo0aPx0UcfwdfXV+xIREREVMlx6cUqSENDA6tXr4aJiQmmT5+O9PR0zJ07FxKJROxoREREVAmxoKyi1NTUsHTpUpiamuLrr79GamoqFi9ezKKSiIiI3hsLyipMIpHg22+/hbGxMT7++GOkpaXBz88P6urqYkcjIiKiSoQFJWHWrFkwMTHB+PHjkZ6ejo0bN0JLS0vsWERERFRJcOlFktuzZw+GDh2Kjh07YufOndDT0xM7EhEREVUCLChJQXBwMPr16wcXFxccOHAAxsbGYkciIiIiJceCkoo4d+4cevTogbp16yIwMBBmZmZiRyIiIiIlxoKSihUZGYmuXbvCyMgIR44cgbW1tdiRiIiISElxYnMqVpMmTRAaGorc3Fx4eXkhNjZW7EhERESkpFhQ0hvVq1cPoaGhMDAwQJs2bRAeHi52JCIiIlJCLCjprWrXro2TJ0/C2toa7du3x+nTp8WOREREREqGBSW9U40aNXD06FE0a9YMXbp0QVBQkNiRiIiISImwoKQSMTIywqFDh9CpUyf07t0bO3bsEDsSERERKQkWlFRiurq62LVrFwYPHoyhQ4dizZo1YkciIiIiJcClF+m9aGpqYuPGjTAxMcHEiRORlpaGzz77TOxYREREJCIWlPTe1NTU8M8//8DExASff/450tLSsHDhQkgkErGjERERkQhYUNIHkUgk+Omnn2BiYoIvv/wSaWlp+PPPP6GmxlEUREREVQ0LSiqVL774AiYmJpgyZQrS0tKwdu1aaGpqih2LiIiIKhALSiq1SZMmwcjICKNGjUJGRga2bdsGHR0dsWMRERFRBeFa3lRmDh06hIEDB8LT0xN79uyBoaGh2JGIiIioArCgpDJ16tQp9OrVCw0bNkRAQACqV68udiQiIiIqZywoqcxdvnwZ3t7eMDMzw+HDh2FpaSl2JCIiIipHLCipXMTExKBz587Q1NREcHAw7OzsxI5ERERE5YRzvFC5cHBwQGhoKDQ0NODl5YWoqCixIxEREVE5YUFJ5cbGxganTp2CmZkZ2rZtiwsXLogdiYiIiMoBC0oqV7Vq1cLx48fRqFEjdOrUCSEhIWJHIiIiojLGgpLKnYmJCYKCgtCqVSt0794d+/btEzsSERERlSEWlFQh9PX1sW/fPvTu3RsDBgzApk2bxI5EREREZYQFJVUYbW1tbN26FWPHjsVHH32Ef/75R+xIREREVAa49CJVKHV1daxatQrGxsaYOXMm0tPT8fXXX0MikYgdjYiIiD4QC0qqcBKJBEuWLIGpqSm++eYbpKam4rfffmNRSUREVEmxoCRRSCQSfPPNNzAxMcHMmTORlpaGFStWQF1dXexoRERE9J5YUJKoZsyYAWNjY4wbNw4ZGRnYtGkTtLS0xI5FRERE74FLL5JS2Lt3L4YMGYIOHTpg586d0NfXFzsSERERlRALSlIax44dQ58+fdCsWTMcOHAAJiYmYkciIiKiEmBBSUrlwoUL6N69O6ytrREYGIhatWqJHYmIiIjegQUlKZ1r166ha9euMDAwQHBwMKytrcWORERERG/Bic1J6TRu3BinTp1Cfn4+vLy8EBMTI3YkIiIiegsWlKSU6tWrh9DQUBgaGqJNmza4cuWK2JGIiIjoDVhQktKytLTEyZMnUbduXbRv3x6hoaFiRyIiIqJisKAkpVa9enUcPXoUrq6u6Nq1KwIDA8WORERERK9hQUlKz9DQEAEBAejSpQv69OmD7du3ix2JiIiIXsGCkioFHR0d7NixA0OHDsWwYcOwatUqsSMRERHR/+PSi1RpaGpqYv369TA2NsbkyZORnp6Ozz//XOxYREREVR4LSqpU1NTU8Pfff8PExARffPEFUlNT8eOPP0IikYgdjYiIqMpiQUmVjkQiwY8//ghTU1N8/vnnSEtLw99//w01NY7gICIiEgMLSqq0PvvsM4Xu73Xr1kFTU1PsWERERFUOC0qq1CZOnAhjY2OMHDkSGRkZ2L59O3R0dMSORUREVKVwLW9SCYGBgRgwYABatGiBffv2wdDQUOxIREREVQYLSlIZoaGh6NWrF+rXr49Dhw6hRo0aYkciIiKqElhQkkoJDw9H165dUbNmTRw+fBi1a9cWOxIRE
ZHKY0FJKicmJgZdunSBhoYGjhw5gnr16okdiYiISKVxnhVSOQ4ODggNDYWmpia8vLxw7do1sSMRERGpNBaUpJKsra1x6tQpmJubo23btjh//rzYkYiIiFQWC0pSWWZmZggJCYGjoyM6deqEo0ePih2JiIhIJbGgJJVmYmKCw4cPo02bNujRowf27NkjdiQiIiKVw4KSVJ6enh727t2Lvn37YtCgQdiwYYPYkYiIiFQKC0qqErS0tLBlyxaMGzcOY8aMwd9//y12JCIiIpXBpRepylBXV8fKlSthYmKCWbNmIS0tDd988w0kEonY0YiIiCo1FpRUpUgkEvz6668wNTXFvHnzkJqaiqVLl7KoJCIiKgUWlFTlSCQSfP311zAxMcH06dORnp6OlStXQl1dXexoRERElRILSqqypk2bBiMjI4wdOxbp6enYvHkztLW1xY5FRERU6XDpRary9u3bhyFDhqBdu3bYtWsX9PX1xY5ERERUqbCgJAIQEhKCPn36wNnZGQcOHICpqanYkYiIiCoNFpRE/y8sLAze3t6oU6cODh8+jFq1aokdiYiIqFJgQUn0iqioKHTp0gUGBgY4cuQIbGxsxI5ERESk9DixOdErnJycEBoaioKCAnh5eeHGjRtiRyIiIlJ6LCiJXmNnZ4fQ0FAYGxujTZs2uHz5stiRiIiIlBoLSqJiWFhY4MSJE7Czs0OHDh1w6tQpsSMREREpLRaURG9QvXp1BAcHw83NDV27dkVAQIDYkYiIiJQSC0qitzA0NMTBgwfRrVs39O3bF9u2bRM7EhERkdJhQUn0Djo6OtixYweGDx+O4cOHY+XKlWJHIiIiUipcepGoBDQ0NODv7w9jY2NMmTIFaWlp+PLLL8WORURUaQkCkJMj+6+ODiCRiJ2ISoMFJVEJqamp4a+//oKpqSnmzJmD1NRU/Pzzz5DwtyAR0TsJAhAWBmzbJvvvlStAZqZsn5YW4OwMuLkBffoA3boBauxDrVQ4sTnRB/jf//6Hzz77DFOnTsWyZcugxt98RERvtHcv8MMPwKVLJWtvZwd89hkwdSoLy8qCBSXRB1q7di0mTZqEYcOGwd/fH5qammJHIiJSKk+eADNnAlu3Ft1naZkLC4s8SCTAkycaSEzUgiAo9vh4eQHr1gH29hUUmD4YC0qiUtixYwdGjBiBbt26Yfv27dDV1RU7EhGRUkhIADp3Bm7dernN0fEFhg17ivbtn8HUtECh/fPnajh3Th/bt1fDmTOG8u3GxsDBg0Dr1hWVnD4EC0qiUgoKCsKAAQPg7u6Offv2wcjISOxIRESiun9fVgAmJsqeGxnl46uvktCzZ3qJbr4JC9PHt9/Wxv37WgAAAwMgJEQ2xpKUEwtKojJw5swZ9OjRA/Xr18ehQ4dQo0YNsSMREYlCKpVdmQwJkT23s8vGihW3YW6e/17nycpSw+zZ1jh71gAAULcuEBkpKy5J+bCgJCoj4eHh6NatG6pXr44jR46gdu3aYkciIqpwfn7AtGmyx+bmufj331uoWfNlMRkWFobx48cXe+ymTZvQtGlT+fPsbAkmTaqL8HB9AICPD+DrW37Z6cOxoCQqQ7GxsejSpQvU1NRw5MgR2HMkORFVIVlZQJ06QGqq7PnKlQnw9Hyu0KawoBw5ciScnJwU9nl5ecHU1FRh2927mhg4sD5evFCDRALExAD165fr26APwJvxicpQgwYNEBoaCm1tbXh5eSEyMlLsSEREFWbbtpfFZPfuaUWKyVc1b94cvXv3Vvh6vZgEACurPEyenAxANpfl8uXlEp1KiQUlURmzsrLCyZMnYWlpiXbt2uHcuXNiRyIiqhBr1758PGrU03e2f/78OfLz3z22cuDAVGhpSQEA/v6ycZqkXFhQEpUDMzMzhISEwMnJCZ07d0ZwcLDYkYiIylVenmwFHACoUycXTZq8eGv7b7/9Fi1btoSbmxvGjx+PqKioN7Y1NS1A69ayZXVSUoDY2DKLTWWEBSVROTE2NkZQUBDatm2Lnj17Yvfu3WJHIiIqN9evy9bmBoAmTbLeOD2QpqYmunTpgrlz5+Kvv/7CjBkzEBcXhzFjxiA6OvqN52/c+GWBWtIVd6jisKAkKkd6enrYs2cP+vfvj0GDBmH9+vViRyIiKpWlS5diyZIluHv3rsL2wjknAcDOLueNxzdr1gz/+9//0L9/f3To0AETJ07E5s2bIZFI8Oeff77xuHr1sot9LVIOLCiJypmWlhY2b96MCRMmYOzYsfjrr7/EjkRE9MGWLFmCL774AtbW1vDy8sKKFSvw9OlT5OW9bKOl9X4TyFhbW6NDhw64cOECCgoKim3z6jlffS1SDhpiByCqCtTV1bFixQqYmpri448/RmpqKubPnw9JSZaMICJSInXr1sXDhw8BAKdPn8bp06cxdepU6OoOALATAJCZ+f7Xq8zNzZGXl4cXL17AoJjZyzMz1eWPucqt8mFBSVRBJBIJFi9eDFNTU3z11VdIS0vD0qVLoabGjgIiUi5SqRTXr1/H+fPncfXqVcTExODOnTtITk5GSkpKsce8eHFF/jgmRue9X/PevXvQ1taGnp5esftv3Hh5zoYN3/v0VM5YUBJVsLlz58LY2BjTp09HWloaVq1aBQ0N/q9IRBVHKpUiPj4e58+fx5UrVxATE4PExEQ8evQI6enpyM3NVWgvkUigo6MDU1NT1KpVC48ePcKr66IMGjQIy5b5omFD2TyUkZF6yMsDNDWLvnZKSgqqVaumsC0mJgYhISHw8vJ64x/Z4eEvC01X11K8eSoX/FeMSAQ+Pj4wNjbG6NGjkZGRgX///Rfa2tpixyIiFXLnzh2cPXsW4eHhiI6Oxu3bt/Hw4UOkp6cjOzu7SHsdHR0YGxvD0dERNjY2aNiwIVxcXNCyZUvY2NjI261btw7jx4+Huro6jIyMsHLlSgwaNAgA0KEDsGsXkJqqgePHjdClS0aR1/niiy+gra2NZs2aoVq1aoiPj8fOnTuhq6uLTz75pNj3cuuWNi5fli2/WL8+YGlZFp8QlSUWlEQiGTFiBAwNDTF48GD07t0bu3fvhr6+vtixiKiSePjwIc6ePYvLly8jOjoaCQkJSEpKQlpaGrKzs/H6yspaWlowNjaGvb29vGBs2rQpPD09YWdnV+LhN4XLJfbr1w9+fn6oWbOmfN+UKbKCEgD8/WugU6cMvH7ajh074uDBg9iwYQOeP38OU1NTdOrUCT4+PrC2ti72Nf39a8gfT52KN05JROLhWt5EIjt+/Dh69+6NJk2a4ODBg8UuPUZEVU9KSgrOnDmDy5cv4/r167h16xYePHiA1NRUvHjxokjBqKmpCSMjI5iZmcHGxgYNGjRA06ZN4eHhAUdHxzIdr3337l1YWVkV2S6VAk5OwI0bsudz5iSVaMWctzl7Vh+TJ9sCAPT1gTt3gNd6zEkJsKAkUgIXL16Et7c3ateujaCgIJibm4sdiYjKWUZGBs6fP4+LFy8iKioK8fHxePDgAVJSUpCVlQXpa+sLamhowNDQEDVr1oS1tTUaNGgAZ2dneHh4
oEmTJkozFvv4cVnXNwBoa0uxYsVtuLpmfdC57t3TxOjRdnj8WDYYc9kyYNq0MgpKZYoFJZGSuH79Orp06QI9PT0cOXIEdevWFTsSEZVCVlYWLly4gEuXLiEyMhLx8fG4f/8+njx5gufPnxcpGNXV1WFgYICaNWuiTp06qF+/Ppo0aQJ3d3c0b94cWlpaIr2T9zdrFvD337LHuroF+PXXe2jf/tl7nSM6WgczZ9rg0SNZMdmxI3DkCIp0oZNyYEFJpEQSEhLQpUsXZGdn48iRI2jUqJHYkYjoDXJzc3Hp0iVcuHAB165dw82bN3H37l08efIEmZmZRSboVldXh76+PqpXr446derA3t4ejRs3hru7O1xdXd84XU5llJsL9O0LBAa+3NavXypmz36I6tWLn7i8UFaWGtatq4HVq2siP182WLJRI+DkSaBGjbceSiJiQUmkZJKSktC1a1c8fPgQgYGBcOX8GESiyM/PR3h4OMLCwhAZGYnY2FjcvXsXjx8/xrNnz5Cfn6/QXk1NDXp6eqhevTpq166NevXqwcnJCW5ubnB3d4eRkZFI70Qc2dnAiBHA7t0vt2lqStG1awbat8+Ak1M2LC1zIZEAjx9r4Pp1XZw9a4D9+00UJjF3dQUCAgAzMxHeBJUYC0oiJZSSkoIePXrg+vXrOHDgANq2bSt2JCKV8+rk3REREYiNjUViYiIeP36MjIwM5L22vp9EIoGenh5MTU1haWkJOzs7ODk5oXnz5mjZsmWRuRUJEARg9Wrgs8+AZ+/X4w11deDrr4FvvgEqUW9/lcWCkkhJZWZmol+/fjh9+jR27NiBnj17ih2JqFIpnLz77NmziIiIwI0bN3Dnzp0STd5tYWEBW1tbNGrUCK6urmjRogVvliuFe/dkYyrXrAGevuOmbx0dKUaOVMPs2UDjxhUSj8oAC0oiJZadnY3hw4fjwIED2LhxI4YNGyZ2JCKlkpiYiHPnzuHy5cuIiYlBQkICHj16hLS0NOTk5BRpr6OjAxMTE5ibm6Nu3bpo1KgRmjVrhpYtW75xDkQqO9nZQHAwcPEicOUK8OSJbKohY2PAxiYVK1dOgb39bcTGnoeEk01WKiwoiZRcfn4+JkyYgI0bN8LPzw9TpkwROxJRhXnw4IF8tZfr16/j9u3bb528W1tbG8bGxqhVq5Z88u7CgtHW1rZM52KkshUcHIwuXboAAL799lssXLhQ5ET0PpRj0ioieiMNDQ2sW7cOJiYmmDp1KlJTUzF37lyxYxGViSdPnuDcuXO4dOkSoqOjER8fj6SkpDdO3q2lpQUjIyPY2trC2toaDRs2hLOzM1q2bAkHBwcWjJXYpUuX5I9/+OEH1K5dm39AVyIsKIkqATU1Nfzxxx8wNTXFV199hbS0NCxatIhdQqT0MjIy5MsDXrt2Dbdu3cL9+/eRkpKCFy9eFJmLUVNTE4aGhrCysoK1tTXq168vX+2lcePGSjN5N5W9ixcvQiKRyP+I8PHxgbm5Ofr27StyMioJdnkTVTJ//PEHPvnkE0ydOhX//PMP1NXV330QUTkpnLw7LCwMUVFRuHnzJu7fv4+nT58WO3m3hoaGwuTdDRo0QOPGjdGiRQs0bdq0Uk3eTWXL2toad+/eVdimpaWFCxcuoGnTpiKlopLin3pElczs2bNhbGyMiRMnIi0tDRs2bICmpqbYsUhF5ebmIiwsDBcvXkRkZCRu3ryJe/fuvXPy7po1a8LFxQX16tWDs7Mz3Nzc4ObmBh0dHZHeCSmz1NTUIsUkABgZGeHZ+843RKJgQUlUCY0bNw7GxsYYNmwYnj17hv/++w+6urpix6JKqHDy7gsXLihM3v3kyZN3Tt7t5OQEe3t7ODk5yafWMTAwEOmdUGVWWExqaWnBxMQEycnJCAkJQbt27Ti0p5JglzdRJXb48GH0798fbm5u2L9/v8JKHFKplDcoEKRSKa5du4YLFy7IJ+++c+cOkpOT8ezZszdO3l2tWrUik3d7enrCxMREnDdCKk0QBFy/fh316tVDSEgIevToge+//x7z588XOxqVEAtKokruzJkz6NmzJ+zs7BAYGAhDQ0P069cPABD46kK6pJKkUini4uJw7tw5REREICYmBomJiUhOTn7r5N3VqlWDubk57Ozs4OjoKF/txYzr25HIpFIptLS04OzsjMuXL4sdh0qIBSWRCrh69Sq6du0KExMTWFtb48iRIwCAmzdvol69eiKno9JKSEjAuXPnEB4ejhs3buD27dt4+PAh0tPT3zp5t4WFhXzybhcXF7Rs2RJ16tQR4R0QvR8HBwckJiYiOztb7ChUQiwoiVRETEwMmjdvjqysLACymyPmzZuH77//vkhbQQCuXQPCwoDLl4HkZKCgADAyApo0AVxdgZYtAW3tin4XVVPh5N2XL19GdHS0vGBMTU0t9h/UVyfvtrW1hYODA5o2bYpWrVrBxsaGQx2o0ps9ezb+/PNPhIWFwc3NTew4VAIsKIlUgCAI+Pjjj/H3338rbK9Tpw4SExPlBcbz58DGjYCvLxAZ+fZz1qgBTJgA+PgANjbllbxqePz4sbxgvH79OhISEko0eXetWrVgbW0NBwcHNGvWDB4eHpy8m6qE2NhYODg4YMKECVi9erXYcagEWFASqYCgoCB4e3sXu+/48eNo164dQkKA8eOB27ff79w6OsCPPwKzZwOc8rJ4aWlp8tVerl+/jvj4eDx48AApKSnIysoqUjAWTt5tZmYGKysrODg4oEmTJmjZsiUcHR05eTcRIJ9+6vb7/tIiUbCgJFIBWVlZ+P3337Fnzx5cunRJoYDp0qUr3NyCsGiR4jG2Hg/QpGc8rJolo4ZtOiQSAc+e6OJehBliT1rh6n57FOS9rCDbtgX27AFMTSvoTSmR58+f4/z587h48aLC5N0pKSnvnLzbysoK9evXh7OzM9zd3eHi4sKCkagEWrdujbNnzyI3N5f/z1QCLCiJVMzjx49x6NAh7N69G/v27Yea2l/Iz58m31+v1T30X3QCdZo8eet5niXr4cj/3HBqVTMIgmweuGbNgJAQQNVmjsnOzsbFixeLnbz7+fPnxU7ebWBggOrVq6NOnTqoX78+GjduDHd3d7i6unLybqIyULgq2L///ovhw4eLHYfegQUlkQpbujQfn38u+8teIhHQ5/tTaDftCt5nCF78WUusG9sTmY/1AACdOwOHDwOVaa7hvLy8IpN337t3D48fP0ZmZmaxk3fr6+ujWrVqqFOnDurVq4fGjRvD1dUVHh4enLybqAJkZGTA2NgY3t7eOHTokNhx6B1YUBKpqNhYoGlToPAm4RG+QfAYduODzvUozgR/9xosLyr9/ICpU8sqaelJpVJERkbi/PnzuHr1qnzy7sePH5do8u569erB0dERbm5uaNGiBSfvJlIStWrVQl5eHlJSUsSOQu/AgpJIRXXpAgQHyx63nXwFA345WWy7uxF3Ebg4EAnnEpCXk4fqNtXhOcYT7aa0U2h345g1lg/qDwAwMAASEmR3gr8uLy8P6urqZXonslQqRUxMDM6fP19k8u6
MjIxiJ+/W1dWFqakpLCwsYGtrCycnJ/lcjJy8m6hyGDBgAHbv3o3Hjx+jRnG/cEhpcJQrkQqKinpZTFazTkfPb88U2+7GsRtYNWIV6jSpg66fd4W2vjae3H6C9AfpRdo27HgHLUZG4fxmJ2RmAmvXAl9++XK/VCrFihUr8Pnnn+N///sfpkyZUuK8UqkUt2/fxtmzZ+WTdycmJuLRo0dvnbzb1NQUTk5OsLW1RcOGDdG8eXO0aNGCk3cTqYjx48dj9+7dWL58Ob755hux49Bb8AolkQqaORP45x/Z4/6LjqPdlIgibbIzsvGTx0+w9bDFWP+xJbqi+PiWMX5yGwsAqFsXiI8H1NSAxMREjB07FsePHwcATJo0CStXrlQ49u7duzh37hyuXLmiMHl3WlraWyfvNjc3R926ddGwYUM0a9YMLVu2hK2t7Xt9HkRUORUuw9i0aVNcunRJ7Dj0FrxCSaSCgoJk/9XQzofH8Ohi21zaeQnPkp+hx7weUFNTQ87zHGjqar61sKxpl44G7e4g9oQ1bt8G4uIEHD++Ep988olCt/PevXtx+fJl+eTd2dnZxU7ebWxsjHr16sHGxka+2kvLli1Rv359Tt5NRFBTU4OdnR2ioqLEjkLvwIKSSMWkpwNxcbLHlk5PoGuUW2y72OOx0DHUQXpSOtZ8tAaPbz6Glr4W3Ia4of9P/aGpo1nscXae9xF7whoA4Ow8Drm564u0SU5ORmpqKgwNDWFjYwNra2s0aNAATZs2hYeHBxo3bsyCkYhKxNvbG3///TcuX76M5s2bix2H3oAFJZGKufHKjdx1nB+/sd3jW48hLZBizag1aDGqBXp92ws3T9/EqZWn8CL9BcasHlPscVZNX56zoKBhsW00NDSQnZ3NopGISm369On4+++/4efnh1WrVokdh96Av+2JVExm5svHeqZFxyYWynmeg9ysXLgNdcPAXwaiae+mGPjLQLQa2wpXdl3B4/jii1E90xfyxzNnzsXDhw/x999/w93dXb49Pz8fDx48KP2bIaIqz8HBAXp6egguvNOQlBILSiIV8+oKZQV5b/5fvLBL23Wgq8J210Gy5wlhCcUe9+pyjBoasnniZsyYgQsXLuDWrVv4+eefMWDAAFSvXv1D3wIRkQJnZ2ckJiYWWYSAlAcLSiIVU7v2y8eP403e2M7Y3BgAYFBTcdUXgxqy5y/SXhQ5BgCS414u5m1pqbjP1tYWX331FXbu3AldXd33SE1E9GaDBw+GIAjYuXOn2FHoDVhQEqkYOzvAyEj2+G54LbxpYrA6zWRzNaYnKc45mf5Q9rywsHzd3fCXk4K7uhbbhIioTE2cOBEAsGHDBpGT0JuwoCRSMWpqQIsWssfpSQZIOG9ZbDuXfi4AgPObzitsP7fxHNQ01GDf2r7IMfk56ogMqAdA1t3t4lKGwYmI3sDIyAg1a9bEuXPnxI5Cb8C7vIlU0OjRwJEjsscnVzaFXcuiN8jUca6DFiNb4Pzm8yjIL4B9a3vcDL2J8L3h6PxJZxhbGBc5JnyvPTKfyNbzHjgQMDQs17dBRCTXunVr7NmzB0+fPuUYbSXEK5REKmjQoJfrbIfvaYDYk8UvRTjkf0PgPccbiZcSsfvr3bgXeQ/9fuqHXt/2KtI2K00b+7/3kj/38SmX6ERExRo3bhwAYMWKFSInoeJw6UUiFbVsGTBjhuyxqVUGPju6FQY1ir/R5l2kUmDjZG9c2eUAAPD2BgICAImkrNISEb2dVCqFpqYmXFxccPHiRbHj0GtYUBKpKKkU6NRJwPHjsqrPsvFj+OzcDcOa71dUSqXAji864Mw6ZwCyG36iooA6xV/0JCIqN/Xr18e9e/fw4sWH/XFM5Ydd3kQqRhAExMXF4Z+//kDCpSbQxj0AwINrNbGk/QhEHa5b4nM9vmWMf3oPkheTamqAvz+LSSISh7e3N7KzsxEeHi52FHoNr1ASqYDc3FwcOnQIgYGBOHDgAO7duyff52nrjbt5h/DKJjh0SESbiRFo1DkR6ppShXMJAnD/Wg2cWeeMsG0NkfdCNgG6ujqwfj0wcmSFvCUioiKio6Ph6OiIKVOmYPny5WLHoVewoCRSAb/++ivmzJkDdXV1FBQUKOy7cuUKatZshokTgcBAxeM0tPNh6fQE1eumQ01NwLPHerh3tSayUhUnJa9bV3Zlsl278n0fRETvoqenB3Nzc9y6dUvsKPQKFpREKuDhw4fw8PDAgwcP5AWlBEATZ2dEREQAkF15XL8eWLgQSCh+VcUiDAyAyZOB77+XPSYiElvLli0RFhaG3NxcqKurv/sAqhAcQ0mkAszNzbH+l18gvHp1UiLB1KlTX32KsWOBuDjg4EFZ13WDBkXPVasW0KMH4OsLPHgALF3KYpKIlMfgwYMhlUqxa9cusaPQK3iFkkgFRGzfDu/hw6EhkeCJhgayc3KgpaWFR48ewcTE5K3HPnsGPHkCFBTI7uCuWZPTARGR8srIyICxsTF69uyJAwcOiB2H/h+vUBJVcif+/BNthw6FpbY2LoaHY/+BA9DQ0MCQIUPeWUwCstVubG0Be3vAzIzFJBEpNyMjI9SoUQNnz54VOwq9glcoiSqxXV9+iRG//QYvU1PsioyEUe3aAGR3QtauXRtGRkYiJyQiKnv9+vXD3r178fTpU1SrVk3sOAReoSSqtJaPGIHBv/2GvlZWOHj7tryYBIBGjRqxmCQilTV27FgAwMqVK8UNQnK8QklUyQhSKRZ26oQFx49jprMz/rh0CWoaGmLHIiKqMIXLMDZv3hxhYWFixyGwoCSqVApyczHDxQXLr1/HT1274qtDhyBRY0cDEVU99vb2uH//PpdhVBL8l4iokshOS8MQW1usvH4dq8eMwddBQSwmiajK6tatG7Kzs3H16lWxoxBYUBJVCul37sC7bl0EPHiA3V9/jQn+/mJHIiIS1bRp0wAAfn5+IichgF3eREovKTwc3T09kZiTg/3//AOv//8lSkRU1enp6cHCwgLx8fFiR6nyeIWSSInFHTmCVu7ueJKbi1O7drGYJCJ6RZMmTXD79m1IpVKxo1R5LCiJlNTFDRvQuls3aKup4UxoKBr36yd2JCIipTJw4EBIpVLs3r1b7ChVHgtKIiV05Jdf0H7MGNjp6SE0MhLWnp5iRyIiUjqTJ08GAPhzXLnoOIaSSMlsmTkTY/75B51r1sR/165B38xM7EhEREqrRo0aAIAnT56InKRq4xVKIiXy54ABGPHPPxhuZ4e9t2+zmCQiegdPT088ffoUaWlpYkep0lhQEikBQSrFV56emL17N77w8IB/XBw09fTEjkVEpPS4DKNyYJc3kcjys7Mx2dkZ6+LisLRPH3y6d6/YkYiIKo3CZRhdXV1x4cIFseNUWSwoiUSU9eQJhjo5ITA5GeumTsUoTtBLRPTe6tWrh6SkJGRlZYkdpcpilzeRSFLi49HZ1hYhycnY/+OPLCaJiD5Q165d8eLFC1y7dk3sKFUWC0oiEdw9fx5ejo6Iff4cx9auhfe8eWJHIiKqtHx8fAAAvr6+IieputjlTV
TBru/bh24DBkAdQND+/XDo3l3sSERElZ6uri5q166Nmzdvih2lSuIVSqIKdHblSnj16wdTTU2cuXCBxSQRURlp3LgxEhISuAyjSFhQElWQA/Pno9OUKWhsaIiTN27AsnlzsSMREamMwmUY93KmDFGwy5uoAvhPnIiJa9agt4UF/r12DbrVqokdiYhIpaSkpKB69ero06cPi0oRsKAkKkeCVIpfe/bE3MBATG7YEMvCw6GhrS12LCIilVS9enWoqanh8ePHYkepctjlTVROpPn5+NTNDXMDA/FtmzZYHhXFYpKIqBx5enriyZMnXIZRBCwoicpBbmYmRtnb488rV7Bs6FAsPHkSEjX+70ZEVJ4Kl2FctWqVuEGqIHZ5E5WxZw8eYGCTJjiRkoLNn36KQUuXih2JiKhKKFyG0d3dHefOnRM7TpXCgpKoDCVHRaGnhwdisrKwd+lSdPj0U7EjERFVKXZ2dnj48CGXYaxg7IMjKiMJJ0/Cy8UFd7OzceLff1lMEhGJoHAZxqioKLGjVCksKInKQMT27WjVoQOkgoAzR4/CZfhwsSMREVVJU6dOBQAsX75c5CRVC7u8iUrpxJ9/os/s2bDX1UXAhQuo1bix2JGIiKo0XV1d1KlTB3FxcWJHqTJ4hZKoFHZ9+SW6zZ4Nd1NTHL95k8UkEZEScHJywq1bt7gMYwViQUn0gZaPGIFBv/2GflZWOHj7NgwtLcWOREREeLkM4/79+8WOUmWwoCR6T4JUiu87dIDPli2Y4eyMf2/dgraRkdixiIjo/02ZMgUA4O/vL26QKoRjKIneQ0FuLma4uGD59ev4uWtXzD10iBOWExEpoerVq0NdXR3JycliR6kS+C8hUQllp6VhiK0tVl6/jtVjx+KroCAWk0RESqply5Z4/PgxMjIyxI5SJfBfQ6ISSL9zB9516yLgwQPs/vprTFi3TuxIRET0FqNHjwYArF69WuQkVQO7vIneISk8HN09PZGYk4MDy5ahtY+P2JGIiOgd8vPzoaWlhRYtWuDs2bNix1F5LCiJ3iLuyBF07dEDeVIpgnbtglPfvmJHIiKiErK1tcWjR4+4DGMFYJc30Rtc3LABrbt1g7aaGs6EhrKYJCKqZLp06YIXL14gOjpa7CgqjwUlUTGO/PIL2o8ZAzt9fYRGRsLa01PsSERE9J4Kl2H08/MTOYnqY5c30Wu2zJyJMf/8g841a+K/a9egb2YmdiQiIvpAOjo6sLa2RmxsrNhRVBqvUBK94s8BAzDin38wvF497L19m8UkEVEl17hxY8THx3MZxnLGgpIIstVvvvL0xOzdu/GFhwf8Y2OhqacndiwiIiqlfv36QSqVIiAgQOwoKo0FJVV5+dnZGO/ggF/OncPSPn3w6/nznLCciEhFFI6jXLt2rchJVBvHUFKVlvXkCYY6OSEwORnrpk7FKA7cJiJSOdWqVYOGhgaXYSxHvAxDVVZKfDw629oiJDkZ+3/8kcUkEZGKatGiBZdhLGcsKKlKunv+PLwcHRH7/DmOrV0L73nzxI5ERETlpHAZRnZ7lx92eVOVc33fPnQbMADqAIIOHICDt7fYkYiIqBwVLsPYsmVLnDlzRuw4KolXKKlKObNiBbz69YOppibOXLjAYpKIqArQ0NCAtbU1IiIixI6islhQUpVxYP58dJ46FY0NDXHyxg1YNm8udiQiIqognTt3RlZWFmJiYsSOopJYUFKVsG78ePT74Qd0s7BA0O3bMLGxETsSERFVoMLpg3x9fUVOopo4hpJUmiCVYnGPHvgqKAiTGzbEsvBwaGhrix2LiIhEoKOjAxsbG16lLAe8QkkqS5qfj09cXfFVUBC+bdMGy6OiWEwSEVVhjo6OXIaxnLCgJJWUm5mJUfb2+Cs8HMuGDsXCkye5+g0RURXXr18/FBQUIDAwUOwoKof/wpLKefbgAXrZ2GBnYiK2f/oppm3dKnYkIiJSAoXjKNesWSNyEtXDMZSkUpKjotDTwwMxWVnYu3QpOnz6qdiRiIhIiZiamkJLSwuPHj0SO4pK4RVKUhkJJ0+itYsL7mZn48SWLSwmiYioiBYtWiA5ORmZmZliR1EpLChJJURs345WHTpAAHDm6FG4DBsmdiQiIlJCH330EQB2e5c1dnlTpXf8jz/Q95NPYK+nh4Dz51GrcWOxIxERkZIqXIaxVatWCA0NFTuOymBBSZXazi++wIglS9CmWjXsjoyEoaWl2JGIiEjJ2djY4OnTp+z2LkPs8qZKa/mIERi8ZAn6W1vj4O3bLCaJiKhEOnXqhOfPnyMuLk7sKCqDBSVVOoJUigXt28NnyxbMaNoU/8bHQ9vQUOxYRERUSfj4+ADgMoxliV3eVKkU5OZierNmWBEdjZ+7dcPcgABOWE5ERO9NW1sbtra2uHHjhthRVAL/JaZKIzstDUNsbbEqOhqrx47FV4GBLCaJiOiDODo64ubNm1yGsYzwX2OqFNISE+Fdty4CHjzA7nnzMGHdOrEjERFRJVa4DGNQUJDYUVQCC0pSeknh4WjXsCEiMjIQ7OuLPj/+KHYkIiKq5ArHUa5du1bkJKqBYyhJqcUGBaFbr17Ik0oRtGsXnPr2FTsSERGpCFNTU2hra+Phw4diR6n0eIWSlFbY+vVo3b07dNTUcCY0lMUkERGVKQ8PDzx69AhZWVliR6n0WFCSUjq8aBE6jB2Levr6CL12DdaenmJHIiIiFVO4DCO7vUuPXd6kdP6dPh1jfH3RtWZNbL92DfpmZmJHIiIiFVS4DGPr1q1x6tQpseNUaiwoSan80b8/PtmzB6Pr1cPqq1ehqacndiQiIlJh1tbWSElJ4TKMpcQub1IKglSKuS1b4pM9e/Blixbwj41lMUlEROWucBnG+Ph4saNUaiwoSXR5WVkY7+CAxefP4399+2LxuXOcsJyIiCrE1KlTAXAZxtJilzeJKuvJEwxxckJQcjL8fXwwkv9DExFRBdPW1oadnR2io6PFjlJp8TIQieZpXBw629rieHIyDvz0E4tJIiISRaNGjRAXF8dlGEuBBSWJ4u7582jTuDHinj/HsXXr0O3rr8WOREREVVSfPn1QUFCAo0ePih2l0mJBSRUuau9etGrdGlkFBTh96BA8xo4VOxIREVVh06ZNAwCsWrVK5CSVF8dQUoU6s2IFevn4oI62NgJPn4Zl8+ZiRyIiIoKJiQl0dXWRlJQkdpRKiVcoqcIcmD8fnadORRMjI5y8cYPFJBERKQ0PDw88fPiQyzB+IBaUVCHWjR+Pfj/8AG8LCwTdvg0TGxuxIxEREcmNHDkSAODv7y9ukEqKXd5UrgSpFIt79MBXQUGY3LAhfCMioK6lJXYsIiIiBbm5udDR0YGXlxdOnjwpdpxKhwUllRtpfj4+dXfHn+HhmN+2LRaEhHDCciIiUlrW1tZITU3Fs2fPxI5S6fBfdyoXuZmZGGVvj7/Cw+E7bBi+P3GCxSQRESm1Dh06IDMzEwkJCWJHqXT4LzyVuWcPHqCXjQ12JiZi+6efwmfLFrEjERERvdPkyZMBcBnGD8EubypTyVFR6Onhg
disLOz53//Q4ZNPxI5ERERUYtra2qhXrx6uX78udpRKRUPsAKQ6Ek6eRNfOnfGsoAAntm5Fs6FDxY5ERET0XhwcHBAdHQ2pVAo1DtUqMX5SVCYitm1Dqw4dAABnjh5lMUlERJVS3759kZ+fj5CQELGjVCosKKnUjv/xB9oOG4baOjo4feUK7Nq3FzsSERHRB/Hx8QHAZRjfF8dQUqns/OILjFiyBG2rVcOuyEgYWlqKHYmIiKhUjI2Noa+vjwcPHogdpdLgFUr6YMtHjMDgJUswwNoaBxMTWUwSEZFKcHd3R1JSEpdhfA8sKOm9CVIpFrRvD58tWzCzaVNsjo+HloGB2LGIiIjKROEyjOvXrxc5SeXBLm96LwW5uZjerBlWREdjUbdumBMQwAnLiYhIpRQuw9i2bVscP35c7DiVAgtKKrHstDSMdHLC3gcPsHLcOIxfu1bsSEREROWiTp06SE9P5zKMJcRLS1QiaYmJ8K5bFwEPHmD3N9+wmCQiIpXWsWNHZGZmIjExUewolQILSnqnpCtX0K5hQ1zNyECwnx96//CD2JGIiIjKFZdhfD/s8qa3ig0KQrdevZAnCAjauRNOffuKHYmIiKhCaGlpoX79+oiKihI7itLjFUp6o7D169G6e3foqqnhTGgoi0kiIqpSGjZsiNjYWLFjVAosKKlYhxctQoexY2Gvr49T167BumVLsSMRERFVqN69eyM/Px/Hjh0TO4rSY0FJRfw7fTp6fv012puZIfjWLVSvX1/sSERERBWucBnG1atXi5xE+XEMJSn4o39/fLJnD8bUq4dVV69CU09P7EhERESi4TKMJcMrlARAtvrN3JYt8cmePZjTogXWxcaymCQioirPzc0NSUlJyM7OFjuKUmNBScjLysL4Bg2w+Px5/K9vX/xy7hxXvyEiIgIwYsQIAMCGDRtETqLc2OVdxWU9fowhTk4IevwY/tOmYeSyZWJHIiIiUhqFyzC2a9cOISEhYsdRWiwoq7CncXHo3bw5rmZmYtfPP6PrV1+JHYmIiEjp1KlTBxkZGcjIyBA7itJiv2YVdffcObRp3Bhxz58jxN+fxSQREdEbtG/fHs+ePcOdO3fEjqK0WFBWQVF796KVlxeyCgpw+tAhuI8ZI3YkIiIipVW4DKOfn5/ISZQXu7yrmDPLl6PXtGmw0tbGodOnYdm8udiRiIiIlJ6WlhYaNGiAa9euiR1FKfEKZRWy/9tv0dnHB02MjHDixg0Wk0RERCXUoEEDxMTE4J9//kHv3r2xYMECsSMpFQ2xA1DFWDd+PCatW4c+lpb4NyoKOiYmYkciIiJSeqdOncKGDRuQmJiI/Px8zJo1C4IggB28ilhQqjhBKsXiHj3wVVAQpjRqhGXh4VDX0hI7FhERUaUwc+ZMREREyJ8LggANDQ24u7uLmEr5sMtbhUnz8/GJqyu+CgrCd+3awe/aNRaTRERE72H9+vXQ19eH2isLfuTn58PV1VXEVMqHBaWKyn32DKPq1cNf4eHwGz4cC44f5+o3RERE76lp06bYt2+fQkEJgAXla1hhqKBnDx6gV9262HnnDv77/HNM/fdfsSMRERFVWh07dsTGjRvlzw0NDWFhYYGcHODZMyA3V8RwSoJjKFVMclQUenp4IDYrC0G//472s2eLHYmIiKjSGzZsGK5cScevv95DdnZbmJsDjx693G9pCbi6Aq1aAaNGAXXqiJdVDJyHUoUknDiBrl26ILOgAIFbtqDpkCFiRyIiIqr0zp0DFi4EAgOBklRN6upA377A/PlA06bln08ZsKBUERHbtsF7xAgYqKvjcHAwbNu2FTsSERFRpZaVBXzzDfDHHyUrJF+noQF8/TUwbx6g6vfEsqBUAcd//x19P/0U9fX0EHDhAsycnMSOREREVKk9egR4ewPh4aU/l5cXsH8/oMpTQLOgrOR2fv45RixdinbVqmFnZCQMLS3FjkRERFSpPX4MtG0L3LhRdJ+OUQ7cBt+AXcsHsHB8Am2DPGQ/08KDazVw83QdXN7VALnPi16OdHMDjh4FjIwq4A2IgAVlJbZ8+HBM27oVw2xs4H/tGrQMDMSOREREVKlJpUDnzkBIiOJ2De18eM89h7aTIqCll//G47MztHDs7+YI/sMd0gLFyXQGDQK2bwckkvJILi4WlJWQIJXi+w4d8P3Jk/i4WTP8LywMahq8YZ+IiKi0li0DZsxQ3Fa9bhombdkHc4fUEp/nzhUzrB7ZGxkPFS/2bN0KDB1aFkmVCwvKSqYgNxfTmzXDiuhoLOrWDXMCAjhhORERURl4/BiwtQWeP3+5zcTyGWYHbYdJ7Uz5tpzMHBz7+xgSLyXizuU7yErLwvB/hqPFiBYK53sUZ4K/ug/B8xRd+bYaNYDbtwF9/fJ+NxWLlUglkp2WhiG2tlgdHY2148ZhbmAgi0kiIqIysnq1YjEJACOWHVEoJgEgMyUTQb8F4VHsI1g2fvO9C7Xqp2HQb4p950+eAKq43girkUoiLTER3nXr4tCDB9jz7bcYt3at2JGIiIhUhlQKrFihuM118A00aHe3SFvjWsZYGL0Q3139Dn2+7/PW87r0j0PDjrcVtvn6ljat8mFBWQkkXbmCdg0b4mpGBoKXL0evhQvFjkRERKRSrl0DEhMVt7WbEl5sWw1tDRjVKvnt2m2nRCg8Dw8H7t9/z4BKjgWlkosNCkIrd3ek5OUhdM8etJoyRexIREREKufiRcXnplYZsG7+qPjG76lhx0RoG+QobLt0qUxOrTRYUCqxMH9/tO7eHbrq6jhz+jQc+7z9sjoRERF9mAjFi4iwblY2xSQAqKkLqNP0scK2spgwXZmwoFQWubkKTw///DM6jBuH+vr6CL1+HVYtWrzhQCIiIiqt1NdmBKpeN71Mz1/dRvF8aWllenrRsaBUBhcuAMbG8tu+/p0+HT3nzUN7MzMEJySgWr16IgckIiJSbUUmUSzjycfV1BRfQCot2/OLjQWlMvD1BbKzgTFj8EerVhjp64tR9vbYnZAAvRo1xE5HRESkMp4/f47g4GA8ePBAYfvrSyKm3Tcs09dNvad4PlVbgpHLq5SB5ORk7Ny5ExcuXMDly5fx6NEjSKVSGBoaokmTJnBzc0PPnj3h4uJS9OCMDGDrVggAvsrPx+KzZzG3cWP8HBHBOSaJiIjK2P79+zF8+HAAQI0aNeDh4QEPDw8A/QE4y9vdjTArs9cUhKLna9q0zE6vFFhQlkJUVBR+/vln/Pfff8jLyyuy//Hjx7h16xb27t2Lb7/9Fi1btsRnn32GgQMHQlK4kOfWrcjLycFkAP4Afgcw+/594OZNoEGDCnw3REREqu/ViztPnjxBQEAAAgICAOwH8PJW78c3TZEUXQ0WjVJK/ZrxZ2ojK1VXYZura6lPq1R4CewD5OXl4aeffoKLiwv+/fdfhWJSTV0NpnVMUc26GrQNtBWOO3fuHAYPHoy+ffvKL7Vn/fMP+gPY/P9fswHZyOCxYyvmzRAREVURycnJ2Lp1KzQ0irueFg5tbcUbZ06tfvNlxFOrTuHwksM4v/k8ACAq
MAqHlxzG4SWH8SLjhULb0NXOCs8bNABsbD7sPSgrXqF8T8+ePUP//v1x9OhR+Tb9avpoMaoFnHs5w9LJElq6WgAAqVSKJ7eeIO5UHE6vPY0HUbIicv/+/Th37hy2zpuHbyIjcRXAAQBdAcDMDOjRAxg/vsLfGxERkSrJzc3Fli1bsHnzZoSFhSGtmFur1dTUoKWlhR07duDsWWP89NPLfec2NEbLUVGwdkkuctyxf44h9e7LW8OvHriKqweuAgBch7hC10h2RTImxBrhexV7HH18AEkZ3/QjNokgFLmvid7gxYsX6NatG06dOgUAkKhJ0HFWR3T7opu8iHwTQRAQeTAS/33+H54lPwMAqEkkMBIEHG7YEO7jxwPe3kDjxqr3U0ZERFRBTp06hZUrV+L48eO4f/8+CsscMzMztGnTBuPGjUN6ejpGjhwJdXV1GBoaIigoCB4eHrh7F6hXD3h1FJtZ/RTMDtoOPZOcN7zim6Un6eP3rkMVbvAxMADu3AFMTUv9VpUKC8r34OPjg+XLlwMA9Ez0MPHfibBrafde53ie8hxrPlqDW2dvAQBq166NGzduwMDAoMzzEhERqbrExET4+vriwIEDiIuLkw9DMzAwgIuLC4YMGYLx48dDT09Pfszjx49Rq1YtWFtbIzg4GPb29vJ9P/wAzJ+v+BpWzR5h0pZ9MKqVVeJcT+8YYsXgfkiOq6aw3ddXdoVS1bCgLKGjR4+ic+fOAAAtPS3MPDATVs2s5PuTopMQuDgQ9yLuISM5A1q6WqjlUAsdZ3ZEY+/GCufKyczBsn7LcOfyHQDAtGnTsGzZsop7M0RERJVUVlYW1q5di+3bt+PKlSvIzMwEAGhqaqJBgwbo2bMnpk2bBpt3DFI8deoUGjVqhBqvTc+Xlwe0bAlcvqzYXs/0BQb8cgLNB8bgbZOwFORLcG5jY+z7zgs5mYq9lx06AMHBeOvxlRULyhIQBAFNmjRBVFQUAGDgrwPRZmIbhTbXj1zHyRUnUde9LozNjZH7IhcR+yNw6+wtDPnfELQa20qh/ZOEJ/i1za/IzZKtkBMVFQVHR8eKeUNERESVhFQqRUBAANatW4fQ0FAkJ8vGM0okEtSuXRsdOnTA5MmT4eXlVWavefs20Lo18NpUlQCAGrZpaDEqCnYtH6C20xNo6ech+5kW7kfWRPyZ2ji30QlpD4rOYWlvD4SGArVqlVlMpcKCsgSOHz+ODh06AABsXG3wcdDHUCvBnxfSAimWdFiC/Jx8fH3+6yL7j/51FPsX7AcAzJgxA3///XfZBiciIqqEoqKi4Ofnh6CgICQkJKCgoAAAYGJiAg8PD4wYMQLDhw+Hltbb718ojbg4oEsXIDGx9Odq1Ag4fBioU6f051JWKnjRteytWrVK/ridT7sSFZPA/08hVNsUL9JfFLvfc4wntPRk/zOsX78eOTnvP+CXiIiosktJScGiRYvg4eEBXV1dNG7cGMuWLcPdu3fRtGlTLFy4EI8ePUJqaiqCgoIwZsyYci0mAaB+fSAsDBg8uHTnGT8eOHtWtYtJgNMGlUjhXd3aBtpw7uX81rY5z3OQl52H7IxsXDt0DdHB0XDpX8wKOQD0jPXQpGcTXPrvEp49e4arV6/C3d29zPMTEREpk/z8fGzfvh2bNm3C+fPnkZIimzxcTU0NdevWRZcuXeDj44OmIi8nU7MmsH07sHMn8P33QGRkyY91d5fd4NOtW/nlUyYsKN8hOTkZd+/eBQDUaVoHGlpv/8j2frsXZ/zPAJBNK+TcyxkDfx34xvY2rja49N8lAMDFixdZUBIRkUo6f/48li9fjmPHjuHu3bvy6Xxq1qyJvn37YsyYMejbt2+JewEr0sCBwIABwOnTwMaNwPnzwLVrwP/3xAMANDWBJk0AT0/Z2iRubqLFFQULyne4deuW/LGlo+U727eb2g5N+zRF+sN0hO8JhyAVUJBb8Mb2Fo4W8sfx8fGlC0tERKQk7t27Bz8/P+zfvx8xMTHIzZXdhKqvrw9PT08MHjwY48ePh5GRkchJS0YiAby8ZF8A8OIFkJQE5OQAOjqApSWgrf32c6gyFpTv8Oq4Rk1dzXe2r9WgFmo1kN3C5THMA34D/LBqxCp8cuSTl+t3v0Jb7+VPH8dQEhFRZZWdnY3169dj69atuHRJNpQLADQ0NGBvb48ePXpg2rRpqFevnshJy4auLmD3flNRqzQWlO+g/cqfG4VT/LyPpn2aYvun25F8Mxm16hedKyAn62URqV3MnzbPnz9Heno6LC3ffXWUiIiookilUgQHB2PNmjU4efIkHj58KN9naWmJnj17YuLEiejYsWOxF1RItbCgfIdX/5JKup703sfnZctm7M/OyC52f+H63oWvFRsbi3PnzuHs2bM4deoUoqOjoaamhqysLGhqvvsKKRERUXmJi4uDr68vAgICEB8fL5/Ox8jICB07dsTw4cMxatQo6OjoiJyUKhoLyneoWbMmrK2tcefOHdy7eg/5OfnQ0C76sT17/AyGNRUnMi3IK0DYtjBo6mrC3MG82PMnXnw5wdW0adPkj9XV1eX/o9apU4fFJBERVbiMjAysWrUKO3bswNWrV5GVJVt6UEtLC40bN0afPn0wdepU9qIRC8qSaNOmDTZv3oyczBxEHIiA60DXIm22f7od2c+yUc+zHowtjPEs+Rku/ncRyXHJ6PtDX2gbFO3OzkrLQmSAbA4CHR0dFBQUyNcgLXjl1jEDAwNcu3YNjRs3LnIOIiKisiKVSrFr1y6sX78eZ8+exdOnTwHIVqWxtrZG586dMWXKFM5IQkVwpZwSOHnyJNq1awcAsG5ujdmHZxeZ1uDyzss4t+kckqKT8DzlOXQMdFCnaR20ndwWjbsXXwgG/xGMAwsPAABmzZqFhQsXon///ggJCSm2vZaWFho0aICuXbti0qRJaNiwYRm+SyIiqoouX74MPz8/BAcH486dO5BKpQCA6tWro2XLlhg9ejQGDBgADQ1eg6I3Y0FZAoIgwNnZGdeuXQMA9F/UH+2mtCvVOR/HP8avbX9F3gvZFcno6Gg0bNgQeXl5mDp1KtauXStvGxQUhP379yM4OBjx8fHyq5ja2tpwcHCAt7c3Jk2aBHt7+1JlIiIi1ffw4UMsX74ce/fuRXR0tHyGEV1dXTg7O2PgwIGYNGkSTExMxA1KlQoLyhIKCQlBx44dAcimD5qxbwZsXG0+6FzZz7KxrO8y3A2XTZj++jregiDgl19+wddffw1bW1uFuTABICwsDGvXrsWxY8dw69Yt5OfnA5B1mzdq1Ajdu3fHpEmTULdu3Q/KR0REqiM3NxebNm3Cv//+i4sXLyI9PR2AbKx+vXr14O3tDR8fH/Z6UamwoHwP06dPh6+vLwBAx0gHk/6dhHqt3m8+rcwnmVgzag0SLiQAAOzs7HD16lXo6+sXaRsYGAgNDQ107tz5rec8c+YM/P39cezYMdy+fVs+/lJXVxeOjo7yqRusrKzeKysREVVOx48fx6pVq3D8+HEkJSXJV6UxNzeHl5cXxo8fj27duinlqjRUObGgfA/
Z2dno3r07jh8/DkC2tGL7ae3RfW53aOm9fZF6QRAQsS8CO77YgcwnmQAAU1NTnDhxAk2aNCnTnCdPnoS/vz9OnDiBxMREeYGpr68PJycn9OrVCxMmTOBdeUREKiIhIQG+vr44ePAg4uLi5D1XhoaGaN68OYYOHYoxY8ZAT09P5KSkqlhQvqfnz59jwIABOHz4sHybnoke3Ie7o2nvpqjdpDa09WV3dEsLpHgU+whxp+JwZt0ZPIx5OemrmZkZgoKC0KxZs3LNK5VKERISgg0bNuDEiRO4e/eufMC1gYEBmjRpgt69e2PChAkwMzMr1yxERFQ2MjMzsWbNGvz3338IDw/H8+fPAQCamppo2LAhevXqhalTp8La2lrkpFRVsKD8APn5+Vi6dCnmz58vX5u0kERNAqNaRpCoSZCVmlXs6jr9+/eHn58fatUqunJOeZNKpTh8+DA2btyIU6dO4f79+/IC09DQEM7Ozujbty8mTJiAatWqVXg+IiIqSiqVYv/+/fD398fp06fx+PFjALLpfOrUqYOOHTtiypQp8PT0FDkpVVUsKEvhxo0b+OWXX7B169YSrcPdpk0bfPLJJ+jXr5/SLEMllUoREBCATZs24fTp07h//758rI2RkRGaNWuG/v37Y+zYsbzjj4ioAkVGRsLX1xdHjhxBQkKC/I9/U1NTtGjRAqNGjcLQoUM5nQ8pBRaUZeDp06fYuXMnwsLCcPnyZSQnJ6OgoACGhoZo0qQJXF1d0atXrzIfK1keCgoKsG/fPmzevBlnz55VGMxtYmICFxcXDBgwAKNHj4aRkZHIaYmIVMeTJ0+wYsUK7N69G1FRUcjOli3Zq6OjAycnJ/Tv3x9TpkxBjRo1RE5KVBQLSnqr/Px87Nq1C1u2bMG5c+fw6NEjeYFpamoKV1dXDBw4EKNGjYKBgYHIaYmIKo/8/Hxs3boVmzZtwoULF5CamgoAUFNTg62tLbp27Ypp06ZxlTSqFFhQ0nvJzc3Fjh07sHXrVpw/fx7JycnyfdWrV4ebmxsGDRqEkSNHQldXV8SkRETK5/Tp01i5ciVCQkJw7949+R/oZmZmaN26NcaOHYtevXpxOh+qdFhQUqnk5ORg69at2L59O8LCwuQDxQGgRo0acHd3x9ChQzF06FDo6OiImJSIqOLduXMHfn5+OHDgAGJiYuQrnenr68PFxQVDhgzBuHHj2MNDlR4LSipTL168wL///ov//vsPFy9exNOnT+X7zMzM4OHhgWHDhmHw4MHQ0nr73J1ERJVNVlYW/P39sW3bNly+fBmZmbJ5hzU0NNCgQQP07NkTPj4+sLW1FTkpUdliQUnl6vnz59i0aRN27NiBy5cvIyUlBYBsqotatWqhZcuWGD58OAYMGMA7FYmo0pFKpQgKCsLatWsRGhqKhw9l8w1LJBJYWlqiffv2mDx5Mtq2bStyUqLyxYKSKlRGRgY2bNiAXbt24cqVK0hLSwMg++Vrbm6OVq1aYcSIEejTpw8LTCJSStHR0fDz80NgYCBu3bolX43M2NgY7u7uGDlyJEaMGMFeGKpSWFCSqNLS0uDv7489e/YgPDwc6enpAF7+de/l5YURI0ZwkDoRiSYtLQ0rV67Ezp07ERkZiRcvXgAAtLW14ejoiL59+2LKlCkwNzcXOSmReFhQklJ5+vQp1q1bh7179yIiIgLPnj0DIJtGo3bt2mjTpg1GjRqFbt26scAkonKRn5+PnTt3YsOGDTh37px8qI6amhpsbGzQuXNnTJ06Fc2bNxc5KZHyYEFJSi05ORlr167Fvn37EBkZKR/grqamBisrK7Rt2xZjxoxBhw4dWGAS0QcLCwvD8uXLcfToUdy5c0c+nU+NGjXQqlUrjB49Gv379+fvGaI3YEFJlUpSUhLWrFmD/fv3IyoqCs+fPwcAqKurw9raGu3atcO4ceM4AJ6I3urBgwdYvnw59u3bh+joaOTm5gIA9PT00LRpUwwaNAgTJ07kimBEJcSCkiq1e/fuYfXq1Th48CCuX7+OrKwsALICs27duujQoQPGjx8PT09PkZMSkZiys7OxadMmbNmyBRcvXkRGRgYA2e8Ke3t79OjRAz4+Pqhfv77ISYkqJxaUpFISExOxatUqHDp0CNHR0fLB8xoaGrC1tUWnTp0wfvx4uLu7i5yUiMqTVCpFSEgIVq9ejRMnTiApKUm+z8LCAm3btsWECRPQqVMndmMTlQEWlKTS4uPjsXr1ahw6dAgxMTHIzs4GAGhqaqJevXro2LEjJk2ahGbNmokblIhKLT4+HsuWLcOhQ4dw8+ZN5OfnAwCMjIzg6uqKYcOGYfTo0Vy1i6gcsKCkKiU6Ohpr165FUFAQYmNjkZOTAwDQ0tKCvb09OnfujEmTJqFx48YiJyWid8nIyMDq1auxY8cOREREyIe8aGlpoWHDhujTpw98fHxgaWkpclIi1ceCkqq0a9euYc2aNTh8+DBu3rwpH5ivra2NBg0aoEuXLpg8eTIcHBxETkpEUqkUe/fuxfr163H69Gk8efIEgGzeWmtra3Ts2BFTpkxBixYtRE5KVPWwoCR6RXh4ONasWYPg4GDEx8cjLy8PAKCjo4MGDRrA29sbkydPRr169UROSlQ1hIeHY/ny5Thy5Ahu374NqVQKAKhWrRpatmyJjz76CIMGDeLKWkQiY0FJ9BZhYWFYt24djh49ilu3bsnHZOno6KBRo0bo3r07Jk+eDBsbG5GTEqmG5ORkLF++HHv37kVUVJR8WIquri4aN26MgQMHYtKkSahWrZrISYnoVSwoid7D2bNnsW7dOoSEhCAhIUG+hq+uri4cHR3Rq1cvjB8/HtbW1iInJaoccnNzsWXLFmzevBlhYWFIS0sDIJvOx87ODt26dcPUqVPh5OQkblAieisWlESlEBoairVr1+LEiRNITEyUF5j6+vpwcnJCz549MXHiRN4UQPSKkydPYuXKlTh+/DgePHggX5WmVq1a8PLywvjx4+Ht7c3pfIgqERaURGVEKpXi+PHjWL9+PU6cOIG7d+/Kx3sZGBigcePG6NOnDyZMmAAzMzOR0xJVnMTERCxbtgwHDx5EbGysfOiIgYEBmjdvjqFDh2Ls2LHQ09MTOSkRfSgWlETlRCqV4siRI9iwYQNOnTqF+/fvywtMQ0NDODs7o2/fvpgwYQLHg5FKycrKwtq1a7F9+3ZcvnxZvkSqpqYmGjRogF69esHHx4djj4lUCAtKogoilUoREBCATZs24fTp07h//768q8/IyAjNmjVD//79MXbsWJiYmIgblug9FP5sr1u3DqGhoUhOTgYgm86ndu3a6NChAyZPngwvLy+RkxJReWFBSSSSwjn1Nm/ejLNnzyIpKUleYJqYmMDFxQUDBgzA6NGjYWRkJHJaIkVRUVHw9fXF4cOHcevWLfnVdxMTE3h4eGDUqFEYNmwYNDU1RU5KRBWBBSWRksjPz8euXbuwdetWnD17Fo8ePZIXmKampnB1dcXAgQMxatQoGBgYiJyWqpqUlBSsWLECu3btwrVr1+TLmOro6MDJyQn9+vXDlClTUL
NmTZGTEpEYWFASKanc3Fzs3LkTW7duxblz5+TdiABQvXp1uLm5YdCgQRg5ciR0dXVFTEqqKD8/H9u2bcOmTZtw/vx5pKamAgDU1NRQt25ddO3aFT4+PnB2dhY5KREpAxaURJVEbm4utm7dim3btiEsLAyPHz+W76tRowbc3d0xdOhQDB06FDo6OiImpcrq7NmzWLFiBY4dO4Z79+7Jr5DXrFkTrVu3xtixY9G7d29O50NERbCgJKqksrOzsXnzZvz333+4ePEinj59Kt9nZmYGDw8PDBs2DIMHD4aWlpaISUlZ3bt3D35+fti/fz9u3LghX2pUX18fTZs2xeDBgzFhwgQYGhqKnJSIlB0LSiIVkZWVhY0bN2LHjh24fPkyUlJSAMjutK1VqxZatmyJYcOGYeDAgVz3uIrKzs7G+vXrsXXrVly6dAnPnj0DAGhoaMDe3h49evTAtGnTuFY9Eb03FpREKiojIwMbNmzA7t27cfnyZfmSdhKJBObm5mjVqhWGDx+Ovn37ssBUUYVzoa5ZswanTp3Cw4cPAch+BiwsLNCuXTtMnDgRHTt2FDkpEVV2LCiJqoi0tDT4+/tjz549CA8PR3p6OgBZcWFpaYnWrVtj5MiR6NWrF8fIVWIxMTHw9fVFYGAg4uPj5cuBGhsbw83NDcOHD8eoUaOgra0tclIiUiUsKImqqJSUFKxbtw579uxBRESEvPtTTU0NtWvXhpeXFz766CN069aNBaYSS0tLw6pVq7Bz505cvXoVL168AABoaWnB0dERffr0wdSpU2FhYSFyUiJSZSwoiQgAkJycjLVr12Lfvn2IjIxEZmYmAFmBaWVlhbZt22LMmDHo0KFDpS4wCwqA588BQQD09YHK1tsvlUqxc+dObNiwAWfPnpXfjKWmpgZra2t06tQJU6ZMgbu7u8hJiagqYUFJRMVKSkrCmjVrsH//fkRFRcnXY1ZXV4e1tTXatWuHsWPHom3btpBIJCKnfTNBAEJCgO3bgYsXgchIIDdXtk9DA2jcGHBzAwYOBLp2BZSxVr506RL8/Pxw9OhR3LlzR74qTfXq1eHp6YnRo0djwIABUFdXFzkpEVVVLCiJqETu3buH1atX48CBA4iOjkZWVhYAWYFZt25dtG/fHuPHj0erVq1ETiojCMCGDcCiRUBMTMmOqVcP+PJLYOJEcQvLhw8fYvny5di7dy+uX7+O3P+vgHV1deHs7IyBAwdi0qRJXPOdiJQGC0oi+iCJiYlYuXIlAgMDER0dLR+7p6GhAVtbW3Ts2BETJkwQpev13j1g0iQgMFBxu0QioKZ9KkzryMaLpj8wwKM4UwhSxeqxfXtg7VrA1rZi8ubm5mLjxo3YsmULLl68KL9hSl1dHfXq1YO3tzemTZsGBweHiglERPSeWFASUZmIj4/H6tWrcejQIcTExMjXetbU1ISdnR06deqEiRMnwsXFpVxzREYCXboAjx693Fav9T14jb+KRp1vQ8cwT6F9znMNxBy3xum1zogJsZFvr1ZNVpCWVz187NgxrF69GidOnEBSUpJ8VRpzc3O0adMG48ePR9euXSv1eFUiqjpYUBJRuYiOjsbatWsRFBSE2NhY5OTkAJAVmPXr10fnzp0xadIkNG7cuMxeMzYW8PICClelNLbIxJDfj8Kp6+2SHX/CCltmdUbqXSMAgIkJcOIEUBbLVcfHx8PX1xcBAQG4efMm8vPzAQCGhoZo3rw5hg0bhjFjxnBddiKqlFhQElGFuHbtGtasWYPDhw/j5s2b8nGBWlpaaNCgAbp27YqJEyeiUaNGbzzHsWPHEBgYiIULFxZZrzwvD/DwAMLDZc+tmz/ElO17oV8t+71yvsjQwuqRvRF/ug4AwMEBuHIFeL3Oi4yMxIABAzB//nx89NFHRc6TmZmJ1atX47///kNERIT8piZNTU00bNgQvXv3xtSpU2FlZfVe+YiIlBELSiISRXh4ONasWYPg4GDEx8fL15HW1taGg4MDvL29MXHiRNSvX19+TI8ePXDo0CG0bNkS+/fvR40aNeT7Fi4EvvtO9rhWg6f4OGg79IxzFV4zPycfAYsCcHH7RbxIewELRwv0nNcTDh0UxybmZGri794DcS+iFgDg88+B3357uf/48ePo3bs3MjMz0blzZxw5cgRSqRT79u2Dv78/zpw5g8f/f5lUIpGgTp066NixI6ZMmQJPT88y+wyJiJQFC0oiUgphYWFYu3Ytjh07hlu3bsm7hHV0dNCwYUN4e3vjzz//xIsXL6Curg4rKyscPnwY9evXx9OnQJ06QHY2oKYuxSdHtsGqWXKR11g/cT0i9kWg3dR2qGlXExe2XMCdK3cwY98M2LW0U2j78EY1/NZ+OApyNaCuDiQmArVrA9u2bcOoUaMglUohlUqhpqYGGxsbJCYmyqfzMTU1RYsWLTBq1CgMHTqUS1sSkcpjQUlESuns2bNYt24dQkJCkJCQIF9CsJBEIoGBgQEOHjyICxfa4PPPZdvbTArHwMUnipwv8VIifu/yO/p83wcdZ8rWrs7LzsPi1othUMMAs4Nm/1979xMT1RHAcfwHCwsKWSSoWFCUoNbUVpcuEYpJUZNerNGUGlNjmsakRLxwqR5sEzUlIaYXU/+USOLBHtqmTTAxkrQBEqqpLXGtjdLWxD8giJFYUbbyd3fZHp6ywlto7Wibt3w/p7ezM/PmXTa/zM6bsbU5XV2qpoPWWzn790uJidXau3evrZ7b7daKFStUXl6uioqKcTOnADAdECgBOEJlZaXq6uoU6ycrLe2G+vutPX72tJ5Q9pIHtjqn9p1Sy2ctqrleo1RPdP1l48FGNVQ3aN+lfcqcnzmuzYPudH3s3a7RcKJcrlsKh+3rHV0ul3bt2qUDBw4YPiEAOBf7UQBwhPb2dkUiESUmJo47ESYpKWssTC4o7IkZJiXp1qVbmlMwZ1yYlKSFr1pbBXW3ddvazMp9qIJSqzwcnq+8vGLl5+eP3T8hIUHhcFjfTtzwEgCmGRb2AHCEgYEBZWVlafXq1SotLVVJSYl8Pp/8/nStXWvVySvsmbR9oCcgzzyPrdyTbZX13emL2W5BYY+unrVmJo8e/UkbNkjDw8Nqa2uT3++X3+9XTk6O4dMBgLMRKAE4wpkzZyTJdm54Z2f0Ontp76Ttg0NBJbntP3lJqVZZcDBo+25in4/vlZKSIp/PJ5/Ppx07dvyj8QNAPCNQAnCEiUHyseATOTDJHY5ZR5KSU5MVGgnZykNDVlnyjOSY7ZJSon2OjMSsAgDTHmsoATjazJnR66GAe9J6nmyPAncCtvJAj1WWMS8jZrvBvpSY9wIARBEoATjasmXR6+62OZPWy30lV3ev39VQYPzJOTcv3LS+fzk3Zrvuy9E+pzjEBwCmNQIlAEdbvlxyP5qYvNGao0d7i9us3LhSo+FRnTtxbqwsNBxS6xetWuhbaNsySJIiEam99QVJUkKCVFj4zIcPAHGBNZQAHM3tlsrKpMZG6X6XR1e/z9OLaztt9RYVLZJ3k1enq0/r4R8PNTt/ts5/dV69nb3aemhrzL47z
s/TnSvWJuWrVknp6c/1UQDAsZihBOB4T75o3XzIp8mOa9hWu01llWXyf+1X/Z56hUNhVXxZoYLSAlvdSERqPlQ09nnnzmc9agCIH5yUA8DxgkFp8eLotj7vfNqkknd/Nerz5/ol+vz99ZKkuXOljg5pxgzDgQJAnGKGEoDjJSdLtbXRz/Ufvq6bF7L/dX+3f8vSNx+sG/t8+DBhEgCmQqAEEBfWr5e2b7euR/rdqi1/S7835z11P9d+yNXRjW9rsM86onHzZmnLlmc5UgCIP/zlDSBuDA5awbKlJVr22nuX9eZHPyp99uCUbfvvp+i7T4p1ts6rSMTaRL24WGpq4mUcAPg7BEoAcWVgwJpRbGiIlrncIXk3XdNLb3RogbdHs3L/VEKC9OB2urp+masrzYt08eRSBYeiG1+sWyedPCl57Md/AwAmIFACiDujo9KxY9Lu3VJ//9O1TU2VamqkqirJ5Xo+4wOAeEOgBBC3urqkI0ek48ele/emrpuRYa3BrKqS8vP/m/EBQLwgUAKIe0ND1rpKv1+6eFHq7bX2mczMlLxeqahIWrNGSkv7nwcKAA5FoAQAAIARtg0CAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABghUAIAAMAIgRIAAABGCJQAAAAwQqAEAACAEQIlAAAAjBAoAQAAYIRACQAAACMESgAAABj5CxmspgCE0LqYAAAAAElFTkSuQmCC", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "'Done Visualizing Node Prediction Sample'" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from visualize_sgs_output import SGSVisualizer\n", "\n", @@ -2645,35 +487,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "split_strategy_cls_path: \"gigl.src.split_generator.lib.unsupervised_node_anchor_based_link_prediction.transductive.TransductiveUnsupervisedNodeAnchorBasedLinkPredictionSplitStrategy\"\n", - "assigner_cls_path: \"gigl.src.split_generator.lib.unsupervised_node_anchor_based_link_prediction.transductive.TransductiveEdgeToLinkSplitHashingAssigner\"\n", - "assigner_args {\n", - " key: \"seed\"\n", - " value: \"42\"\n", - "}\n", - "assigner_args {\n", - " key: \"test_split\"\n", - " value: \"0.2\"\n", - "}\n", - "assigner_args {\n", - " key: \"train_split\"\n", - " value: \"0.7\"\n", - "}\n", - "assigner_args {\n", - " key: \"val_split\"\n", - " value: \"0.1\"\n", - "}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(frozen_config.dataset_config.split_generator_config)" ] @@ -2694,7 +510,7 @@ "source": [ "%%bash\n", "\n", - "PYTHONPATH=\"/home/$(whoami)/GiGL/TODO/:$PYTHONPATH\" python -m \\\n", + "python -m \\\n", " gigl.src.split_generator.split_generator \\\n", " --job_name toy_graph \\\n", " --task_config_uri gs://TEMP DEV GBML PLACEHOLDER/toy_graph/config_populator/frozen_gbml_config.yaml" @@ -2710,25 +526,9 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "unsupervised_node_anchor_based_link_prediction_dataset {\n", - " train_main_data_uri: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/train/main_samples/samples/\"\n", - " test_main_data_uri: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/test/main_samples/samples/\"\n", - " val_main_data_uri: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/val/main_samples/samples/\"\n", - " train_random_negative_data_uri: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/train/random_negatives/neighborhoods-\"\n", - " val_random_negative_data_uri: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/val/random_negatives/neighborhoods-\"\n", - " test_random_negative_data_uri: \"gs://TEMP DEV GBML PLACEHOLDER/toy_graph/split_generator/test/random_negatives/neighborhoods-\"\n", - "}\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(frozen_config.shared_config.dataset_metadata)" ] @@ -2754,49 +554,9 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAApQAAAIfCAYAAADOuEwnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABkB0lEQVR4nO3dd3hUdd7+8XsmlSTUhCQIhFADUgQEqYEEyACroNhx7aw+/hRWWWVVdpWioiIrWLCvXVefVbGvhpJQRDpIUwg11IQkhDRSJnN+f7DMQ0wIkJnkzEzer+viUk75ns9EIXe+7VgMwzAEAAAA1JDV7AIAAADg3QiUAAAAcAmBEgAAAC4hUAIAAMAlBEoAAAC4hEAJAAAAlxAoAQAA4BICJQAAAFxCoAQAAIBLCJQAUIcsFoumT59udhkA4FYESgBu9e6778pisTh/+fv7q2XLlrr99tt16NChWn329u3bNX36dO3bt++8rp8+fbosFouioqJUVFRU6XxsbKyuuOIKN1dZN878b3Dmr2eeecbs0gD4IH+zCwDgm2bOnKm2bduquLhYq1at0rvvvqsVK1Zo69atCg4OrpVnbt++XTNmzFBCQoJiY2PP+77MzEy9+uqrevDBB2ulLrMkJSXp1ltvrXCsV69eJlUDwJcRKAHUitGjR6tPnz6SpD/96U+KiIjQs88+q6+//lrXX3+9ydVV1LNnTz333HO699571aBBA7PLcZtOnTrp5ptvNrsMAPUAQ94A6kR8fLwkaffu3RWOL1myRPHx8QoNDVWTJk105ZVX6tdff610/8aNGzV69Gg1atRIYWFhGj58uFatWuU8/+677+q6666TJCUmJjqHeFNTU89Z2+OPP66MjAy9+uqr57y2sLBQDz74oFq3bq2goCDFxcVpzpw5MgyjwnUlJSWaPHmymjdvroYNG2rs2LE6ePBglW0eOnRId955p6KiohQUFKSuXbvq7bffrnRdenq6fvvtt3PWeKaTJ0+quLj4gu4BgAtFoARQJ07Pa2zatKnz2KJFizRy5EhlZmZq+vTp+stf/qKVK1dq0KBBFeZBbtu2TfHx8frll1/017/+VY899pj27t2rhIQErV69WpI0ZMgQ/fnPf5YkTZ06VR988IE++OADdenS5Zy1xcfHa9iwYZo9e7ZOnjx51usMw9DYsWM1d+5cjRo1Ss8//7zi4uI0ZcoU/eUvf6lw7Z/+9CfNmzdPNptNzzzzjAICAnT55ZdXajMjI0P9+/fXokWLNHHiRL3wwgvq0KGDJkyYoHnz5lW49tZbbz2vz3Pau+++q9DQUDVo0EAXX3yxPv744/O+FwAuiAEAbvTOO+8YkoxFixYZx44dMw4cOGB89tlnRvPmzY2goCDjwIEDzmt79uxpREZGGtnZ2c5jv/zyi2G1Wo1bb73Veeyqq64yAgMDjd27dzuPHT582GjYsKExZMgQ57F///vfhiQjJSXlvGqdNm2aIck4duyYsXTpUkOS8fzzzzvPt2nTxrj88sudv//yyy8NScaTTz5ZoZ1rr73WsFgsxq5duwzDMIxNmzYZkox77723wnU33XSTIcmYNm2a89iECROMFi1aGFlZWRWuvfHGG43GjRsbRUVFzmNDhw41zvev7YEDBxrz5s0zvvrqK+PVV181unXrZkgyXnnllfO6HwAuBD2UAGrFiBEj1Lx5c7Vu3VrXXnutQkND9fXXX6tVq1aSpCNHjmjTpk26/fbb1axZM+d9PXr0UFJSkr7//ntJUnl5uZKTk3XVVVepXbt2zutatGihm266SStWrFBeXp7L9Q4ZMkSJiYnV9lJ+//338vPzc/aEnvbggw/KMAz95z//cV4nqdJ1DzzwQIXfG4ahzz//XGPGjJFhGMrKynL+GjlypE6cOKENGzY4r09NTa00tH42P/30k+6//36NHTtW99xzj9avX69u3bpp6tSp1fbCAkBNECgB1Ir58+dr4cKF+uyzz/SHP/xBWVlZCgoKcp7fv3+/JCkuLq7SvV26dFFWVpYKCwt17NgxFRUVnfU6h8OhAwcOuKXm6dOn6+jRo3rttdeqPL9//35ddNFFatiwYaU6Tp8//U+r1ar27dtXuO73n+HYsWPKzc3VG2+8oebNm1f4dccdd0g6tQLdHQIDAzVx4kTl5uZq/fr1bmkTAE5jlTeAWnHZZZc5V3lfddVVGjx4sG666Sbt2LFDYWFhJldXtSFDhighIUGzZ8/WPffcU+vPczgckqSbb75Zt912W5XX9OjRw23Pa926tSQpJyfHbW0CgESgBFAH/Pz89PTTTysxMVEvv/yyHnnkEbVp00aStGPHjkrX//bbb4qIiFBoaKiCg4MVEhJy1uusVqszKFksFpdrnT59uhISEvT6669XOtemTRstWrRI+fn5FXopT6+8Pv2Z2rRpI4fDod27d1folfz9Zzi9Ary8vFwjRoxwufZz2bNnj/O5AOBODHkDqBMJCQm67LLLNG/ePBUXF6tFixbq2bOn3nvvPeXm5jqv27p1q5KTk/WHP/xB0qkwarPZ9NVXX1VY+Z2RkaGPP/5YgwcPVqNGjSRJoaGhklShvQs1dOhQJSQk6Nlnn6203c4f/vAHlZeX6+WXX65wfO7cubJYLBo9erQkOf/54osvVrju96u2/fz8dM011+jzzz/X1q1bK9Vy7NixCr8/322Dfn+fJOXn52vevHmKiIjQpZdees42AOBC0EMJoM5MmTJF1113nd59913dc889eu655zR69GgNGDBAEyZM0MmTJ/XSSy+pcePGFd53/eSTT2rhwoUaPHiw7r33Xvn7++v1119XSUmJZs+e7byuZ8+e8vPz07PPPqsTJ04oKChIw4YNU2Rk5AXVOW3aNCUmJlY6PmbMGCUmJupvf/ub9u3bp0suuUTJycn66quv9MADDzjnTPbs2VPjx4/XK6+8ohMnTmjgwIFavHixdu3aVanNZ555RikpKerXr5/uuusuXXzxxcrJydGGDRu0aNGiCsPTt956q5YuXXrOhTnz58/Xl19+qTFjxigmJkZHjhzR22+/rfT0dH3wwQcKDAy8oK8HAJyTqWvMAfic09sGrV27ttK58vJyo3379kb79u0Nu91uGIZhLFq0yBg0aJDRoEEDo1GjRsaYMWOM7du3V7p3w4YNxsiRI42wsDAjJCTESExMNFauXFnpujfffNNo166d4efnd84thM7cNuj3Tm/Rc+a2QYZhGPn5+cbkyZONiy66yAgICDA6duxoPPfcc4bD4ahw3cmTJ40///nPRnh4uBEaGmqMGTPGOHDgQKVtgwzDMDIyMoz77rvPaN26tREQEGBER0cbw4cPN954440qazqX5ORkIykpyYiOjjYCAgKMJk2aGDabzVi8ePE57wWAmrAYxnnuQQEAAABUgTmUAAAAcAmBEgAAAC4hUAIAAMAlBEoAAAC4hEAJAAAAlxAoAQAA4BICJQCP9+6778pisVR4U447HDhwQMHBwfrpp5/c2q5ZUlNTZbFYlJqaesH3vvbaa4qJiVFJSYn7CwPg8wiUAGosIS
FBFovlnL/OfOuNJ5k5c6b69eunQYMGVTj+zTffaOjQoYqMjFRISIjatWun66+/Xj/88INJlda+22+/XaWlpVW+wxwAzoWNzQHU2MKFC5WRkeH8/dq1a/Xiiy9q6tSp6tKli/N4jx491KNHjxo/p7y8XGVlZQoKCpLFYnGp5tOOHTumli1b6r333tP48eOdx+fMmaMpU6Zo6NChuvLKKxUSEqJdu3Zp0aJFuuSSS/Tuu++65fm1ITU1VYmJiUpJSVFCQsIF3//www/r008/1d69e932dQZQP/AubwA1lpSUVOH3wcHBevHFF5WUlFRtoCksLFRoaOh5P8fPz09+fn41LbNKH374ofz9/TVmzBjnMbvdrieeeEJJSUlKTk6udE9mZqZba/A0119/vWbPnq2UlBQNGzbM7HIAeBGGvAHUqunTp8tisWj79u266aab1LRpUw0ePFiStHnzZt1+++1q166dgoODFR0drTvvvFPZ2dkV2qhqDmVsbKyuuOIKrVixQpdddpmCg4PVrl07vf/+++dV15dffql+/fopLCzMeSwrK0t5eXmVhsBPi4yMdP57aWmpHn/8cV166aVq3LixQkNDFR8fr5SUlAr37Nu3TxaLRXPmzNH8+fPVrl07hYSEyGaz6cCBAzIMQ0888YRatWqlBg0a6Morr1ROTk6FNk5/1uTkZPXs2VPBwcG6+OKL9cUXX5zXZ129erVGjRqlxo0bKyQkREOHDq1y3uill16qZs2a6auvvjqvdgHgNAIlgDpx3XXXqaioSLNmzdJdd90l6dSQ+Z49e3THHXfopZde0o033qhPPvlEf/jDH3Q+s3F27dqla6+9VklJSfrHP/6hpk2b6vbbb9e2bduqva+srExr165V7969KxyPjIxUgwYN9M0331QKdb+Xl5ent956SwkJCXr22Wc1ffp0HTt2TCNHjtSmTZsqXf/RRx/plVde0aRJk/Tggw9q6dKluv766/X3v/9dP/zwgx5++GHdfffd+uabb/TQQw9Vuj8tLU033HCDRo8eraefflr+/v667rrrtHDhwmrrXLJkiYYMGaK8vDxNmzZNs2bNUm5uroYNG6Y1a9ZUur53794+s0gJQB0yAMBN/v3vfxuSjJSUFOexadOmGZKM8ePHV7q+qKio0rF//etfhiRj2bJlzmPvvPOOIcnYu3ev81ibNm0qXZeZmWkEBQUZDz74YLV17tq1y5BkvPTSS5XOPf7444YkIzQ01Bg9erTx1FNPGevXr690nd1uN0pKSiocO378uBEVFWXceeedzmN79+41JBnNmzc3cnNznccfffRRQ5JxySWXGGVlZc7j48ePNwIDA43i4uJKn/Xzzz93Hjtx4oTRokULo1evXs5jKSkpFb7+DofD6NixozFy5EjD4XA4rysqKjLatm1rJCUlVfpcd999t9GgQYMqv24AcDb0UAKoE/fcc0+lYw0aNHD+e3FxsbKystS/f39J0oYNG87Z5sUXX6z4+Hjn75s3b664uDjt2bOn2vtOD6k3bdq00rkZM2bo448/Vq9evfTjjz/qb3/7my699FL17t1bv/76q/M6Pz8/BQYGSpIcDodycnJkt9vVp0+fKmu/7rrr1LhxY+fv+/XrJ0m6+eab5e/vX+F4aWmpDh06VOH+iy66SOPGjXP+vlGjRrr11lu1ceNGHT16tMrPuWnTJqWlpemmm25Sdna2srKylJWVpcLCQg0fPlzLli2Tw+GocE/Tpk118uRJFRUVVf3FA4AqECgB1Im2bdtWOpaTk6P7779fUVFRatCggZo3b+687sSJE+dsMyYmptKxpk2b6vjx4+dVk3GWYfXx48dr+fLlOn78uJKTk3XTTTdp48aNGjNmjIqLi53Xvffee+rRo4eCg4MVHh6u5s2b67vvvquy9t/Xejpctm7dusrjv/8MHTp0qLTyulOnTpJ01v0509LSJEm33XabmjdvXuHXW2+9pZKSkkq1nv6asMobwIVglTeAOnFmb+Rp119/vVauXKkpU6aoZ8+eCgsLk8Ph0KhRoyr1nFXlbCu/zxYUTwsPD5dUObT9XqNGjZSUlKSkpCQFBATovffe0+rVqzV06FB9+OGHuv3223XVVVdpypQpioyMlJ+fn55++mnt3r37vGut6Wc4H6e/hs8995x69uxZ5TVnLkqSTn1NQkJCqvzvBQBnQ6AEYIrjx49r8eLFmjFjhh5//HHn8dO9arUpJiZGDRo00N69e8/7nj59+ui9997TkSNHJEmfffaZ2rVrpy+++KJCb960adPcXq90agGSYRgVnrVz505Jp1aBV6V9+/aSTgXjESNGnNdz9u7dW2EPUQA4Hwx5AzDF6Z653/fEzZs3r9afHRAQoD59+mjdunUVjhcVFennn3+u8p7//Oc/kqS4uDhJVde/evXqs97vqsOHD2vBggXO3+fl5en9999Xz549FR0dXeU9l156qdq3b685c+aooKCg0vljx45VOrZhwwYNHDjQfYUDqBfooQRgikaNGmnIkCGaPXu2ysrK1LJlSyUnJ19Qr6ErrrzySv3tb39TXl6eGjVqJOlUoBw4cKD69++vUaNGqXXr1srNzdWXX36p5cuX66qrrlKvXr0kSVdccYW++OILjRs3Tpdffrn27t2r1157TRdffHGV4c1VnTp10oQJE7R27VpFRUXp7bffVkZGht55552z3mO1WvXWW29p9OjR6tq1q+644w61bNlShw4dUkpKiho1aqRvvvnGef369euVk5OjK6+80u31A/Bt9FACMM3HH3+skSNHav78+Xr00UcVEBDg7AmsbbfccovKy8v19ddfO481adJEb775pqKjo/XOO+/o3nvv1WOPPaaCggI999xz+vTTT53X3n777Zo1a5Z++eUX/fnPf9aPP/6oDz/8UH369KmVejt27KhPP/1U33//vR555BGVlZXp008/1ciRI6u9LyEhQT///LP69Omjl19+WZMmTdK7776r6OhoTZ48ucK1//73vxUTE8NbcgBcMN7lDaDemjBhgnbu3Knly5ebXUq1YmNj1a1bN3377be19oySkhLFxsbqkUce0f33319rzwHgm+ihBFBvTZs2TWvXruXNMJLeeecdBQQEVLlfKACcCz2UAODh6qKHEgBcQQ8lAAAAXEIPJQAAAFxCDyUAAABcQqAEAACASwiUAAAAcAmBEgAAAC4hUAIAAMAlBEoAAAC4hEAJAAAAlxAoAQAA4BICJQAAAFxCoAQAAIBLCJQAAABwCYESAAAALiFQAgAAwCUESgAAALiEQAkAAACXECgBAADgEgIlAAAAXEKgBAAAgEsIlAAAAHAJgRIAAAAuIVACAADAJQRKAAAAuIRACQAAAJcQKAEAAOASAiUAAABcQqAEAACASwiUAAAAcAmBEgAAAC4hUAIAAMAlBEoAAAC4hEAJAAAAlxAoAQAA4BJ/swswW2GJXfuyC1VqdyjQ36rY8FCFBtX7LwsAAMB5q5fJKS0jXx+tTlfKjkyl5xTJOOOcRVJMsxAlxkXqj/1i1DGqoVllAgAAeAWLY
RjGuS/zDQdyijR1wRYt35UlP6tF5Y6zf/TT5+M7RGjWuO5q3SykDisFAADwHvUmUH6yNl3Tvt4mu8OoNkj+np/VIn+rRTPGdtWNfWNqsUIAAADvVC8C5cspaZqTvNPldh6yddLExI5uqAgAAMB3+Pwcyk/WplcZJh2lJ5W3+guVHN6h0iM75SguUPgfHlBYjxFnbWtO8k41DwvSDfRUAgAAOPn0tkEHcoo07ettVZ5zFOXpxE//Uln2AQVEtj3vNh//epsO5BS5q0QAAACv59OBcuqCLbKfZb6kX1gztZr4gVrd+46aJt553m3aHYamLtjirhIBAAC8ns8GyrSMfC3flXXWBTgW/wD5hTW94HbLHYaW78rSrsx8V0sEAADwCT4bKD9anS4/q6VW2vazWvThqvRaaRsAAMDb+GygTNmReUHbA12IcoehlJ2ZtdI2AACAt/HJQFlQYld6LS+cSc8uUmGJvVafAQAA4A18MlDuzy5UbW+uaUjal11Yy08BAADwfD4ZKEvtDp96DgAAgCfzyUAZ6F83H6uungMAAODJfDIRxYaHqnbWd/8fy3+fAwAAUN/55KsXQ4P8FdMsRPvPsTAnb/03chQXqrwgR5J0ctca2fOzJEmNLh0ja/DZA2NMeIhCg3zyywcAAHBBfDYRJcZF6oPV+6vdOihv9QKV5/3f9j9FO1dKO1dKksK6Jp41UPpZLUrsFOneggEAALyUxTCM2l4QbYq0jHwlzVtWa+0vmjxEHSIb1lr7AAAA3sIn51BKUseohorvEOH2t+X4WS2K7xBBmAQAAPgvnw2UkjRrXHf5uzlQ+lstmjWuu1vbBAAA8GY+HShbNwvRjLFd3drmzLFd1bpZiFvbBAAA8GY+HSgl6ca+MXrI1sm1Rv47zXSKLU439I1xQ1UAAAC+w+cDpSRNTOyoZ67uriB/6wXPqbRaJIe9VCcWvqKIzPVyOHg7DgAAwJl8dpV3VQ7kFGnqgi1avitLflZLtVsKnT4f3yFCX0y9XkWZ6ZKkTp066YknntC1114rq7Ve5HEAAIBq1atAeVpaRr4+Wp2ulJ2ZSs8u0plfAItObVqe2ClSN/ePUYfIhho3bpy+/PLLU+ctFhmGobi4OL3wwgsaOXKkGR8BAADAY9TLQHmmwhK79mUXqtTuUKC/VbHhoZXegDNjxgw98cQTKi8vr3B8wIABWrlyZV2WCwAA4HF89k055ys0yF9dL2pc7TWXXHJJpTCZlJSkjz76qDZLAwAA8ApMAjwPPXr0kHRquPu0IUOGqHnz5maVBAAA4DHq/ZD3+XA4HGratKlCQkL0ySef6Nprr1V2drbWrl2rSy+91OzyAAAATEWgPE87duxQZGSkmjZtql9//VXdunVTw4YNdfToUQUHB5tdHgAAgGkY8j5PcXFxatq0qSSpS5cumjdvnk6cOKGkpCSTKwMAADAXgbKGJk2apBEjRmjFihV65plnzC4HAADANAx5u8Butys6Olo5OTlat26devfubXZJAAAAdY5A6aJt27apR48eatiwoTIyMhQUFGR2SQAAAHWKIW8Xde3aVc8//zzzKQEAQL1FoHSD+++/X8OHD9fy5cs1e/Zss8sBAACoUwx5u0lpaalatGih3NxcbdiwQZdcconZJQEAANQJAqUbbd26VZdccokaNWqkjIwMBQYGml0SAABArWPI2426deumOXPmKDc3VzabzexyAAAA6gSB0s0mT56sYcOGaenSpZozZ47Z5QAAANQ6hrxrwZnzKTdu3KgePXqYXRIAAECtIVDWktPzKRs3bqyjR48ynxIAAPgshrxrSbdu3TR79mwdP35cI0eONLscAACAWkOgrEUPPvigEhISlJqaqn/84x9mlwMAAFArGPKuZaWlpYqOjtaJEyf0yy+/qFu3bmaXBAAA4Fb0UNaywMBApaamSpKGDh2q0tJScwsCAABwMwJlHejRo4eeeeYZ5eTkaPTo0WaXAwAA4FYMedehhIQELV26VHPnztUDDzxgdjkAAABuQaCsQ6WlpYqKilJeXh7zKQEAgM9gyLsOBQYGKiUlRYZhaOjQobLb7WaXBAAA4DICZR3r2bMn8ykBAIBPYcjbJEOGDNHy5cs1b9483X///WaXAwAAUGMESpOcOZ9y8+bN6tq1q9klAQAA1AhD3iYJDAzUkiVLmE8JAAC8HoHSRL169dJTTz2l7Oxs/eEPfzC7HAAAgBphyNsDxMfHa8WKFXrxxRc1adIks8sBAAC4IARKD1BcXKzo6Gjl5+dr69at6tKli9klAQAAnDeGvD1AcHCwcz5lfHw88ykBAIBXIVB6iN69ezvnU15++eVmlwMAAHDeGPL2MIMGDdLKlSv10ksvaeLEiWaXAwAAcE4ESg9TXFysqKgoFRQUaPv27YqLizO7JAAAgGox5O1hgoODtXjxYuZTAgAAr0Gg9EB9+vTRzJkzdezYMY0ZM8bscgAAAKrFkLcHGzhwoH7++WfNnz9f9957r9nlAAAAVIlA6cFOz6csLCzUtm3bmE8JAAA8EkPeHiw4OFiLFi2Sw+FgPiUAAPBYBEoP17dvX02fPl3Hjh3TlVdeaXY5AAAAlTDk7SX69++v1atX67XXXtP//M//mF0OAACAE4HSSxQVFSk6OlpFRUX69ddf1bFjR7NLAgAAkMSQt9cICQnRwoUL5XA4NHjwYOZTAgAAj0Gg9CL9+vXT448/rszMTF111VVmlwMAACCJIW+v1K9fP61Zs0avv/667r77brPLAQAA9RyB0gudOZ9yx44dat++vdklAQCAeowhby8UEhKiH3/8UQ6HQ4MGDZLD4TC7JAAAUI8RKL3UgAED9Pe//10ZGRnMpwQAAKZiyNvL9e3bV+vWrdObb76pP/3pT2aXAwAA6iECpZcrKipSVFSUTp48yXxKAABgCoa8vVxISIiSk5Od+1MynxIAANQ1AqUPGDBggP72t7/p6NGjuvrqq80uBwAA1DMMefuQPn36aP369Xrrrbc0YcIEs8sBAAD1BIHShxQWFio6OlonT55UWlqa2rZta3ZJAACgHmDI24eEhobqhx9+kMPh0MCBA5lPCQAA6gSB0scMGjRIjz76qI4ePaprrrnG7HIAAEA9wJC3j7r00ku1YcMGvf3227rjjjvMLgcAAPgwAqWPKigoUHR0tEpKSrRr1y61adPG7JIAAICPYsjbR4WFhek///mPysvLNWDAAOZTAgCAWkOg9GHx8fF6+OGHdeTIEV133XVmlwMAAHwUQ971QK9evbRp0ya9++67uu2228wuBwAA+BgCZT1Q1XxKu92uvLw8NWvWzOzyAACAl2PIux4ICwvT999/L7vdroEDB2rv3r0aOHCgOnToILvdbnZ5AADAy9FDWY88/PDDmj17tvz9/VVeXi7DMLR9+3Z16dLF7NIAAIAXo4eynigtLVVZWZkkyW636/TPEZs3bzazLAAA4AMIlPXEzTffrLlz51Y45ufnR6AEAAAuI1DWE1dffbXCw8NlsVicx8rLy7Vhw4az3lNYYte2wye0Mf24th0+ocIS5lsC
AIDKmENZjxQVFen111/XU089pezsbElScHCwTp486bwmLSNfH61OV8qOTKXnFOnM/zkskmKahSgxLlJ/7BejjlEN6/YDAAAAj0SgrIdOB8spU6aovLxcq1evVosO3TR1wRYt35UlP6tF5Y6z/29x+nx8hwjNGtddrZuF1GH1AADA0xAo67GsrCz1799fAXFD5eh9rewOo9og+Xt+Vov8rRbNGNtVN/aNqcVKAQCAJyNQ1nNPfbleb64+6nI7D9k6aWJiRzdUBAAAvA2LcuqxT9amuyVMStKc5J36dG26W9oCAADehR7KeupATpFGzF2qErujwvGSIztVuGWxitO3yH4iQ9YGjRR0UZyaDLlFAc1aVttmkL9ViyYPZU4lAAD1DD2U9dTUBVtkr2K+ZN6qz1S0Y6WC21yipiPuVtglI1V8YKuOvHO/So/tq7ZNu8PQ1AVbaqliAADgqeihrIfSMvKVNG9ZleeKD/6qoBYdZPELcB4ryzmkw/+cqNDOgxQx5qFztr9o8hB1iGRLIQAA6gt6KOuhj1any89qqfJccKsuFcKkJAU0a6nAiBiVZR04Z9t+Vos+XMVcSgAA6hMCZT2UsiPzgrYHMgxD5UW5soY0Oue15Q5DKTszXSkPAAB4GQJlPVNQYld6TtEF3VO4LVXl+dkK7Rx/XtenZxfxmkYAAOoRAmU9sz+7UBcyabYs+4ByFr6qoJadFdp9+HndY0jal11Yo/oAAID3IVDWM6W/2yaoOuUFx5X57xmyBoUq4qpHZbH61cpzAACAd/M3uwDUrUD/8/sZwlFcqIz/nSZHcaGibn5W/g3Da+U5AADA+/Fdv56JDQ9V1eu7/49hL1XmZzNlP35Ikdc9rsCIC3tPt+W/zwEAAPUDgbKeCQ3yV0w1b7IxHOU69uWzKjn8m5pf9YiCWna54GfEhIcoNIjObwAA6gu+69dDiXGR+mD1/iq3Djq+5J86uWu1GnS4TOUnC1SwNaXC+bBuidW27We1KLFTpFvrBQAAno1AWQ/9sV+M3v15X5XnSjP2SJJO7lqjk7vWVDp/rkBZ7jB0c/8LGyIHAADejUBZD3WMaqj4DhFauSe7Ui9l9B+fqXG7flaLBrYL57WLAADUM8yhrKdmjesu/7O8frGm/K0WzRrX3a1tAgAAz0egrKdaNwvRjLFd3drmzLFd1bqaBT8AAMA3ESjrsRv7xughWye3tDXFFqcb+jJ3EgCA+shiGMaFvIkPPuiTtema9vU22R1GlSu/z8YotyswwF9PXtWdMAkAQD1GDyV0Y98YLZo8VAPbnXobjt855lY6z2fu1L75E/Trd2+rtLS0tssEAAAeih5KVJCWka+PVqcrZWem0rOLdOb/HBad2rQ8sVOkbu4fow/mz9HMmTMlSbGxsZozZ46uvvpqWSzuXewDwH0KS+zal12oUrtDgf5WxYaH8iICAC4jUOKszvWNZ+XKlRo0aFCFe/r166e5c+dqwIABdV0ugLNw/qC4I1PpOVX8oNgsRIlxkfpjvxh1jGLbLwAXjkCJGsvMzFRUVFSFYxaLRYZhaP369erdu7dJlQGQpAM5RZq6YIuW78qSn9VS7Rzp0+fjO0Ro1rju7NgA4IIQKFFjhmEoLCxMRUVFFY7feeedeuWVVxQUFGRSZQBqutjOz2qRv9WiGWO76kYW2wE4TwRKuKR79+7aunWrs2cyISFBKSkp574RQK15OSVNc5J3utzOQ7ZOmpjY0Q0VAfB1rPKGSzp37ixJGjt2rNq3b6/U1FSlpqaaWxRQj32yNt0tYVKS5iTv1Kdr093SFgDfRg8lXLJ9+3bt2rVLY8aM0ZEjR9SmTRuFhITo2LFjCgwMNLs8oF45kFOkEXOXqsTuqHQu69u5Kty6+Kz3trzvXfk3jKh0PMjfqkWThzKnEkC1CJRwq5dfflmTJk3S6NGj9f3335tdDlCv3PLP1Vq5J7vKOZMlh35V2fGjvztqKOfH+fJvHKWL/vRKlW36WS0a2C5cH0zoVwsVA/AVDHnDrSZOnKi+ffvqP//5jz777DOzywHqjbSMfC3flXXWBThBLbsorFtihV/+jaNklJUo9OKEs7Zb7jC0fFeWdmXm11LlAHwBgRJul5ycrKCgIN1yyy3Ky8szuxygXvhodfo533L1e4Xbl0qyKPTiodVe52e16MNVzKUEcHYESrhdkyZN9N5776m4uFijRo0yuxygXkjZkXlB2wMZ5XYV/bZCQa26yL9JVLXXljsMpezMdLVEAD6MQIlaccMNN8hms+nnn3/Wa6+9ZnY5gE8rKLErPafo3Bee4eTeDXKczKt2uPtM6dlFKiyx16A6APUBgRK15quvvlJYWJgmTZqko0d/vxgAgLvszy7Uha6uLNy+VLL6K6TL4PO63pC0L7vwgmsDUD8QKFFrgoOD9eWXX8put2v48OFmlwP4rNIqtgmqjqP0pE6mrVKDtr3k16BRrT0HQP1BoEStGj58uMaPH6/t27dr5syZZpcD+KRA/wv7q7xo56pTq7u7JtTqcwDUH/ztgFr34YcfKjw8XDNmzFBaWprZ5QA+JzY8VBeyvrtwe6osgQ3UoOP57y1p+e9zAKAqBErUOqvVquTkZBmGwdA3UAtCg/wVc55vsikvOqHifZsU0rG/rAHB5/2MmPAQhQb517READ6OQIk60bt3b02aNEkHDhzQpEmTzC4H8DmJcZHntQ9l4a/LJEf5BQ13+1ktSuwU6UJ1AHwdgRJ15oUXXlBMTIzmz5+vdevWmV0O4FP+2C/mvPahLNyWKmtIEwXH9jzvtssdhm7uH+NCdQB8He/yRp3avXu3OnXqpKZNmyozM1NWKz/TAK5yOBx65513NGNptvxbXizHBc2orB7v8gZwPvhujjrVvn17zZw5U9nZ2brpppvMLgfwemvXrlX//v31pz/9Sf2sexTg5pXY/laLZo3r7tY2AfgeAiXq3N/+9jd17dpVn376qRYuXGh2OYBXysrK0t13361+/fqppKREy5Yt07/ffU0zxnZz63Nmju2q1ue54AdA/cWQN0yRmZmpVq1aKSgoSMeOHVNw8PmvNgXqs/Lycr3xxhv629/+JofDoSeffFL33HOP/P3/bwX2yylpmpO80+VnTbHF6b7EDi63A8D30UMJU0RGRurll19WQUGBxo4da3Y5gFdYuXKl+vTpo3vvvVfjxo3Tzp07NXHixAphUpImJnbUM1d3V5C/9bxWfp/Jz2pRkL9Vz17dnTAJ4LwRKGGau+++WwMHDtTChQv1r3/9y+xyAI919OhR3XbbbRo0aJD8/Py0atUq/fOf/1Rk5Nm38rmxb4wWTR6qge3CJemcwfL0+YHtwrVo8lDd0JdV3QDOH0PeMFVBQYGaN28uwzB09OhRNWnSxOySAI9RVlam+fPna9q0afL399fTTz+tCRMmyM/P74LaScvI10er05WyM1Pp2UU68y99i05tWp7YKVI3949Rh8iGbv0MAOoHAiVMt2DBAl199dXq06eP1q5da3Y5gEdITU3VpEmTtG3bNt1zzz164oknFB4e7nK7hSV27cs
uVKndoUB/q2LDQ3kDDgCXMeQN040bN06XX3651q1bpxdeeMHscgBTHTp0SOPHj1diYqLCwsK0bt06vfLKK24Jk9Kp1zR2vaixesU0VdeLGhMmAbgFPZTwCKWlpWrevLmKioq0d+9etWrVyuySgDpVWlqqefPmaebMmQoNDdWzzz6rW2+9lc3/AXgF/qaCRwgMDNQ333wju92u4cOHm10OUKcWLlyoHj16aOrUqfrTn/6kHTt26PbbbydMAvAa/G0FjzFkyBDddttt2rlzp/7+97+bXQ5Q6/bv369rrrlGNptNUVFR2rBhg+bNm8fiNABehyFveBSHw6EWLVro2LFj2rZtm7p06WJ2SYDbFRcXa86cOZo1a5aaNGmiOXPmaPz48bJY3PcObgCoSwRKeJzNmzerZ8+eio6O1sGDBxn2g0/57rvvdP/992v//v2aPHmyHnvsMTVsyFY9ALwb36nhcXr06KEHH3xQR44c0b333mt2OYBb7N69W2PGjNEVV1yhtm3bavPmzZo9ezZhEoBPoIcSHqt9+/bas2ePVq5cqQEDBphdDlAjRUVFeuaZZzR79mxFRkZq7ty5uvrqqxneBuBTCJTwWPv371f79u3VqFEjZWZmVnpfMeDJDMPQggULNHnyZB09elRTpkzRo48+qtDQULNLAwC3Y8gbHqtNmzZ6+umndfz4cd1www1mlwOctx07dmjUqFG65ppr1K1bN23btk1PPvkkYRKAz6KHEh6vZ8+e+uWXX/TNN9/oiiuuMLsc4KwKCgr05JNP6vnnn1erVq30wgsv6IorrmB4G4DPI1DC42VlZally5by9/fXsWPHFBISYnZJQAWGYejTTz/VQw89pOzsbD366KOaMmWKGjRoYHZpAFAnGPKGx4uIiNAbb7yhoqIiXX755WaXA1SwdetWDRs2TOPHj1ffvn3166+/6vHHHydMAqhXCJTwCrfddpuGDh2q1NRUvffee2aXA+jEiRP6y1/+op49e+rQoUP6z3/+owULFig2Ntbs0gCgzjHkDa9RVFSk5s2by26368iRI2rWrJnZJaEeMgxDH3zwgf76178qPz9fjz32mCZPnqygoCCzSwMA09BDCa8REhKiTz/9VKWlpRo+fLjZ5aAe2rRpk+Lj45095r/99pseeeQRwiSAeo9ACa9yxRVXaNy4cdq0aZP+8Y9/mF0O6omcnBzdd999uvTSS3X8+HEtXrxYn376qVq3bm12aQDgERjyhtex2+2KjIxUXl6edu/erTZt2phdEnyUw+HQ22+/rUcffVQlJSWaMWOGJk6cqICAALNLAwCPQg8lvI6/v7++++47lZeXa9iwYWaXAx+1du1a9e/fX3fddZdGjx6tHTt2aPLkyYRJAKgCgRJeacCAAbrrrru0Z88e/fWvfzW7HPiQrKws3XXXXerXr59KSkq0fPlyvf/++2rRooXZpQGAx2LIG17L4XCoVatWOnr0qDZt2qQePXqYXRK8WHl5uV5//XX9/e9/l2EYevLJJ/U///M/vEMeAM4DPZTwWlarVYsXL5Yk2Ww2ORwOkyuCt1q5cqX69Omj++67T1dffbV27Nih++67jzAJAOeJQAmv1qVLF02dOlUZGRmaMGGC2eXAyxw9elS33XabBg0aJD8/P61atUpvvfWWIiMjzS4NALwKQ97wCZ06dVJaWpqWLl2qIUOGmF0OPFxZWZnmz5+vadOmyd/fX08//bQmTJggPz8/s0sDAK9EoIRPOHjwoNq2bauQkBAdO3ZMgYGBZpcED5WamqpJkyZp27Ztuueee/TEE08oPDzc7LIAwKsx5A2f0KpVK82ZM0d5eXm65pprzC4HHujgwYMaP368EhMT1bBhQ61bt06vvPIKYRIA3IAeSviUPn36aP369friiy80btw4s8uBBygtLdXcuXP1xBNPKDQ0VLNnz9Ytt9wiq5WfpwHAXQiU8Cm5ubmKjo6W1WpVZmamwsLCzC4JJkpOTtakSZO0e/duTZw4UTNmzFDjxo3NLgsAfA4/osOnNGnSRO+8845OnjypUaNGmV0OTLJ//35dc801GjlypKKjo7Vx40bNmzePMAkAtYRACZ8zfvx4jRgxQj/99JPeeusts8tBHSouLtYTTzyhLl26aNWqVfr444+Vmpqq7t27m10aAPg0hrzhk4qLi9W8eXOVlJTo4MGD7CtYD3z77be6//77lZ6ersmTJ+uxxx5Tw4YNzS4LAOoFeijhk4KDg/X555+rrKxMw4cPN7sc1KLdu3drzJgxGjNmjNq1a6ctW7Zo9uzZhEkAqEMESvgsm82mG264QVu3btVTTz1ldjlws6KiIj3++OPq2rWrfvnlF3322WdKTk5W586dzS4NAOodhrzh0xwOh5o3b67c3Fzt3LlT7du3N7skuMgwDC1YsECTJ0/W0aNHNWXKFD366KMKDQ01uzQAqLfooYRPs1qt+vHHH2UYBkPfPmDHjh0aNWqUrrnmGnXr1k3btm3Tk08+SZgEAJMRKOHz+vTpo/vuu0/79+/X/fffb3Y5qIH8/Hw9/PDD6t69u9LS0vTVV1/p22+/VYcOHcwuDQAghrxRTzgcDsXGxurgwYNat26devfubXZJOA+GYejTTz/Vgw8+qJycHE2dOlVTpkxRcHCw2aUBAM5AoES9kZaWps6dO6tZs2bKyMjg1XsebuvWrZo0aZJSU1M1btw4Pf/884qNjTW7LABAFfiOinqjY8eOmjZtmrKysnTLLbeYXQ7O4sSJE5o8ebJ69uypQ4cO6YcfftAXX3xBmAQAD0YPJeqdiy++WL/++qsWLVrEQh0P4nA49OGHH+qvf/2rCgoK9Nhjj+mBBx5QUFCQ2aUBAM6BQIl65+jRo2rdurUaNGigrKwsBQYGml1Svbdp0ybdd999WrlypW644QbNmTNHrVq1MrssAMB5Ysgb9U50dLRefPFF5efna+zYsWaXU6/l5OTovvvu06WXXqrc3FwtXrxYn3zyCWESALwMPZSotwYMGKBVq1bp008/1fXXX292OfWKw+HQ22+/rUcffVQlJSWaMWOGJk6cqICAALNLAwDUAIES9VZeXp6ioqIkSRkZGWrUqJHJFdUPa9eu1X333ae1a9fqlltu0ezZsxUdHW12WQAAFzDkjXqrUaNG+uCDD1RcXKykpCSzy/F5x44d01133aV+/fqptLRUy5cv1/vvv0+YBAAfQKBEvXbttddq9OjRWrNmjV555RWzy/FJ5eXlmj9/vjp16qTPPvtML730ktatW6fBgwebXRoAwE0Y8ka9V1paqoiICJ08eVL79+/XRRddZHZJPuOnn37SxIkTtWnTJk2YMEFPP/20mjdvbnZZAAA3o4cS9V5gYKC+/PJL2e129qV0k6NHj+q2227T4MGD5e/vr9WrV+utt94iTAKAjyJQApKGDRumm2++Wb/99pumT59udjleq6ysTHPnzlWnTp303Xff6Y033tCqVat02WWXmV0aAKAWMeQN/JfD4VB0dLSys7O1fft2xcXFmV2SV0lNTdXEiRP166+/6p577tETTzyhZs
2amV0WAKAO0EMJ/JfValVycrIMw9Dw4cPlcDjMLskrHDx4UOPHj1diYqIaNWqkdevWaf78+YRJAKhHCJTAGXr27KnJkyfr0KFDmjhxotnleLTS0lI9++yz6ty5s1JSUvTee+9pxYoV6tWrl9mlAQDqGEPeQBXatm2rffv2ac2aNerbt6/Z5Xic5ORkTZo0Sbt379akSZM0ffp0NW7c2OyyAAAmoYcSqMKSJUtktVo1atQo2e12s8vxGPv379c111yjkSNHqkWLFtq4caPmzp1LmASAeo5ACVShbdu2euqpp5STk6Px48ebXY7piouL9cQTT6hLly5atWqV/vWvfyklJUXdu3c3uzQAgAdgyBuoRo8ePbRlyxZ9//33Gj16tNnlmOLbb7/V/fffr/T0dP3lL3/R3//+dzVs2NDssgAAHoRACVQjKytLF110kQIDA5WVlaXg4GCzS6ozu3fv1v3336/vvvtOI0aM0EsvvaTOnTubXRYAwAMx5A1UIyIiQq+++qoKCwt1+eWXm11OnSgqKtJjjz2miy++WFu2bNHnn3+u5ORkwiQA4KzooQTOQ3x8vFasWKEPPvhAN998s9nl1ArDMLRgwQJNnjxZR48e1V//+lc9+uijCgkJMbs0AICHI1AC56GgoECRkZEqLy/XkSNHfG7T7h07dmjSpElauHChLr/8cs2bN08dOnQwuywAgJdgyBs4D2FhYfrkk09UWlqqpKQks8txm/z8fD388MPq3r27du/erW+++UbffvstYRIAcEEIlMB5Gjt2rMaOHasNGzbo+eefN7sclxiGoU8++USdO3fWiy++qMcee0zbtm3TFVdcYXZpAAAvxJA3cAHsdrsiIiJUUFCgvXv3qnXr1maXdMG2bt2qSZMmKTU1VePGjdPzzz+v2NhYs8sCAHgxeiiBC+Dv769vv/1W5eXlGjZsmNnlXJATJ05o8uTJ6tmzpw4fPqwffvhBX3zxBWESAOAyAiVwgQYPHqw77rhDu3bt0qOPPmp2OefkcDj0/vvvKy4uTm+++aZmzZqlLVu2aOTIkWaXBgDwEQx5AzXgcDjUsmVLZWRkaMuWLYqNjdWzzz6rYcOGKSEhwezynDZu3KiJEydq5cqVuvHGG/Xcc8+pVatWZpcFAPAx/mYXAHgjq9WqhQsXqkePHho6dKhCQ0OVnp6u3bt3e0SgzMnJ0d///ne9/vrr6tKli1JSUjyiLgCAb2LIG6ihTp06qV+/fsrOzlZ6erok6bfffjO1JofDoTfffFOdOnXSRx99pH/84x/auHEjYRIAUKsIlEANZGRkqHfv3lq9enWF47t27TKpImnNmjXq37+/7r77bl1++eXasWOHHnjgAQUEBJhWEwCgfiBQAjVw+PBh7d69W1ZrxT9CeXl5ysnJqdNajh07prvuukv9+/dXWVmZVqxYoffee0/R0dF1WgcAoP4iUAI10KtXL+3evVu33367LBZLhWC5e/fuStcXlti17fAJbUw/rm2HT6iwxH5BzysoKNAbb7yhkpIS57Hy8nLNnz9fnTp10meffaaXX35Z69at06BBg2r+wQAAqAFWeQMu2rp1qx566CH9+OOPkqQpU6Zo9uzZSsvI10er05WyI1PpOUU68w+aRVJMsxAlxkXqj/1i1DGqYbXPuPfee/Xqq69q1qxZevTRR/XTTz9p4sSJ+uWXXzRhwgTNmjVLzZs3r70PCQBANQiUgJssWLBA1113nVrFXaJBD7ysn/bkyM9qUbnj7H/ETp+P7xChWeO6q3WzkErXrFixQvHx8ZKkoKAgXXHFFfr888/Vt29fvfzyy7rssstq7TMBAHA+CJSAGz372XK9sjpLfgGBqiZHVuJntcjfatGMsV11Y98Y5/Hi4mJ1795de/fuVXl5uSQpMDBQ8+fP15133llpDicAAGbguxHgJi+npOnV9Xmy+F9YmJSkcoehErtDj3yxRS+npDmPP/XUU9q9e7czTEpSaWmp2rZtS5gEAHgMeigBN/hkbboe+WLLWc+XHN2lEys+VsnB7TLsZfJvEqWwnqPUqM/YKq9/9uruii7aq8TERFX1R7R79+7avHmz2+oHAMAVvCkHcNGBnCJN+3rbWc+f3LtBmZ/NVGBUezUeeKMsgcGy5x5VeX7WWe95/Ott6pT2qQzDkNVqVePGjdWsWTM1b95ckZGR6tevX218FAAAaoRACbho6oItsp9ljNtRUqSsb59Xg/Z91Xzco7JYzm+Y2u4w1CTp/ynvzZcUFhYmi8XizpIBAHArJmEBLkjLyNfyXVlnXclduD1VjsJcNR1yqywWqxylxTIMxznbLXcYWr4rSxknRZgEAHg8eigBF3y0Or3arYGK922SJShE9oJsZX7xpOw5h2QJCFZot0Q1G36XLP6BZ23bz2rRh6vSNX1s19oqHwAAt6CHEnBByo7MaveZLMs5LDnKdezzJ9SgbW81HzdVYT2SVLDxP8r6bl61bZc7DKXszHRzxQAAuB89lEANFZTYlZ5TVO01RlmxjLIShfUarWZJ/yNJCokbKKO8TAWbflBZ/B8V0KzlWe9Pzy5SYYldoUH8UQUAeC56KIEa2p9dqHPtuXV6SDu0y9AKx0MvTpAklRz6rdr7DUn7sgtrWCEAAHWDQAnUUKn93Itr/MLCT/0ztEnF46GNJUmO4gK3PAcAADMRKIEaCvQ/9x+fwOj2kiR7fnaF4/b8HEmSX0hjtzwHAAAz8Z0KqKHY8FCda0Of0M7xkqSCzckVjhdsTpasfgqK6V7t/Zb/PgcAAE/GTH+ghkKD/BXTLET7q1mYExjdXqE9klS4eaGOORwKjumm4vQtKvpthRoNuE7+DcOrfUZMeAgLcgAAHo/vVIALEuMi9cHq/dVuHRQ+8j75N2qugs2LVLTzZ/k3bq6mw+9So75XVtu2n9WixE6R7i4ZAAC3sxiGca6FqgDOIi0jX0nzltVa+4smD1GHyIa11j4AAO7AHErABR2jGiq+Q4T8rO59PaKf1aL4DhGESQCAVyBQAi6aNa67/N0cKP2tFs0aV/2CHQAAPAWBEnBR62YhmuHm923PHNtVrZuFuLVNAABqC4EScIMb+8boIVsnt7Q1xRanG/rGuKUtAADqAotyADf6ZG26pn29TXaHUe3K79/zs1rkb7Vo5tiuhEkAgNchUAJudiCnSFMXbNHyXVnys1qqDZanz8d3iNCscd0Z5gYAeCUCJVBL0jLy9dHqdKXszFR6dpEq/EEzDLWJCFVip0jd3D+G1dwAAK9GoATqQGGJXfuyC1Vqd+jpp57Qns2rte7nn8wuCwAAt+BNOUAdCA3yV9eLGkuSurdqqtQFaSZXBACA+7DKG6hjsbGxOnbsmAoLC80uBQAAtyBQAnWsbdu2kqT9+/ebXAkAAO5BoATqWGxsrCRp3759ptYBAIC7ECiBOtaiRQsFBAQQKAEAPoNACdQxPz8/xcTEECgBAD6DQAmYIDY2Vnv37jW7DAAA3IJACZggNjaWHkoAgM8gUAImIFACAHwJgRIwQWxsrLKyslRQUGB2KQAAuIxACZjg9NZB7
EUJAPAFBErABOxFCQDwJQRKwATsRQkA8CUESsAEfn5+atOmDYESAOATCJSASVjpDQDwFQRKwCQESgCAryBQAiYhUAIAfAWBEjAJe1ECAHwFgRIwCVsHAQB8BYESMAmBEgDgKwiUgEnYixIA4CsIlIBJrFYre1ECAHwCgRIwESu9AQC+gEAJmIhACQDwBQRKwERt27YlUAIAvB6BEjBRbGyssrOzlZ+fb3YpAADUGIESMNHprYP2799vbiEAALiAQAmYiL0oAQC+gEAJmCg6OlqBgYEESgCAVyNQAiY6vRfl3r17zS4FAIAaI1ACJmPrIACAtyNQAiYjUAIAvB2BEjAZgRIA4O0IlIDJYmNjlZOTo7y8PLNLAQCgRgiUgMnYixIA4O0IlIDJ2IsSAODtCJSAyaKjoxUUFESgBAB4LQIlYLLTe1ESKAEA3opACXgAVnoDALwZgRLwAARKAIA3I1ACHiA2NpbXLwIAvBaBEvAAsbGxOn78uE6cOGF2KQAAXDACJeAB2IsSAODNCJSAB2AvSgCANyNQAh4gKiqKvSgBAF6LQAl4APaiBAB4MwIl4CHYOggA4K0IlICHaNu2LYESAOCVCJSAh6CHEgDgrfzNLgDAKWfuRdm4cWOzywEAVKOwxK592YUqtTsU6G9VbHioQoPqb6yqv58c8DBn7kXZo0cPc4sBAFSSlpGvj1anK2VHptJzimSccc4iKaZZiBLjIvXHfjHqGNXQrDJNYTEMwzj3ZQBq29GjR9WiRQt9+eWXuvLKK80uBwDwXwdyijR1wRYt35UlP6tF5Y6zR6fT5+M7RGjWuO5q3SykDis1D3MoAQ8RFRWl4OBg5lECgAf5ZG26RsxdqpV7siWp2jB55vmVe7I1Yu5SfbI2vdZr9AQMeQMewmKxsBclAHiQl1PSNCd5Z43uLXcYKncYeuSLLcoqKNHExI5urs6z0EMJeBBWegOAZ/hkbXqNw+TvzUneqU99vKeSHkrAg8TGxmr16tVmlwEA9dqBnCJN+3rbWc8b9jLlLv9QhdtS5CguUEDzWDUZcosatO111nse/3qbBraP8Nk5lfRQAh6EHkoAMN/UBVtkr2auZNZ3c5W39kuFXpygpiPulsVqVea/p6v4wNlDqN1haOqCLbVRrkcgUAIeJDY2Vrm5ucrNzTW7FACol9Iy8rV8V9ZZF9+UHN6hol+XqcnQ29R02J1q2HOUosbPkn+jSOWmvnPWdssdhpbvytKuzPzaKt1UBErAg7Rt21bSqb0oAQB176PV6fKzWs56vmjHT5LFqoY9RzmPWfwDFXZJkkoO/SZ73rGz3utntejDVb45l5JACXiQ05ubM+wNAOZI2ZFZ7dZApRl7FNCspaxBFedCBrbo5Dx/NuUOQyk7M91TqIchUAIeJDIykr0oAcAkBSV2pecUVXtNeUGO/MKaVjruF9bMeb466dlFKiyx17xID0WgBDyIxWJhYQ4AmGR/dqHO9fpAw14q+QVUOm7xD/y/89XdL2lfdmENK/RcBErAwxAoAcAcpXbHOa+x+AdK5WWVjp8OkqeDpavP8TYESsDDxMbGau/evWaXAQD1TqD/uWORX1gzlRccr3T89FD36aFvV5/jbXzvEwFejh5KADBHbHiozr6++5TAyHYqyzkkR0nFuZalh0+9VScwql2191v++xxfQ6AEPExsbKxOnDjBXpQAUAfsdrt+/vlnzZw5U6NGJKrs+JFqrw/pPEgyHMrf9IPzmGEvU8GWhQq8KE7+jZpXe39MeIhCg3zvRYW+94kAL3fm1kE9e/Y0tRYA8EW7d+/WwoULlZycrCVLlujEiRNq1KiRhg8frsjWYdpUKJWfZXVO0EVxCuk8WLlL35OjKFf+TS9S4ZbFsp/IVNTo+6t9rp/VosROkbXwicxHoAQ8DIESANwrNzdXS5YsUXJyshYuXKg9e/bIz89P/fv31+TJk2Wz2dS3b1/5+/srLSNfSfOWVdtexBV/Ue6yD1W4NUXlxQUKjIxV5LWPKzimW7X3lTsM3dw/xp0fzWMQKAEPw16UAOCasrIyrV692hkg16xZI4fDoY4dO2r06NFKSkpSQkKCGjduXOnejlENFd8hQiv3ZJ91g3OLf6CaDrtTTYfded41+VktGtguXB0iG9b4c3kyAiXgYdiLEgAujGEYSktLcwbIlJQU5efnq2nTphoxYoTuvPNOJSUlOUeAzmXWuO4aMXdptW/MuVD+VotmjevutvY8DYES8EBt27YlUAJANbKzs7V48WJniExPT1dAQIAGDhyohx9+WDabTb1795afn98Ft926WYhmjO2qR77Y4rZ6Z47tqtbNQs59oZciUAIeKDY2VitXrjS7DADwGCUlJfr555+dAXL9+vUyDENdunTRuHHjlJSUpKFDhyosLMwtz7uxb4yyCko0J3mny21NscXphr6+OXfyNAIl4IFiY2P18ccfm10GAJjGMAz9+uuvzgCZmpqqoqIiRUREKCkpSffee6+SkpLUqlWrWqthYmJHRYQFadrX22R3GBc0BO5ntcjfatHMsV19PkxKksUwDPdNEADgFv/7v/+rG264QcePH1eTJk3MLgcA6kRmZqYWLVrk3NLn8OHDCgwMVHx8vJKSkmSz2XTJJZfIaq3bbbQP5BRp6oItWr4rS35WS7XB8vT5+A4RmjWuu08Pc5+JQAl4oDVr1qhfv37auHEjWwcB8FnFxcVasWKFsxdy06ZNkqTu3bs7A2R8fLxCQjwjlKVl5Ouj1elK2Zmp9OwinRmgLDq1aXlip0jd3D/GZ1dznw2BEvBAmZmZioqK0hdffKFx48aZXQ4AuIVhGNqyZYszQC5btkzFxcWKioqSzWZTUlKSRowYoRYtWphd6jkVlti1L7tQpXaHAv2tig0P9ck34Jyv+vvJAQ/WvHlzNWjQgJXeALzekSNHtHDhQuevjIwMBQcHa+jQoXryySdls9nUrVs3WSzneou2ZwkN8lfXiyrvY1lfESgBD8RelAC8VVFRkZYtW+acB7l161ZJUq9evXT77bcrKSlJgwYNUnBwsMmVwp0IlICHIlAC8AYOh0ObNm1yDmOvWLFCpaWlatmypWw2m6ZOnXrqHdmRvvkOa5xCoAQ8VGxsrH766SezywCASg4cOOAcwl60aJGysrIUGhqqhIQEzZ49WzabTZ07d/a6YWzUHIES8FCxsbH66KOPZBgGfykDMFVBQYFSU1Odw9i//fabLBaL+vTpo7vvvls2m00DBgxQYGCg2aXCJARKwEPFxsYqLy9Pubm5atq0qdnlAKhHysvLtX79emeA/Pnnn1VWVqY2bdrIZrNp5syZGjZsmMLDw80uFR6CQAl4qLZt20qS9u3bR6AEUOv27dvnDJCLFy/W8ePH1bBhQw0bNkxz586VzWZThw4dGDFBlQiUgIeKjY2VdOov+V69eplbDACfc+LECaWkpDhD5K5du2S1WtWvXz9NmjRJNptNl112mQICAswuFV6AQAl4qIiICIWEhLDSG4Bb2O12rVmzxhkgV69erfLycrVv3142m02zZ89WYmIir3tF
jRAoAQ/FXpQAXGEYhnbv3u0MkEuWLFFeXp6aNGmi4cOHa/78+UpKSlK7du3MLhU+gEAJeDACJYALcfz4cS1evNgZIvft2yd/f38NGDBADz30kGw2m/r06SM/Pz+zS4WPIVACHiw2NlbLly83uwwAHqq0tFSrVq1yBsh169bJ4XAoLi5OY8aMUVJSkhISEtSwYUOzS4WPI1ACHiw2NlYffPABe1ECkHRqGHvHjh3OAJmamqqCggKFh4drxIgRuvvuu5WUlKSYmBizS0U9Q6AEPFhsbKzy8/N1/PhxNWvWzOxyAJggKytLixcvdr7a8MCBAwoICNDgwYM1depU2Ww29erVS1ar1exSUY8RKAEPdubWQQRKoH4oKSnRTz/95Hy14YYNG2QYhrp27aprrrlGNptNQ4YMUWhoqNmlAk4ESsCDnRkoe/fubW4xAGqFYRjatm2bcxh72bJlKioqUmRkpJKSkjRp0iSNGDFCLVu2NLtU4KwIlIAHYy9KwDdlZGRo0aJFzmHsI0eOKCgoSEOGDNH06dNls9nUvXt3hrHhNQiUgAdjL0rAN5w8eVLLly939kJu3rxZknTJJZfo5ptvVlJSkgYPHqwGDRqYXClQMwRKwMO1bduWQAl4GYfDoc2bNzsD5PLly1VSUqIWLVrIZrPpr3/9q0aMGKGoqCizSwXcgkAJeLjY2FgtW7bM7DIAnMPhw4edAXLRokXKzMxUgwYNlJCQoKefflo2m00XX3wxW4DBJxEoAQ8XGxur999/n70oAQ9TWFiopUuXOkPk9u3bZbFY1Lt3b915552y2WwaOHCggoKCzC4VqHUESsDDsRcl4BkcDoc2bNjgDJArV65UaWmpWrduLZvNpscff1zDhw9XRESE2aUCdY5ACXg49qIEzJOenu4MkIsXL1Z2drbCwsKUmJioOXPmyGazqVOnToweoN4jUAIe7nSg3Lt3L3tRArUsLy9Pqampzk3Fd+zYIavVqr59++r//b//J5vNpv79+ysgIMDsUgGPQqAEPFx4eLhCQ0NZ6Q3UArvdrnXr1jl7IVetWiW73a62bdvKZrPpqaee0rBhw9S0aVOzSwU8GoES8HDsRQm41549e5wBcsmSJcrNzVWjRo00fPhwvfjii7LZbGrfvr3ZZQJehUAJeAECJVBzubm5SklJcb6VZvfu3fLz81P//v31wAMPyGazqW/fvvL351siUFP86QG8QGxsrJYuXWp2GYBXKCsr0+rVq53zIFevXi2Hw6GOHTtq1KhRSkpKUkJCgho3bmx2qYDPIFACXiA2Nlbvvfcee1ECVTAMQ2lpac5h7JSUFOXn56tp06YaMWKE7rjjDiUlJTkXuAFwPwIl4AViY2NVUFCgnJwchYeHm10OfFhhiV37sgtVanco0N+q2PBQhQZ53reKnJwcLV682DmMvX//fgUEBGjgwIF6+OGHZbPZ1Lt3b/n5+ZldKlAveN7fEgAqadu2raRTe1ESKOFuaRn5+mh1ulJ2ZCo9p0jGGecskmKahSgxLlJ/7BejjlENTamxtLRUK1eudPZCrl+/XoZhqEuXLrrqqquUlJSkoUOHKiwszJT6gPrOYhiGce7LAJgpOztbERER+uyzz3TNNdeYXQ58xIGcIk1dsEXLd2XJz2pRuePs3w5On4/vEKFZ47qrdbOQWq3NMAz9+uuvzgC5dOlSFRYWKiIiQklJSc5frVq1qtU6AJwfeigBL9CsWTOFhYWx0htu88nadE37epvs/w2R1YXJM8+v3JOtEXOXasbYrrqxb4xbazp27JgWLVrkHMY+dOiQAgMDFR8fr8cee0w2m02XXHKJrFarW58LwHUESsALsBcl3OnllDTNSd5Zo3vLHYbKHYYe+WKLsgpKNDGxY5XXbdiwQatWrdK999571raKi4u1YsUKZy/kpk2bJEndu3fXDTfcIJvNpvj4eIWE1G5vKADXMeQNeIkxY8ZIkr755huTK4E3+2Rtuh75Ykul46XH9uvEio9VenSXygtzZQkIUkB4azXqd7VCOvY7a3vPXt1dN/yup/L777/XNddco+LiYh08eFAtW7aUdGoYe8uWLc4AuWzZMhUXFys6Oto5hD1ixAi1aNHCvR8aQK2jhxLwErGxsUpJSTG7DHixAzlFmvb1tirPledlylF6UqHdh8svrJmMshIV7VipY58/oWajJqphz1FV3vf419s0sH2Ec07l22+/rbvuukun+yr+/e9/Kzw8XMnJyVq0aJGOHj2q4OBgDR06VE8++aRsNpu6devGdliAl6OHEvAS//jHPzRt2jTl5+fzzRc1css/V2vlnuxzzpc8zXCU68i7D8iwl6nl3a9VeY2f1aKB7cL1/p2X6cknn9Tjjz9e5XW9evWSzWZTUlKSBg0apODg4Bp/DgCehx5KwEvExsaqsLDQueIbuBBpGflavivrgu6xWP3k3zBCJUfTznpNucPQ8l1ZGnLF9Vrx/WeVzoeGhmrXrl2Kjo6+4JoBeA+WygFe4vRbPliYg5r4aHW6/Kzn7tl2lBarvOiEyo4fUd6aL3Vyz3oFt7mk2nv8rBbtNKL+7/d+fs5e9MLCQh09etS14gF4PHooAS9xZqDs06ePucXA66TsyDyvoe7jS95SwaYfTv3GYlVIpwFqZvt/1d5T7jDUedg12vjWVK1bt07r16/XmjVrtHbtWmVnZ2v//v3q2bOnGz4FAE9FoAS8BHtRoqYKSuxKzyk6r2sb9b1SIZ0Hqzw/W0W/rZBhOKTysnPel55dpMbhkRo7dqzGjh0r6dSq7tzcXDVt2tSl+gF4Poa8AS/BXpSoqf3ZhTrf1ZcB4a3VILanwroPV+R102SUFivzs5k61/pNQ9K+7MIKxywWC2ESqCcIlIAXadu2LYESF6zwZEmN7w3pPEilR9Jkzzl0zmtL7Y4aPweAd2PIG/AisbGxWrJkidllwAPZ7Xbt3btXv/32m3bs2FHhn/n+jRV9+ws1atcoOxVGHSWF57hSCvQ/1UdhGIYOHDig9evXa8eOHbrtttvYrBzwcQRKwIucHvI2DIO9KOup48ePa8eOHc6weDo47tq1S2Vlp+Y6hoaGqnPnzoqLi5PNZlPHLt306HpVO+xdXpgrv9AmFY4Z5XYVbl0ii3+QAiLO9d5uQy/Nelx7dv6qtWvXKjc313kmLi5O48aNq9HnBeAdCJSAF9iyZYtWrVqln376SYWFhbr00kt14MABXXbZZfruu+/MLg9uVl5ern379lXZ25iZmem8LiYmRp07d9aIESM0ceJExcXFqXPnzrrooosq/cDx6t4U7a9mYU72Dy/LKC1SUOtu8msYrvKC4yrcnip79kE1HTZB1sAG1dZclnNEr7/xYqXjFotF8fHxF/gVAOBtCJSAFxg1apQOHz4sf/9Tf2Q3btwoSWrQoPpv8vBsJ06cqLK3MS0tTaWlpZKkkJAQZ1AcNmyYs+exY8eOCg0NPe9nJcZF6oPV+8+6dVBol3gVbF6o/I3fy3EyX9bABgqM7qCmCXdU+y5v6dQ+lIndW+p/Q0J08uTJCgt4QkJC9Oabb8p
ms6lXr16yWpm6D/giXr0IeIFXXnlF9913X6XjX3zxBUOJHq68vFzp6elV9jaeueF3q1atnGHxzH+2bNnSLSEsLSNfSfOWudzO2SyaPET2nEMaPny4MjMzZbfbZbVa1b59ex05ckQFBQUKDw9XUlKS8xWMrVq1qrV6ANQtAiXgBRwOhxITE7Vy5UrZ7XZJUlhYmLKyshQUFGRydZCk/Pz8Knsbd+7cqZKSUwtbgoODKwXGuLg4derUSWFhYbVe44W+y/t8nH6X9wcTTvViHj58WElJSfrtt9/kcDiUmpqqAQMGaNWqVUpOTlZycrLWrVsnwzDUtWtX2Ww22Ww2DRkyRCEhIW6rC0DdIlACXmLv3r26+OKLVVxcLIvFojvuuEP//Oc/zS6rXnE4HDpw4ECVvY2HDx92XnfRRRdV2dvYunVrU4d8D+QUacTcpSpx4/Y+Qf5WLZo8VK2b/V8YPHHihMaMGaOtW7fq6NGjCgwMrHBPdna2Fi9erOTkZP344486ePCgAgMDFR8f7wyYPXr0YHgc8CIESsCLnDn0vXjxYg0bNszkinxTQUGBdu7cWWVv48mTJyVJQUFB6tSpkzMsntnb2KhRI5M/wdl9sjZdj3yxxW3tPXt1d93Qt/IK8PLycuXm5io8PLza+w3D0G+//ebsvUxNTVVRUZGioqIqDI9HR0e7rWYA7kegBLyIw+FQmzZtlJmZqaKiIvn5+ZldktcyDEMHDx6ssrfx4MGDzuuio6Or7G2MiYnx2q//yylpmpO80+V2ptjidF9iBzdU9H9KSkq0cuVKZ8DcsGGDJKlHjx7O3svBgwezIA3wMARKwMtkZWVp9/6DCmvRVqV2hwL9rYoND1VoEJs2VKWoqKhCb+Ppf+7cuVOFhac26w4MDFTHjh2rnN/YuHFjkz9B7fhkbbqmfb1NdodxQXMq/awW+Vstmjm2a5U9k+6WmZmpRYsWOQPmkSNHFBwcrCFDhmjkyJGy2Wzq2rUr+7ICJiNQAl4iLSNfH61OV8qOTKXnFFXYpNoiKaZZiBLjIvXHfjHqGNXQrDJNYRiGDh8+XGVvY3p6uvO6yMjIKnsbY2Njvba30RUHcoo0dcEWLd+VJT+rpdpgefp8fIcIzRrXvcKcybpiGIa2bdvmDJdLly5VcXGxWrRo4ey9HDFihCIjI+u8NqC+I1ACHs7bvunXppMnTyotLa1ScNyxY4cKCgokSQEBAerQoUOVvY1NmzY1+RN4JucPKzszlZ5dxQ8r4SFK7BSpm/vHqEOk5/ywUlxcrOXLlzsD5ubNmyVJvXr1cvZeDhw4kJ0QgDpAoAQ8mKvDkjPGdtWNdTAs6U6GYejo0aNV9jbu37/fuWl2RERElb2Nbdu2dW4AjwtXWGLXvuxCr5xOceTIkQrD45mZmQoJCVFCQoKzB7Nz584MjwO1gEAJeCh3LZx4yNZJExM7uqEi9youLtauXbuqDI75+fmSJH9/f7Vv377K3sZzrR5G/eZwOLR582ZnuFy+fLlKS0vVqlUr2Ww2jRw5UsOHD+f/I8BNCJSABzrb1i7F+zcr419Tq7wn+pY5CmrZucpzZ9vapbYZhqHMzMwKYfH0v+/bt08Ox6n9EJs2baouXbpUCo7t2rVTQEBAndcN31NUVKRly5Y5A+a2bdtksVjUp08fZ+9l//79K+2ZCeD8ECgBD1Pd5tOnA2XDS8cosEWnCucatOstv5CqVyRXtfn0aUVFRdq3b58uvvjiGtdcUlKi3bt3V9nbeOLECUmSn5+f2rVrV2VvY0REBMOQqFOHDh1yhsuFCxcqOztbYWFhSkxMdM6/7NChA/9fAueJQAl4mOpej3c6UEZc9YhCOw8+7zZ//3q809atW6cbbrhBBw4cUE5OTrWv/zMMQ1lZWVX2Nu7Zs8fZ29i4ceMKG32f/vf27dvT+wOP5HA4tHHjRmfA/Omnn1RWVqbY2Fhn7+WwYcNY1AVUg0AJeJC0jHwlzVt21vNnBsoGbXvLEhAki/X8t7tZNHmIOkQ2VHl5uWbPnq3HHntMhmHI4XBo/fr16t27t8rKys7a23j8+HFJktVqVdu2bavsbYyMjKRXB16toKBAqampzoC5Y8cOWa1WXXbZZc7ey8suu4zFX8AZCJSAB5n+9TZ9sHr/WVd0nw6UlsAGMkpPSharglp3VdPEOxXUovqFN35Wi27p10Z39myom266ST/99FOF87169VJhYaF2796t8vJySVKjRo2qDI0dOnRQcHCwez404OH279+vhQsXKjk5WYsWLdLx48fVqFEjDR8+3NmD2a5dO7PLBExFoAQ8yNDnUrQ/p+is54sP/qr8tQvUoF0fWUMaqywrXXlrFsgoK1b0zc8pMLp9te1HBBvaOPNK5/D0mdq3b68xY8ZUCI7R0dH0NgJnKC8v17p165y9lz///LPKy8vVvn17Z+9lYmKiR7/PHagNBErAQxSU2NV9+o+60D+QZccP68g/JymodVdF3TDz3M95525lZxx2BkXDMGSxWHTttdfqf//3f2tQOVB/5eXlKSUlxRkwd+3aJT8/Pw0YMMDZe9mnT596+SYm1C9WswsAcMr+7MILDpOSFND0IjXo2E/F6ZtlOMrPef3SDduVnp6uF154QYMHD5bFYpFhGPrtt99q8HSgfmvUqJGuvPJKzZ8/X2lpadq9e7defvllNW/eXHPmzFH//v3VvHlzXXfddXrrrbe0f/9+s0sGagU9lICH2Jh+XONeXVmje4+nvK281V+o9eT/lTWo+tctLvh/A9Ur5v9Wqx47dkxff/21mjZtqquvvrpGzwdQmd1u15o1a5y9l6tXr5bD4VBcXJyz9zIhIaHa3RUAb0GgBDzEtsMndPlLK2p077EFs3Ry9zq1fvAzWSzVDzx8N2mwul5U9X6VAGrP8ePHlZKSoh9//FE//vij9u/fr4CAAA0cONA5/7JXr16yWhk8hPchUAIeorDErm7nmENZXnSi0ublpRl7dOS9v6hBu0sVee1j1T7DImnr9JFe825mwFcZhqFdu3Y5ey+XLFmigoIChYeHKykpSTabTUlJSWrVqpXZpQLnhUAJeJBzrfI++vFUWQMCFdSyy39XeR9QwS8/SFZ/tbhljgIiWlfbfpvwEC19KNHdZQNwUVlZmX7++WdnwFy3bp0Mw1DXrl2dw+NDhgxRSEj1U1oAsxAoAQ9yrn0o89Z9rcJtqbIfPyJHaZH8QhoruM0lajx4vAKaXlRt26f3oZw+tmttlA7AjbKzs7V48WIlJyfrxx9/1MGDBxUYGKj4+HhnwOzRowfD4/AYBErAg5zrTTmuOv2mHADe4/QuDKd7L1NTU1VUVKSoqKgKw+PR0dFml4p6jEAJeJjq3uVdU2d7lzcA71NSUqKVK1fqxx9/VHJysjZu3ChJ6tGjh7P3Mj4+nrdZoU4RKAEPcyCnSCPmLlWJvfLbbGoqyN+qRZOHqnUz5l8BviYzM1OLFi1y9mAeOXJEwcHBGjp0qDNgdu3albdeoVYRKAEP9MnadD3yxRa3tffs1d11Q98Yt7UHwDMZhqFt27Y5ey+XLV
um4uJitWjRQjabTSNHjtSIESPUvHlzs0uFjyFQAh7q5ZQ0zUne6XI7U2xxui+xgxsqAuBtTp48qRUrVjh7Lzdv3ixJ6t27t7P3cuDAgQoKCjK5Ung7AiXgwT5Zm65pX2+T3WFc0JxKP6tF/laLZo7tSs8kAKcjR45o4cKFSk5O1sKFC5WZmanQ0FAlJCQ4A2ZcXBzD47hgBErAwx3IKdLUBVu0fFeW/KyWaoPl6fPxHSI0a1x35kwCOCuHw6HNmzc7ey+XL1+u0tJStW7d2hkuhw8frvDwcLNLhRcgUAJeIi0jXx+tTlfKzkylZxdVeKOORVJMeIgSO0Xq5v4xbA0E4IIVFhZq2bJlzoC5fft2WSwW9e3b1xkw+/fvr4CAALNLrVZhiV37sgtVanco0N+q2PBQ3g5WBwiUgBfiL0wAte3gwYMVhsezs7PVsGFDJSYmOhf4tG/f3iOGx50/cO/IVHpOFT9wNwtRYlyk/tgvRh2j+IG7NhAoAQBAtRwOhzZs2ODsvfzpp59kt9vVtm1bZ+/lsGHD1KRJkzqtiylBnoNACQAALkh+fr6WLl3qfDXkzp07ZbVa1a9fP2fvZd++feXvX3sjJ64uWpwxtqtuZNGi2xAoAQCAS/bt2+ccHl+0aJFyc3PVuHFjDR8+3NmD2bZtW7c9z13bqj1k66SJiR3dUBEIlAAAwG3Ky8u1bt065+bqq1atUnl5uTp06ODsvUxISFCjRo1q1P75vvjhxMpPlbvsAwVExOiiP71y1ut48YN7ECgBAECtOXHihFJSUpzD43v27JG/v78GDBjg7L289NJL5efnd862zvfVtPa8LB1+838kWeTfOLLaQMmrad2DQAkAAOrM7t27nYt7lixZory8PDVr1kwjRoxwBszWrVtXee8t/1ytlXuyzzln8thXz8pRdEKGwyHHybxqA6Wf1aKB7cL1wYR+Ln2u+o5ACQAATFFWVqY1a9Y4A+aaNWvkcDjUuXNnZ7hMSEhQaGio0jLylTRv2TnbLE7fqox/TVWLO15UzsLXzhkoT1s0eQh7+LqAQAkAADxCTk6OlixZ4hweT09PV0BAgAYPHqwWV/xZq7IDq+2dNBzlOvLO/Qpq2Vnhoybq6EePnFeg9LNadEu/Npo+tqu7P1K9YTW7AAAAAElq1qyZrr32Wr3xxhvat2+fduzYoeeff15hYWHamFF6zqHugo3/kT3vmJoMueWCnlvuMJSyM9OV0us9AiUAAPA4FotFnTp10sSJE/Xxv7/QSWtotdeXn8xT7vKP1GTgDfILaXzBz0vPLlJhib2m5dZ7BEoAAODR9mcX6lzz83KXfSBrgzA17DOmRs8wJO3LLqzRvZB4+S8AAPBopefYJqgs55AKNv2opsPvUnl+jvO4UV4mw1Eue26GLEEh8mtQ/aKbcz0HZ0egBAAAHi3Qv/oB1fL8bMlw6Pii13V80euVzh96bYIa9hmrZiPuduk5ODsCJQAA8Gix4aGySGcd9g5o3kbNr/5bpeO5yz6Qo/Skmo24W/5NWlT7DMt/n4OaIVACAACPFhrkr5hmIdqfU1Tleb+QxgrpNKDS8by1X0lSled+LyY8RKFBxKKaom8XAAB4vMS4SPlZLbXStp/VosROkbXSdn3BxuYAAMDjne+bcmqKN+W4hh5KAADg8TpGNVR8hwi391L6WS2K7xBBmHQRgRIAAHiFWeO6y9/NgdLfatGscd3d2mZ9RKAEAABeoXWzEM1w8/u2Z47tqtbNQtzaZn1EoAQAAF7jxr4xesjWyS1tTbHF6Ya+MW5pq75jUQ4AAPA6n6xN17Svt8nuMFTuOP8o42e1yN9q0cyxXQmTbkSgBAAAXulATpGmLtii5buy5Ge1VBssT5+P7xChWeO6M8ztZgRKAADg1dIy8vXR6nSl7MxUenZRhTfqWHRq0/LETpG6uX8Mq7lrCYESAAD4jMISu/ZlF6rU7lCgv1Wx4aG8AacOECgBAADgElZ5AwAAwCUESgAAALiEQAkAAACXECgBAADgEgIlAAAAXEKgBAAAgEsIlAAAAHAJgRIAAAAuIVACAADAJQRKAAAAuIRACQAAAJcQKAEAAOASAiUAAABcQqAEAACASwiUAAAAcAmBEgAAAC4hUAIAAMAlBEoAAAC4hEAJAAAAlxAoAQAA4BICJQAAAFxCoAQAAIBLCJQAAABwCYESAAAALiFQAgAAwCUESgAAALiEQAkAAACXECgBAADgEgIlAAAAXEKgBAAAgEsIlAAAAHAJgRIAAAAuIVACAADAJQRKAAAAuIRACQAAAJcQKAEAAOASAiUAAABc8v8BK0B8ilXoImQAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAApQAAAIfCAYAAADOuEwnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABj1klEQVR4nO3deVxVdcLH8e+9XEFBcEFRXHADd01Ts9wxl6zcMs30ttdMMzVtto095TJlmlpN2TbtebFM09RpSlPRNBPNpVxZ3MANBFQQBLzc8/zRyEQIgiznXvi8X69ez3jOued84Zm5ff2d8/sdi2EYhgAAAIArZDU7AAAAADwbhRIAAAClQqEEAABAqVAoAQAAUCoUSgAAAJQKhRIAAAClQqEEAABAqVAoAQAAUCoUSgAAAJQKhRIAKpDFYtHUqVPNjgEAZYpCCaBMffLJJ7JYLHn/2Gw2NW7cWHfffbeOHTtWrtfeu3evpk6dqsOHDxfr+KlTp8pisahBgwbKzMwssL958+a6+eabyzhlxfj9/w9+/8/MmTPNjgagErKZHQBA5TR9+nS1aNFCWVlZ2rx5sz755BNt3LhRu3fvVvXq1cvlmnv37tW0adM0YMAANW/evNifS0pK0jvvvKNJkyaVSy6zDB48WHfeeWe+bV27djUpDYDKjEIJoFwMGzZM3bt3lyTdf//9qlevnmbNmqXly5dr3LhxJqfLr0uXLpo9e7b++te/qkaNGmbHKTOtW7eW3W43OwaAKoBb3gAqRN++fSVJBw4cyLd97dq16tu3r/z8/FS7dm2NHDlS+/btK/D5HTt2aNiwYQoICFDNmjV1/fXXa/PmzXn7P/nkE40dO1aSFB4enneLd926dZfN9sILLygxMVHvvPPOZY/NyMjQpEmT1LRpU/n4+KhNmzaaM2eODMPId1x2drYef/xx1a9fX/7+/hoxYoSOHj16yXMeO3ZM9957rxo0aCAfHx916NBBH330UYHj4uPjtX///stm/L3z588rKyurRJ8BgJKiUAKoEBefa6xTp07ettWrV2vo0KFKSkrS1KlT9cQTT2jTpk3q3bt3vucg9+zZo759++qXX37R008/reeff16HDh3SgAEDFBUVJUnq16+fHnnkEUnS5MmTNX/+fM2fP1/t2rW7bLa+fftq4MCBeuWVV3T+/PlCjzMMQyNGjNBrr72mG264Qa+++qratGmjp556Sk888US+Y++//369/vrrGjJkiGbOnKlq1arppptuKnDOxMREXXvttVq9erUefvhh/fOf/1RoaKjuu+8+vf766/mOvfPOO4v181z0ySefyM/PTzVq1FD79u21YMGCYn8WAErEAIAy9PHHHxuSjNWrVxunTp0yEhISjMWLFxv169c3fHx8jISEhLxju3TpYgQFBRkpKSl523755RfDarUad955Z962UaNGGd7e3saBAwfyth0/ftzw9/c3+vXrl7dt0aJFhiQjMjKyWFmnTJliSDJOnTplrF+/3pBkvPrqq3n7mzVrZtx00015f/76668NScaLL76Y7zy33nqrYbFYjLi4OMMwDGPnzp2GJOOvf/1rvuMmTJhgSDKmTJmSt+2+++4zgoODjeTk5HzHjh8/3qhVq5aRmZmZt61///5Gcb+2e/XqZbz++uvGsmXLjHfeecfo2LGjIcl4++23i/V5ACgJRigBlItBgwapfv36atq0qW699Vb5+flp+fLlatKkiSTpxIkT2rlzp+6++27VrVs373OdO3fW4MGD9Z///EeSlJubq1WrVmnUqFFq2bJl3nHBwcGaMGGCNm7cqLS0tFLn7devn8LDw4scpfzPf/4jLy+vvJHQiyZNmiTDMPTtt9/mHSepwHGPPfZYvj8bhqGvvvpKw4cPl2EYSk5Ozvtn6NChOnv2rLZv3553/Lp16wrcWi/Mjz/+qEcffVQjRozQgw8+qG3btqljx46aPHlykaOwAHAlKJQAysVbb72l77//XosXL9aNN96o5ORk+fj45O0/cuSIJKlNmzYFPtuuXTslJycrIyNDp06dUmZmZqHHuVwuJSQklEnmqVOn6uTJk3r33Xcvuf/IkSNq1KiR/P39C+S4uP/i/7VarWrVqlW+4/74M5w6dUpnzpzRv/71L9WvXz/fP/fcc4+k32aglwVvb289/PDDOnPmjLZt21Ym5wSAi5jlDaBcXHPNNXmzvEeNGqU+ffpowoQJio6OVs2aNU1Od2n9+vXTgAED9Morr+jBBx8s9+u5XC5Jkt1u11133XXJYzp37lxm12vatKkkKTU1tczOCQAShRJABfDy8tLLL7+s8PBwzZs3T88++6yaNWsmSYqOji5w/P79+1WvXj35+fmpevXq8vX1LfQ4q9WaV5QsFkups06dOlUDBgzQe++9V2Bfs2bNtHr1aqWnp+cbpbw48/riz9SsWTO5XC4dOHAg36jkH3+GizPAc3NzNWjQoFJnv5yDBw/mXRcAyhK3vAFUiAEDBuiaa67R66+/rqysLAUHB6tLly769NNPdebMmbzjdu/erVWrVunGG2+U9FsZHTJkiJYtW5Zv5ndiYqIWLFigPn36KCAgQJLk5+cnSfnOV1L9+/fXgAEDNGvWrALL7dx4443Kzc3VvHnz8m1/7bXXZLFYNGzYMEnK+79vvPFGvuP+OGvby8tLY8aM0VdffaXdu3cXyHLq1Kl8fy7uskF//Jwkpaen6/XXX1e9evXUrVu3y54DAEqCEUoAFeapp57S2LFj9cknn+jBBx/U7NmzNWzYMF133XW67777dP78eb355puqVatWvvddv/jii/r+++/Vp08f/fWvf5XNZtN7772n7OxsvfLKK3nHdenSRV5eXpo1a5bOnj0rHx8fDRw4UEFBQSXKOWXKFIWHhxfYPnz4cIWHh+u5557T4cOHddVVV2nVqlVatmyZHnvssbxnJrt06aLbb79db7/9ts6ePatevXppzZo1iouLK3DOmTNnKjIyUj179tQDDzyg9u3bKzU1Vdu3b9fq1avz3Z6+8847tX79+stOzHnrrbf09ddfa/jw4QoJCdGJEyf00UcfKT4+XvPnz5e3t3eJfh8AcFmmzjEHUOlcXDZo69atBfbl5uYarVq1Mlq1amU4nU7DMAxj9erVRu/evY0aNWoYAQEBxvDhw429e/cW+Oz27duNoUOHGjVr1jR8fX2N8PBwY9OmTQWOe//9942WLVsaXl5el11C6PfLBv3RxSV6fr9skGEYRnp6uvH4448bjRo1MqpVq2aEhYUZs2fPNlwuV77jzp8/bzzyyCNGYGCg4efnZwwfPtxISEgosGyQYRhGYmKi8dBDDxlNmzY1qlWrZjRs2NC4/vrrjX/961+XzHQ5q1atMgYPHmw0bNjQqFatmlG7dm1jyJAhxpo1ay77WQC4EhbDKOYaFAAAAMAl8AwlAAAASoVCCQAAgFKhUAIAAKBUK
JQAAAAoFQolAAAASoVCCQAAgFKhUAKoUhISElS9enX9+OOPZkcpE+vWrZPFYtG6detK/Nl3331XISEhys7OLvtgAKoUCiWAMjNgwABZLJbL/vP7t+CUxttvv61PPvmkRJ+ZPn26evbsqd69e+fbvmLFCvXv319BQUHy9fVVy5YtNW7cOH333XdlktUd3X333crJybnke8sBoCRY2BxAmfn++++VmJiY9+etW7fqjTfe0OTJk9WuXbu87Z07d1bnzp1Lfb2OHTuqXr16xR6dO3XqlBo3bqxPP/1Ut99+e972OXPm6KmnnlL//v01cuRI+fr6Ki4uTqtXr9ZVV11V4tJakdatW6fw8HBFRkZqwIABJf78M888o4ULF+rQoUOyWCxlHxBAlcC7vAGUmcGDB+f7c/Xq1fXGG29o8ODBV1R2yprD4ZDNZtPw4cPztjmdTv3jH//Q4MGDtWrVqgKfSUpKqsiIFW7cuHF65ZVXFBkZqYEDB5odB4CH4pY3gAr37bffqm/fvvLz85O/v79uuukm7dmzJ98xJ0+e1D333KMmTZrIx8dHwcHBGjlypA4fPixJat68ufbs2aP169fn3Uq/XGn9+uuv1bNnT9WsWTNvW3JystLS0grcAr8oKCgo7z/n5OTohRdeULdu3VSrVi35+fmpb9++ioyMzPeZw4cPy2KxaM6cOXrrrbfUsmVL+fr6asiQIUpISJBhGPrHP/6hJk2aqEaNGho5cqRSU1PznaN58+a6+eabtWrVKnXp0kXVq1dX+/bttWTJksv9eiVJUVFRuuGGG1SrVi35+vqqf//+l3xutFu3bqpbt66WLVtWrPMCwKVQKAFUqPnz5+umm25SzZo1NWvWLD3//PPau3ev+vTpk1cWJWnMmDFaunSp7rnnHr399tt65JFHlJ6ervj4eEnS66+/riZNmqht27aaP3++5s+fr+eee67Q6164cEFbt27V1VdfnW97UFCQatSooRUrVhQodX+UlpamDz74QAMGDNCsWbM0depUnTp1SkOHDtXOnTsLHB8REaG3335bf/vb3zRp0iStX79e48aN0//93//pu+++0zPPPKM//elPWrFihZ588skCn4+NjdVtt92mYcOG6eWXX5bNZtPYsWP1/fffF5lz7dq16tevn9LS0jRlyhTNmDFDZ86c0cCBA7Vly5YCx1999dWVZpISAJMYAFBOFi1aZEgyIiMjDcMwjPT0dKN27drGAw88kO+4kydPGrVq1crbfvr0aUOSMXv27CLP36FDB6N///7FyhIXF2dIMt58880C+1544QVDkuHn52cMGzbMeOmll4xt27YVOM7pdBrZ2dn5tp0+fdpo0KCBce+99+ZtO3TokCHJqF+/vnHmzJm87X//+98NScZVV11lXLhwIW/77bffbnh7extZWVl525o1a2ZIMr766qu8bWfPnjWCg4ONrl275m2LjIzM9zt2uVxGWFiYMXToUMPlcuUdl5mZabRo0cIYPHhwgZ/rT3/6k1GjRo1L/t4AoDgYoQRQYb7//nudOXNGt99+u5KTk/P+8fLyUs+ePfNuHdeoUUPe3t5at26dTp8+XSbXTklJkSTVqVOnwL5p06ZpwYIF6tq1q1auXKnnnntO3bp109VXX619+/blHefl5SVvb29JksvlUmpqqpxOp7p3767t27cXOO/YsWNVq1atvD/37NlTkmS322Wz2fJtz8nJ0bFjx/J9vlGjRho9enTenwMCAnTnnXdqx44dOnny5CV/zp07dyo2NlYTJkxQSkpK3u84IyND119/vX744Qe5XK58n6lTp47Onz+vzMzMS//yAOAymJQDoMLExsZKUqGTPwICAiRJPj4+mjVrliZNmqQGDRro2muv1c0336w777xTDRs2LFUGo5CFLW6//XbdfvvtSktLU1RUlD755BMtWLBAw4cP1+7du1W9enVJ0qeffqq5c+dq//79unDhQt7nW7RoUeCcISEh+f58sVw2bdr0ktv/WJ5DQ0MLzLxu3bq1pN+e07zU7+Li7/iuu+665M8pSWfPns1XrC/+TpjlDeBKUSgBVJiLI2Pz58+/ZBn6/ajdY489puHDh+vrr7/WypUr9fzzz+vll1/W2rVr1bVr1xJfOzAwUFLB0vZHAQEBGjx4sAYPHqxq1arp008/VVRUlPr37y+Hw6G7775bo0aN0lNPPaWgoCB5eXnp5Zdf1oEDBwqcy8vL65LXKGx7YWW3JC7+jmfPnq0uXbpc8pjfT0qSfvud+Pr6qkaNGqW+PoCqiUIJoMK0atVK0m8TYQYNGlSs4ydNmqRJkyYpNjZWXbp00dy5c+VwOCSVbEQtJCRENWrU0KFDh4r9me7du+vTTz/ViRMnJEmLFy9Wy5YttWTJknzXnjJlSrHPWRJxcXEyDCPftWJiYiT9Ngv8Ui7+jgMCAor1O5akQ4cO5VsnFABKimcoAVSYoUOHKiAgQDNmzMh3u/iiU6dOSZIyMzOVlZWVb1+rVq3k7++f7zWBfn5+OnPmTLGuXa1aNXXv3l0///xzvu2ZmZn66aefLvmZb7/9VpLUpk0bSf8bWfz9SGJUVFShny+t48ePa+nSpXl/TktL02effaYuXboUeuu/W7duatWqlebMmaNz584V2H/xd/x727dvV69evcouOIAqhxFKABUmICBA77zzju644w5dffXVGj9+vOrXr6/4+Hh988036t27t+bNm6eYmBhdf/31GjdunNq3by+bzaalS5cqMTFR48ePzztft27d9M477+jFF19UaGiogoKCilyce+TIkXruueeUlpaW97xmZmamevXqpWuvvVY33HCDmjZtqjNnzujrr7/Whg0bNGrUqLxb7DfffLOWLFmi0aNH66abbtKhQ4f07rvvqn379pcsb6XVunVr3Xfffdq6dasaNGigjz76SImJifr4448L/YzVatUHH3ygYcOGqUOHDrrnnnvUuHFjHTt2TJGRkQoICNCKFSvyjt+2bZtSU1M1cuTIMs8PoAoxdY45gErtj8sGXRQZGWkMHTrUqFWrllG9enWjVatWxt133238/PPPhmEYRnJysvHQQw8Zbdu2Nfz8/IxatWoZPXv2NL788st85zl58qRx0003Gf7+/oakyy4hlJiYaNhsNmP+/Pl52y5cuGC8//77xqhRo4xmzZoZPj4+hq+vr9G1a1dj9uzZ+ZYJcrlcxowZM/KO69q1q/Hvf//buOuuu4xmzZrlHXdx2aA/Lnt0cYmfRYsW5dv+8ccfG5KMrVu35m1r1qyZcdNNNxkrV640OnfubPj4+Bht27Yt8Nk/Lht00Y4dO4xbbrnFCAwMNHx8fIxmzZoZ48aNM9asWZPvuGeeecYICQnJt8QQAJQU7/IGUKXcd999iomJ0YYNG8yOUqTmzZurY8eO+ve//11u18jOzlbz5s317LPP6tFHHy236wCo/HiGEkCVMmXKFG3dupU3w0j6+OOPVa1aNT344INmRwHg4RihBAA3VBEjlABQVhihBAAAQKkwQgkAAIBSYYQSAAAApUKhBAAAQKlQKAEAAFAq
FEoAAACUCoUSAAAApUKhBAAAQKlQKAEAAFAqFEoAAACUCoUSAAAApUKhBAAAQKlQKAEAAFAqFEoAAACUCoUSAAAApUKhBAAAQKlQKAEAAFAqFEoAAACUCoUSAAAApUKhBAAAQKlQKAEAAFAqFEoAAACUCoUSAAAApUKhBAAAQKlQKAEAAFAqFEoAAACUCoUSAAAApUKhBAAAQKlQKAEAAFAqFEoAAACUCoUSAAAApUKhBAAAQKlQKAEAAFAqFEoAAACUis3sAABQlIxspw6nZCjH6ZK3zarmgX7y8+GrCwDcCd/KANxObGK6IqLiFRmdpPjUTBm/22eRFFLXV+FtgjSxZ4jCGvibFRMA8F8WwzCMyx8GAOUvITVTk5fu0oa4ZHlZLcp1Ff71dHF/39B6mjG6k5rW9a3ApACA36NQAnALX2yN15Tle+R0GUUWyT/yslpks1o0bUQHje8RUo4JAQCFoVACMN28yFjNWRVT6vM8OaS1Hg4PK4NEAICS4BlKAKb6Ymv8Jctk8r9fU8buNYV+rvFDn8jmXy/ftjmrYlS/po9uY6QSACoUhRKAaRJSMzVl+Z5L7vPveoOqN+/yh62GUle+JVutBgXK5EUvLN+jXq3q8UwlAFQgCiUA00xeukvOQp6X9GncTj6N2+XblpWwR8aFbPm1H1DoOZ0uQ5OX7tL8+3qWZVQAQBFY2ByAKWIT07UhLrlEE3Ay9q6XZJFf+/6FHpPrMrQhLllxSellkBIAUBwUSgCmiIiKl5fVUuzjjVynMvdvlE+TdrLVblDksV5Wixyb40sbEQBQTBRKAKaIjE4q0ejk+UPb5TqfVuTt7otyXYYiY5JKkQ4AUBIUSgAV7ly2U/GpmSX6TMbe9ZLVJt92fYp1fHxKpjKynVcSDwBQQhRKABXuSEqGSrIArivnvM7HblaNFl3lVSOgWJ8xJB1OybiifACAkqFQAqhwOU5XiY7PjNn82+zuDgPK9ToAgCtDoQRQ4bxtJfvqydi7ThbvGqoRVrKlgEp6HQDAleHbFkCFax7op+LO787NPKuswzvlG3atrNWqF/salv9eBwBQ/iiUACqcn49NIcV8k03Gvh8kV26Jb3eHBPrKz4d3NwBARaBQAjBFeJugYq1DmbFnnay+tS/xGsbCeVktCm8dVIp0AICSsBiGUZLJlgBQJmIT0zX49R/K7fyrH++n0CD/cjs/AOB/GKEEYIqwBv7qG1qvRG/LKQ4vq0V9Q+tRJgGgAlEoAZhmxuhOspVxobRZLZoxulOZnhMAUDQKJQDTNK3rq2kjOpTpOaeP6KCmxZzwAwAoGxRKAKYa3yNETw5pXSbnempIG93WI6RMzgUAKD4m5QBwC19sjdeU5XvkdBnKdZXga8mVKx/vapo+ogNlEgBMwgglALcwvkeIVj/eX71aBkrSZSfrXNx//sivuqPuYcokAJiIEUoAbic2MV0RUfGKjElSfEqmfv8lZdFvi5aHtw6S/doQvfz3x7Rs2TLFxcWpdu3aJiUGgKqNQgnArWVkO3U4JUM5Tpe8bVY1D/TL9wac48ePKywsTH/96181e/ZsE5MCQNVFoQTg8aZNm6YZM2Zo//79atGihdlxAKDKoVAC8HgZGRlq3bq1+vTpo4ULF5odBwCqHCblAPB4fn5+eumll/Tll1/qp59+MjsOAFQ5jFACqBRcLpe6deum6tWra9OmTbJYyvYNPACAwjFCCaBSsFqtmjt3rjZv3qxFixaZHQcAqhRGKAFUKsOHD9fu3bu1b98+Va9e3ew4AFAlMEIJoFKZPXu2EhIS9Oabb5odBQCqDEYoAVQ6Dz30kCIiIhQXF6d69eqZHQcAKj1GKAFUOlOnTpVhGJo2bZrZUQCgSqBQAqh06tevr8mTJ+vdd99VdHS02XEAoNLjljeASikrK0tt27bVVVddpWXLlpkdBwAqNUYoAVRK1atX18svv6zly5dr3bp1ZscBgEqNEUoAlZZhGLr22mvldDq1detWWa38HRoAygPfrgAqLYvFoldffVXbt2+Xw+EwOw4AVFqMUAKo9MaOHauffvpJMTEx8vX1NTsOAFQ6jFACqPRmzpyppKQkvfrqq2ZHAYBKiRFKAFXCpEmT9N577ykuLk4NGzY0Ow4AVCoUSgBVwunTpxUaGqpbbrlF77//vtlxAKBS4ZY3gCqhTp06euGFF/TRRx9p165dZscBgEqFEUoAVUZOTo46dOigli1bauXKlWbHAYBKgxFKAFWGt7e3XnnlFa1atUrfffed2XEAoNJghBJAlWIYhvr376/U1FTt3LlTNpvN7EgA4PEYoQRQpVxc7HzPnj366KOPzI4DAJUCI5QAqiS73a7vv/9ecXFx8vf3NzsOAHg0RigBVEkzZsxQWlqaZs2aZXYUAPB4FEoAVVJISIgef/xxzZ07VwkJCWbHAQCPxi1vAFVWWlqawsLCNHToUH322WdmxwEAj8UIJYAqKyAgQNOmTdP8+fO1bds2s+MAgMdihBJAleZ0OnXVVVepfv36ioyMlMViMTsSAHgcRigBVGk2m01z5szR+vXrtWzZMrPjAIBHYoQSQJVnGIaGDh2qw4cPa/fu3fL29jY7EgB4FEYoAVR5FotFc+bMUVxcnN59912z4wCAx2GEEgD+6/7779fSpUsVFxenOnXqmB0HADwGI5QA8F//+Mc/lJ2drZdeesnsKADgUSiUAPBfwcHBevrpp/Xmm2/q4MGDZscBAI/BLW8A+J2MjAy1bt1avXv31pdffml2HADwCIxQAsDv+Pn56aWXXtKiRYu0adMms+MAgEdghBIA/sDlcqlbt26qXr26Nm3axGLnAHAZjFACwB9YrVbNnTtXmzdv5rY3ABQDI5QAUIjhw4dr9+7d2rdvn6pXr252HABwW4xQAkAhZs+erYSEBL355ptmRwEAt8YIJQAU4eGHH9b8+fMVFxen+vXrmx0HANwSI5QAUIQpU6ZIkqZNm2ZyEgBwXxRKAChC/fr19dxzz+ndd9/V/v37zY4DAG6JW94AcBlZWVlq27atOnfurOXLl5sdBwDcDiOUAHAZ1atX18yZM7VixQpFRkaaHQcA3A4jlABQDIZh6LrrrlNOTo5+/vlnWa38fRwALuIbEQCKwWKx6NVXX9WOHTs0f/58s+MAgFthhBIASmDs2LH66aefFBMTI19fX7PjAIBbYIQSAEpg5syZSkpK0ty5c82OUuVlZDu15/hZ7Yg/rT3Hzyoj22l2JKDKYoQSAEpo0qRJeu+99xQbG6vg4GCz41QpsYnpioiKV2R0kuJTM/X7f4FZJIXU9VV4myBN7BmisAb+ZsUEqhwKJQCU0OnTpxUaGqpbbrlF77//vtlxqoSE1ExNXrpLG+KS5WW1KNdV+L+6Lu7vG1pPM0Z3UtO6PJoAlDcKJQBcgX/+85964okntGPHDnXu3NnsOJXaF1vjNWX5HjldRpFF8o+8rBbZrBZNG9FB43uElGNCABRKALgCOTk56tChg1q0aKGVK1fKYrGYHalSmhcZqzmrYkp9nieHtNbD4WFlkAjApTApBwCugLe
3t2bPnq3vv/9e3333ndlxKqUvtsYXq0ye3bRQR2berOMf/LXQY+asitHCrfFlGQ/A7zBCCQBXyDAMDRgwQMnJyfrll19ks9nMjlRpJKRmatBr65XtdBV5nDMtWcff/7Mki2y1gtTo/rcLPdbHZtXqx/vzTCVQDhihBIArZLFYNHfuXO3du1cffvih2XEqlclLd8lZjOclT0d+KJ9GbeTdMPSyxzpdhiYv3VUW8QD8AYUSAEqhe/fustvteuGFF5SWlmZ2nEohNjFdG+KSLzsBJyt+tzL3/6g61/+pWOfNdRnaEJesuKT0sogJ4HcolABQSjNmzFBaWppmzZpldpRKISIqXl7Woic5Ga5cpX7/rmpeNUTeQc2LfW4vq0WOzTxLCZQ1CiUAlFLTpk31xBNP6NVXX1VCQoLZcTxeZHTSZUcnz+34Vs60U6rd744SnTvXZSgyJqk08QBcAoUSAMrAs88+q4CAAE2ePNnsKB7tXLZT8amZRR6Tez5NZzZEqHav2+TlW6vE14hPyeQ1jUAZo1ACQBnw9/fX9OnT5XA49PPPP5sdx2MdScnQ5abinPlhvqw1asq/+/AruoYh6XBKxhV9FsClUSgBoIzcd999at++vSZNmiRWZLsyOZdZJuhC6jGd27lS/t1GKDc9Vc4ziXKeSZSRe0GGK1fOM4nKPX/5STeXuw6AkmHRNAAoIzabTXPmzNGNN96oZcuWadSoUWZH8jjetqLHOXLTUyTDpdOr39Pp1e8V2H/s3fvk332E6g4qeub35a4DoGRY2BwAypBhGBo6dKgOHz6s3bt3y9vb2+xIHiUj26mOU1cWets7N/Osso/uLbD9zA/z5co5r7qD/iRb7eAiZ35bJO2eOlR+PoypAGWFv6IBQBmyWCyaM2eODhw4oHfeecfsOB7Hz8emkCLeZOPlW0u+ra8r8I+1RoCs3jXk2/q6yy4jFBLoS5kEyhiFEgDKWOfOnXXPPfdo+vTpOn36tNlxPE54m6DLrkN5pbysFoW3DiqXcwNVGbe8AaAcnDhxQmFhYfrzn/+suXPnmh3Ho8Qmpmvw6z+U2/lXP95PoUH+5XZ+oCpihBIAykFwcLCeeeYZvfnmmzpw4IDZcTxKWAN/9Q2tV+ajlF5Wi/qG1qNMAuWAEUoAKCeZmZlq3bq1rrvuOi1atMjsOB4lITVTg15br+wyXN7Hx2bV6sf7q2kRz2gCuDKMUAJAOfH19dVLL72kxYsX68cffzQ7jkdpWtdX00Z0KNNzTh/RgTIJlBNGKAGgHLlcLnXv3l3e3t766aefZLGUz2STympeZKzmrIop9XmeGtJGD4WHlkEiAJfCCCUAlCOr1aq5c+cqKipKCxcuNDuOx3k4PEwzb+kkH5u1xM9Uelkt8rFZNeuWTpRJoJwxQgkAFWDEiBH69ddftX//flWvXt3sOB4nITVT9727WjHpXvKyWpTrKvxfXRf39w2tpxmjO3GbG6gAFEoAqAD79+9Xx44dNWPGDD399NNmx/FIAwYMUK5ffV3/56mKjElSfEpmvjfqWPTbouXhrYNkvzaE2dxABaJQAkAFefjhhzV//nzFxcWpfv36ZsfxKHFxcQoLC1NERIQmTJgg6bfXNB5OyVCO0yVvm1XNA/14Aw5gkipfKPlCAlBRTp06pdDQUN1xxx2aN2+e2XE8ynPPPae33npLJ06cUI0aNcyOA+APqmShjE1MV0RUvCKjkxSfeolbJnV9Fd4mSBN7hiisAbdMAJSdV155RZMnT9bu3bvVtm1bs+N4hNzcXIWEhGjkyJF6++23zY4D4BKqVKFMSM3U5KW7tCEumYe6AZgiKytLbdu2VadOnbRixQqz43iEb7/9VjfeeKO2bt2q7t27mx0HwCVUmUL5xdZ4TVm+R06XUWSR/CMvq0U2q0XTRnTQ+B4h5ZgQQFWxcOFCjR8/XmvWrNHAgQPNjuP2xo4dq+joaP3yyy+s4wm4qSpRKMtqYdwnh7TWw+FhZZAIQFVmGIZ69eqlrKws/fzzz/Ly8jI7kts6deqUGjdurNmzZ+vRRx81Ow6AQlT6hc2/2Bp/yTLpyjmvMxsilLjwBSW8Pl5HZt6sc7+uLvJcc1bFaOHW+PKKCqCKsFgsmjt3rnbu3Kn58+ebHcetRURESJImTpxochIARanUhTIhNVNTlu+55D5XZprO/vi5LqQkqFpQi2Kf84Xle5SQmllWEQFUUb169dLYsWP13HPPKSMjw+w4bskwDH300UcaOXKk6tWrZ3YcAEWo1IVy8tJdchbyvKRXzbpq8vB8Nfnrx6oTfm+xz+l0GZq8dFdZRQRQhc2cOVPJycmaO3eu2VHc0rZt27Rr1y7de2/xv6MBmKPSFsrYxHRtiEsudAKOxVZNXjXrlPi8uS5DG+KSFZeUXtqIAKq4li1b6m9/+5teeeUVnThxwuw4buejjz5S48aNNWTIELOjALiMSlsoI6Li5WUtn9mAXlaLHJt5lhJA6T333HPy8fHR888/b3YUt3L+/HktWLBAd999N5OWAA9QaQtlZHRSiZYHKolcl6HImKRyOTeAqqVOnTqaMmWKPvroI/36669mx3EbS5Ys0dmzZ3X33XebHQVAMVTKQnku26n4cp44E5+SqYxsZ7leA0DV8OCDDyo0NFRPPvmkqsBKbsXy0UcfqX///goNDTU7CoBiqJSF8khKhsr7K9mQdDiFmZkASs/b21uvvPKKvv/+e3333XdmxzHdoUOHtHbtWibjAB6kUhbKHKfLlOtkZ2frp59+0tmzZyvk+gAqj5EjR6pfv3568skn5XRW7bsfn3zyifz9/TVmzBizowAopkpZKL1tFfNjVbNatHPnTs2ZM0eDBw9WrVq11KtXL73//vsVcn0AlYfFYtGrr76qvXv36oMPPjA7jmlyc3P18ccf6/bbb5efn5/ZcQAUk83sAOWheaCfLFK53vY2DENdQhsrNzsz792yF599atWqVTleGUBl1a1bN91xxx164YUXNGHCBAUEBJgdqcKtWbNGCQkJ3O4GPEylHKH087EppK7vZY9L27ZCZ378Qud+/V6SdD5ui878+IXO/PiFXFmXeT4yPUm52b9N/DEMI9+D9F999ZX+/e9/V/nbVgBK7qWXXlJ6erpmzpxpdhRTfPTRR2rfvr2uueYas6MAKAGLUUmnFE5dvkfzo44UuXTQ0bfvVW7apZf/afzgh7LVbnDJfV5Wi+7o2UxNT23WX/7yFzmdzkJnZtauXVuhoaHq06ePRo8erT59+shqrZQ9HkAZ+b//+z/NnTtX0dHRCgkJMTtOhUlNTVVwcLBmzJihSZMmmR0HQAlU2kIZm5iuwa//UG7nX/14P4UG+evnn3/WyJEjlZiYKJfLpWHDhmnevHlatGiR1qxZo927dysxMVG5ubmSfntOqm7dumrXrp369eunMWPG6Oqrry63nAA8T3p6usLCwjRo0CA5HA6z41SYefPm6fHHH9exY8cUFBRkdhwAJVBpC6Uk3fFhlDYdTCnTBc69rBb1ahmo+ff1zNuWnJyssWPHat26dZo2bZpeeOGFAp
/bvXu3Fi9erHXr1mnv3r1KTk7OG9W0Wq0KCgpShw4dFB4ernHjxiksLKzMMgPwPP/617/05z//WVu2bFGPHj3MjlMhunbtqhYtWmjJkiVmRwFQQpW6UCakZmrQa+uVXYbLCPnYrFr9eH81/cMzmk6nUxEREbrppptUr169y57H5XIpKipKS5Ys0caNGxUdHa3Tp0/n7bfZbAoODlbnzp01aNAgjRs3To0aNSqznwOAe3M6nerSpYvq1q2r9evX503+q6x27Nihq6++WitWrNDNN99sdhwAJVSpC6UkfbE1Xs8u2VVm55t1Syfd1qN8nmlyOp2KjIzU119/rZ9++kkHDhxQWlpa3n5vb281adJEXbt21dChQzVmzBjVrVu3XLIAMN93332nYcOGacmSJRo9erTZccrV3/72Ny1evFgJCQmy2SrlAiRApVbpC6UkzYuM1ZxVMaU+z1ND2uih8Ip9DVhWVpa++eYbffPNN9qyZYsOHTqkzMz/vVayRo0aCgkJUffu3XXTTTdp5MiR8vW9/Ax3AJ5h6NChOnjwoPbs2SNvb2+z45SLrKwsNWrUSH/605+q7Ox2wNNViUIp/TZSOWX5HjldRomeqfSyWmSzWjR9RIdyG5ksqbS0NH399df69ttvtW3bNiUkJCgrKytvv5+fn1q2bKlrr71WN998s2644YZK+y8ioLLbtWuXunTpoldffVWPPvqo2XHKxRdffKHbb79d+/fvV5s2bcyOA+AKVJlCKf32TOXkpbu0IS5ZXlZLkcXy4v6+ofU0Y3SnAs9MupukpCR99dVXWrVqlXbs2KHjx4/rwoULeftr1aql0NBQ9e7dW6NGjVL//v1ZvgjwEA888ICWLFmiuLg41alTx+w4ZW7IkCE6f/68NmzYYHYUAFeoShXKi2IT0xURFa/ImCTFp2Tme6OORVJIoK/CWwfJfm2IQoP8zYpZakeOHNGXX36ptWvXateuXTp58mSB5YvatGmTt3xR9+7dTU4M4FJOnjyp0NBQ/fnPf9bcuXPNjlOmjhw5ohYtWujDDz/UPffcY3YcAFeoShbK38vIdupwSoZynC5526xqHugnP5/K+0D4nj17Cixf5HL9NgvearWqfv366tChg/r376/bbruN20+Am3jxxRc1ffp07du3r1K93nX69Ol65ZVXdPLkSdWsWdPsOACuUJUvlFWdy+XS1q1btWTJEm3YsCFv+aKL/7Ww2Wxq2LChOnXqpEGDBmns2LFq2rSpyamBqiczM1OtW7fWtddeq8WLF5sdp0y4XC61atVKAwcO1Icffmh2HAClQKFEAS6XK9/yRbGxsQWWL2rcuLG6dOmSt3xRcdbe9ERVbQQb7u2zzz7TXXfdpQ0bNqhPnz5mxym1NWvWaNCgQdq4caN69+5tdhwApUChRLHk5OTo22+/1YoVKxQVFaVDhw4pIyMjb3/16tXzli+68cYbNXLkSI+9fZX3jG10kuJTL/GMbV1fhbcJ0sSeIQpr4LnP2MLzuFwu9ejRQzabTT/99JPHT6ybOHGitm3bpn379lX6hduByo5CiSt27tw5LVu2TP/5z3/0888/Kz4+/pLLF11zzTUaPny4hg0b5tbLF1XmVQBQeaxbt07h4eFasGCBbr/9drPjXLHTp08rODhY06dP19NPP212HAClRKFEmUpOTtZXX32llStXaufOnTp27JhycnLy9gcEBCgsLEzXXXedRo0apfDwcLcYZSntOqXTRnTQeDdZpxSV38iRI/XLL79o//79ql69utlxrsg777yjv/3tb0pISFBwcLDZcQCUEoUS5S4+Pl6LFy/W6tWr85Yvcjqdkn5bvqhOnTpq06aN+vbtq1tvvVXdunWr0JJZVm9SenJIaz0cHlYGiYCiRUdHq2PHjnrxxRf1zDPPmB3nivTo0UPBwcFavny52VEAlAEKJUyxb9++vOWL9uzZo1OnTuVbvqhevXp5yxeNGzdO7dq1K5ccnvSud+D3/va3v+mzzz5TXFyc6tevb3acEvn111911VVXaenSpRo1apTZcQCUAQol3IJhGNq2bZu++uor/fDDD4qOjlZqamre8kVeXl4Fli8KCSm6uL311ltatWqV3n//fQUFBRXYn5CaqUGvrVe201VgX/aJGGXsWqOs+F1ynk2UtUaAfBq1Ue1+d6ha3caFXtPHZtXqx/vzTCXKXXJyskJDQzVx4kS99dZbZscpkccee0yff/65jh49qmrVqpkdB0AZoFDCbblcLq1fv15ff/21Nm3apNjYWJ09ezZvf7Vq1fKWLxoyZIjGjh2bb/mi7t27a9u2bWrQoIGWLVumnj175jv/HR9GadPBlEs+M3lq6QxlH90n37Z9VC2ouXLPnVb69n/LyMlSwzvnyLt+80tm9rJa1KtloObf1/OS+4GyNHv2bP3973/Xrl27ym0Uv6xlZ2ercePGuueeezR79myz4wAoIxRKeJScnBytXLlSK1as0ObNm3Xo0CGdO3cub3/16tXVtGlTXX311Vq8eLFyc3Pl5eUli8Wit956Sw888IAsFotiE9M1+PUfCr1O1tF98gkOlcXrf6MnF1KP6fiHD8uvbW/VG/5kkTlXP97Po1/bCc+QlZWldu3aqWPHjlqxYoXZcYpl8eLFGjt2rPbs2aP27dubHQdAGaFQwuNlZGRo+fLl+s9//qOtW7cqPj5e58+fv+Sx4eHhWrZsmeZGxmt+1JESzeiWpBMfPypJCr7nn4Ue42W16I6ezTR1RIcSnRu4El9++aVuu+02rVmzRgMHDjQ7zmXdeOONOn36tH766SezowAoQxRKVEqvvvqqJk2adMl9VqtVLR/9TBd8apfonIZh6Njbd6tavRA1uO0fRR7bLNBX658ML9H5gSthGIZ69+6t8+fP6+eff5aXl5fZkQp19OhRNWvWTO+9957uv/9+s+MAKEPmLwAIlIN9+/ZJ+u1d5Bf/b8+ePXXzzTfroccm6YJ3rRKfM2PPOuWmp8ivbd/LHhufkqmMbGeJrwGUlMVi0dy5c7Vz507Nnz/f7DhF+vTTT1W9enWNGzfO7CgAyhiFEpVScHCwOnbsqMcff1wrV67U2bNntXnzZq1YsUJ/nvScVMLXvF1ISVDq9+/Ip3Fb+XW6/rLHG5IOp2QU3G4YOnXqlLgxgLJ03XXXady4cZo8eXK+V6K6E5fLpY8++khjx45VQECA2XEAlDEKJSql6dOna9euXXrllVc0ZMgQ+fr+bxmfnEssE1SU3HOnlbRomqw+fqo36u+yWIt3S/HidVwul6KiovTMM8+oefPmCgoK0vbt20uUAbicmTNnKiUlRXPmzDE7yiVt2LBBBw8e1L333mt2FADlwGZ2AKCieduK//coV1aGEr+cIldWhhrYZ8nmH1jsz/7rnbd1bE+UtmzZosTERNlstrw3BPn7MwMcZatFixZ65JFH9Morr+iBBx5Qo0aNzI6Uz4cffqjQ0FD17Xv5R0YAeB5GKFHlNA/0U3FueBvOHCUtni7n6WMKGvuCvOsV/w04hmHovbkvasWKFUpMTJSkfK+b/Pbbb
7Vnz568twMBZeG5555TjRo19Pzzz5sdJZ+zZ89q8eLFuvfee2Up4eMmADwDhRJVjp+PTSGXeZON4crVqa9nKfv4ftUf9ax8Gpds0ejm9fy0+IuIS76hxzAMPfbYY+rYsaNsNpv8/f0VGhqqoUOH6umnn9by5cuVlpZWousBklS7dm1NmTJFH3/8sX755Rez4+RZuHChsrOzdeedd5odBUA5YdkgVElTl+8pch3K1NX/UvrPy1Uj9Br5XmJWd82OhS8J9Pt1KLOysvTMM8/ojTfekNVqlWEYuuGGG/TAAw9o48aN+vXXX3Xw4EElJiYWmExhs9lUp04dNW3aVO3atVOPHj0UHh6ujh07ymrl74K4tAsXLqhjx44KCQnRqlWr3GJE8Nprr1VgYKC++eYbs6MAKCcUSlRJl3tTzsmIZ5WdsLvQ/c2e/XeR5//jm3JWrlwpu92u5ORkPf3005o1a1aBz7hcLu3du1dr1qzR1q1btX//fiUkJOj06dO6cOFCvmP9/PzUoEEDtWzZUp07d1avXr10/fXXq3bt2kXmQtWwbNkyjRo1St98841uvPFGU7Ps2bNHHTt21OLFizVmzBhTswAoPxRKVFlFvcv7ShX1Lu/k5GS9+OKLuvfee9W5c+cSnTctLU3r1q3Tjz/+qF9++UUHDhzQyZMnlZGRkW8JIpvNptq1a6tJkyZq27atunfvrvDwcHXp0oVRzSrEMAyFh4crKSlJv/76a956rGaYNGmSPvvsMx07dkze3t6m5QBQviiUqLISUjM16LX1yi7hMkJF8bFZtfrx/mp6mWc0y4rL5VJ0dLQiIyO1ZcsW7d27V/Hx8UpNTS0wqunr66v69eurZcuWuuqqq3Tddddp4MCBqlevXoVkRcXatm2bunfvrnfeeUcPPvigKRlycnLUpEkT2e12vfrqq6ZkAFAxKJSo0r7YGq9nl+wqs/PNuqWTbutR/Nng5SkzM1Pr16/Xxo0btWPHDsXFxenkyZM6d+5cvlFNLy8v1apVS02aNFGbNm3UvXt39e/fXz169GBU08Pddddd+vbbbxUXF2fKYuJLly7VLbfcol27dqljx44Vfn0AFYdCiSpvXmSs5qyKKfV5nhrSRg+Fh5ZBovIXGxurtWvXKioqKm9UMyUlRTk5OfmOq1GjhurXr68WLVqoU6dOuvbaa3X99derYcOGJiVHSRw9elStW7fWY489phkzZlT49YcPH67ExERt2bKlwq8NoGJRKAH9NlI5ZfkeOV1GiZ6p9LJaZLNaNH1EB7cZmSyNrKwsbdiwQRs2bMgb1Tx+/LjS09MLjGoGBASocePGat26tbp166Z+/frp2muvNfV5PRT0/PPPa/bs2dq/f7927Nihf/7zn5o3b165jxgeP35cTZs21VtvvWXaLXcAFYdCCfxXQmqmJi/dpQ1xyfKyWooslhf39w2tpxmjO1XYM5NmOnLkiFavXq2oqCjt3r1bR44cUUpKirKzs/MdV716ddWrV0/NmzdXx44ddd1112nQoEFu9+aWqiI9PV3NmzeX1WpVcnKypN/eWlPer0CcOXOmpk2bphMnTrD6AFAFUCiBP4hNTFdEVLwiY5IUn5Kp3/8PxCIpJNBX4a2DZL82JN/SQFVVTk6ONm3apPXr12vHjh2KiYnRiRMnlJaWlu9NQFarVQEBAWrUqJFat26trl27ql+/furVqxezf8tJQkKCnnnmGX3++ed526xWq1577TU98sgj5XZdwzDUpk0b9ezZU/Pnzy+36wBwHxRKoAgZ2U4dTslQjtMlb5tVzQP95OfDLd3iOnr0qNasWaPNmzdr9+7dOnz4sJKTk5WVlZXvOB8fH9WrV0/NmjVThw4d8p7VbNasmUnJK4f+/fvrhx/yr7dqs9k0ffp0/f3vfy+3627cuFF9+/bV2rVrFR5e+EsAAFQeFEoAFc7pdGrz5s1av369tm/frpiYGB07dkxnz54tMKrp7++v4OBghYWF6eqrr1bfvn3Vu3dvVa9e3cSfwDPs2LFD48ePV1xcXN7v1cvLS88884xeeumlcrvuvffeq3Xr1ikuLo6VAoAqgkIJwK2cPHkyb1Tz119/1aFDh5ScnKzz58/nO87b21uBgYF5o5rXXHONrr/+erVq1cqk5O4pKytLzz//vObOnZs3seqhhx7SvHnzyuV66enpCg4O1jPPPKPnn3++XK4BwP1QKAF4BKfTqW3btmndunX6+eefFR0dnTeqmZubm3ecxWKRv7+/GjZsqNDQUHXt2lV9+vRRv3795Otb+SdPFebHH3/U2LFjdeLECV133XXatGlTvv1l9XjHhx9+qAceeECHDx9WSIjnr3wAoHgolAA8XlJSkiIjI7Vp0ybt2rVLBw8e1KlTp5SZmZnvOG9vb9WtW1chISFq3769rrnmGg0cOFBt2rQxKXnFysjI0C233KKePXtq+vTp/5uAFp2k+NRLTECr66vwNkGa2DNEYQ2KNwGtd+/e8vf313fffVcuPwMA90ShBFBpuVwubd++XevXr9fPP/+s/fv3KyEhQWfPnpXT6cw7zmKxqGbNmmrQoIFCQ0PVpUsX9e7dWwMGDFDNmjVN/AnKR3ktkbV//361a9dOCxcu1Lhx48ojOgA3RaEEUCWlpqbmjWr+8ssvOnjwoJKSkpSRkZHvuGrVqqlOnTpq2rSp2rdvrx49emjgwIFq166dR044Ke0i/tNGdND4Qhbxf+aZZ/TBBx/o+PHj8vHxKavIADwAhRIAfsflcmnXrl2KjIzU1q1b80Y1T58+XWBU09fXN29Us3PnzurTp4/Cw8NNeW92cZTVa0afHNJaD4eH5dt24cIFNW3aVOPGjdMbb7xR6msA8CwUSgAoprS0NEVGRurHH3/UL7/8ogMHDujkyZPKzMzM92pKm82WN6rZtm1b9ejRQwMGDFDnzp1NG9X8Ymu8nl2y65L7sk/G6ezGBco+uleG84JstRuoZpcbFNB9RKHnm3VLp3yvG12+fLlGjhypHTt2qEuXLmUdH4Cbo1ACQCm5XC7t27cvb1Rzz549eaOaFy5cyHesn5+fgoKC1LJlS3Xu3Fm9e/fWwIEDVadOnSu69sGDB3XixAn17t270GMSUjM16LX1yna6Cuw7f2i7khZPl3eDVvJr21cW7+pynjkpGS7VCS/89Yw+NqtWP94/75nKUaNGKSEhQdu2bbuinwOAZ6NQAkA5OnfunH744Qdt2LBBO3fuzBvVPHfuXIFRzVq1aqlJkyZ5o5r9+/fX1VdfXeSo5tixY7V48WL96U9/0quvvio/P78Cx9zxYZQ2HUwp8MykKztTx/71J/k0bqf6o/8ui6X4o6deVot6tQzU/Pt66uTJk2rSpIlef/11Pfzww8U+B4DKg0IJACaJjo7W2rVrtWXLFu3du1fx8fFKTU1VTk5OvuNq1KiRN6rZqVMn9erVS9dff73q1auntm3bKjo6WlarVS1atNDChQvVrVu3vM/GJqZr8Os//PHSkqT0Hf9R6sq31ej+d1StXlO5crJkqeZdomK5+vF++vqz9/R///d/On78
uOrWrXtlvwwAHo1CCQBuJjMzUxs2bNDGjRu1Y8cOxcXF6cSJE0pPT883qunl5VVgUXdJ+sc//qFnn31WXl5emrp8j+ZHHbnkjO5TS2fo/OGdqj96slJXvSNn6jFZqlWXX8dw1b3+AVls3kXm9LJaZO8Zoi+fGasuXbro888/L6PfAABPQ6EEAA9y6NAhrVmzRlFRUdq2bZt27NhxyeOqV6+uMWPG6FDHu3TsbM4ljzn+4cNynjkhSarZeYiqh3RSVvwupW9bId92/VR/5NOXzdPAz6otz9+oVatWafDgwVf+gwHwaBRKAPBQ3333nYYNG1Zge+3atdWiRQs1aNJM+zrcX+jnj717v5xnTqpm12EKHPpQ3vaU7+bp3M7v1OhP76la3cZFhzAMGYue0KHY/R65LieAssH/+gHAQ8XHx0v67VZ3v3799Oabb+bNLt++fbvmvPtJkZ+/eEvbr13/fNv92g+QJGUf23/5EBaLRky8jzIJVHE2swMAAK7MLbfcolq1amngwIGqX79+gf05l1gm6Pe8agbqQnK8vPxq59/uV0uS5Mo6V6wcw24aXrzAACot/koJAB6qXr16uu222y5ZJiXJ21b0V7x3w1aSJGd6Sr7tzvRUSZKXb61i5WjW9DK3xQFUehRKAKikmgf6yVLEfr+2fSVJ535dlW/7uV9XSVYv+YR0KsZVDDUPLLj2JYCqhVveAFBJ+fnYFFLXV0dSMy+537thK/l1HqyMX7/XKZdL1UM6Kit+lzL3b1TAdWNl8w+87DUa+HrJ19urrKMD8DCMUAJAJRbeJkhe1sLHKQOHPqRafSYo+3i0Ule/r5zEA6pz/QOq0/+uy57bcOUq9oev1a5dO7344os6ePBgWUYH4EFYNggAKrGi3pRTFqZdY9OapRFasmSJMjIy1KtXL9ntdo0bN06BgZcf4QRQOTBCCQCVWFgDf/UNrVfkKOUVceWqT2g93TV6qD777DMlJiYqIiJCtWrV0t/+9jcFBwdr5MiRWrRokc6fP1+21wbgdhihBIBKLiE1U4NeW6/syywjVFyGYchmMbTuqevVtK5vgf1JSUlauHChHA6HtmzZooCAAN16662y2+3q378/a1YClRCFEgCqgC+2xuvZJbvK7HzPD22p+wa0u+xxMTExioiIkMPh0MGDB9WkSRNNmDBBdrtdnToVZxY5AE9AoQSAKmJeZKzmrIop9XmapGzXxn89X6LPGIahzZs3y+FwaOHChUpJSVGnTp1kt9s1YcIENWnSpNS5AJiHQgkAVcgXW+M1ZfkeOV2Gcl3F//r3slpklaGT//6nFs16QjfccMMVZ8jJydHKlSvlcDi0fPlyZWdna8CAAbLb7RozZoxq1SreguoA3AeFEgCqmITUTE1euksb4pLlZbUUWSwv7u8bWk+WbV8ocsViHT58WF5eZbP2ZFpampYsWSKHw6G1a9fK29tbI0aMkN1u1w033CBvb+8yuQ6A8kWhBIAqKjYxXRFR8YqMSVJ8SqZ+/y8Di6SQQF+Ftw6S/doQNarppeDgYD3yyCP6xz/+US55jh07ps8//1wOh0O//PKL6tatq3Hjxslut6tXr16yWMp4pjqAMkOhBAAoI9upwykZynG65G2zqnmgn/x8/vcyNYfDoTvuuENxcXFq1apVuefZtWuXIiIiFBERoaNHj6pFixaaOHGiJk6cqLZt25b79QGUDIUSAHBZAwcOlGEYioyMrNDrulwubdiwQQ6HQ4sWLdLZs2fVvXt32e12jR8/Xg0aNKjQPAAujUIJACjSwYMH1apVK82fP192u920HFlZWfrmm2/kcDj0zTffyOVyafDgwbLb7Ro1apT8/PxMywZUdRRKAECRnn/+eb3xxhs6ceKEfH0LLmRuhtTUVC1atEgOh0MbN26Ur6+vRo8eLbvdrkGDBslms13+JADKDIUSAFCo3NxcNW/eXDfddJPeffdds+Nc0qFDh7RgwQI5HA7t379fQUFBuv3222W329WtWzcm8wAVgEIJACjUypUrdcMNNygqKkrXXHON2XGKZBiGduzYIYfDoQULFigxMVFt2rTJWzy9ZcuWZkcEKi0KJQCgULfddpv27t2rX3/91aNG+pxOp9auXSuHw6ElS5YoIyNDvXr1kt1u17hx4xQYGGh2RKBSoVACAC4pJSVFjRo10syZM/X444+bHeeKZWRkaNmyZXI4HFq1apWsVquGDRsmu92um2++WTVq1DA7IuDxKJQAgEt64403NGnSJB0/flz169c3O06ZSEpK0sKFC+VwOLRlyxYFBATo1ltvld1uV//+/WW1Ws2OCHgkCiUAoADDMNSlSxeFhobqq6++MjtOuYiJiVFERIQcDocOHjyoJk2a5E3m6dy5s9nxAI9CoQQAFLB9+3Z169ZN//73v3XTTTeZHadcGYahzZs3y+FwaOHChUpJSVGnTp3yJvM0adLE7IiA26NQAgAKePjhh7V06VIdOXKkSq3pmJOTo1WrVsnhcGjZsmXKzs7WgAEDZLfbNWbMGNWqVcvsiIBbolACAPI5f/68GjVqpL/85S+aMWOG2XFMk5aWpiVLlsjhcGjt2rXy9vbWiBEjZLfbdcMNN8jb29vsiIDboFACAPL5/PPPNWHCBMXExCgsLMzsOG7h2LFj+vzzz+VwOPTLL7+obt26GjdunOx2u3r16uVRSyoB5YFCCQDIZ/DgwcrOztYPP/xgdhS3tGvXLkVERCgiIkJHjx5VixYtNHHiRE2cOFFt27Y1Ox5gCgolACDP4cOH1bJlS3388ce66667zI7j1lwulzZs2CCHw6FFixbp7Nmz6t69u+x2u8aPH68GDRqYHRGoMBRKAECeadOmac6cOTp58qT8/PzMjuMxsrKy9M0338jhcOibb75Rbm6uBg8eLLvdrlGjRqlmzZpmRwTKFYUSACDptxG3li1bavDgwXr//ffNjuOxUlNTtWjRIjkcDm3cuFG+vr4aPXq07Ha7Bg0aVKVmzaPqoFACACRJq1ev1uDBg7Vp0yZdd911ZsepFA4fPqwFCxZo/vz52r9/v4KCgvIWT+/WrRuTeVBpUCgBAJKkCRMmaOfOndqzZw9Fp4wZhqEdO3bI4XBowYIFSkxMVJs2bfIWT2/ZsqXZEYFSoVACAHT69GkFBwfrxRdf1JNPPml2nErN6XRq7dq1cjgcWrJkiTIyMtSrVy/Z7XaNGzdOgYGBZkcESoxCCQDQ22+/rUceeUTHjh1jdnIFysjI0LJly+RwOLRq1SpZLBbdeOONstvtuvnmm1WjRg2zIwLFQqEEAKhbt25q2rSpvv76a7OjVFlJSUlauHChHA6HtmzZooCAAN16662aOHGi+vfvLy8vL7MjAoWiUAJAFbdz50517dpVy5Yt04gRI8yOA0kxMTGKiIiQw+HQwYMH1bhxY02YMEF2u12dO3c2Ox5QAIUSAKq4Rx99VF9++aUSEhJY0sbNGIahzZs3KyIiQl988YVSUlLUqVOnvMk8TZo0MTsiIIlCCQBVWnZ2tho1aqT7779fs2bNMjsOipCTk6NVq1bJ4XB
o2bJlys7O1oABA2S32zVmzBjVqlXL7IiowiiUAFCFLVq0SOPGjdO+fft4D7UHSUtL05IlS+RwOLR27Vp5e3tr+PDhstvtGjZsmLy9vc2OiCqGQgkAVdgNN9yg9PR0/fjjj2ZHwRU6duyYPv/8c0VERGjnzp2qW7euxo0bJ7vdrl69erGmKCoEhRIAqqiEhAQ1a9ZMH3zwge69916z46AM7N69WxEREYqIiFBCQoJatGihiRMnauLEiYxAo1xRKAGginrxxRc1c+ZMnThxQv7+/mbHQRlyuVzasGGDHA6HFi1apLNnz6pbt26y2+0aP368GjZsaHZEVDIUSgCoglwul8LCwtS/f3999NFHZsdBOcrKytI333wjh8Ohb775Rrm5uRo8eLDsdrtGjRqlmjVrmh0RlQCFEgCqoMjISA0cOFAbNmxQnz59zI6DCpKamqrFixfL4XBow4YN8vX11ejRo2W32zVo0CCWjcIVo1ACQBV0xx13aMuWLdq/fz+TNqqow4cPa8GCBZo/f77279+voKAgjR8/Xna7Xd27d+e/FygRCiUAVDFnz55Vw4YNNXXqVD3zzDNmx4HJDMPQjh075HA49Pnnn+vkyZNq3bq17Ha7Jk6cqJYtW5odER6AQgkAVcx7772nhx56SAkJCQoODjY7DtxIbm6u1q5dK4fDoa+++koZGRnq1auX7Ha7xo0bp8DAQLMjwk1RKAGgiunZs6eCgoK0YsUKs6PAjWVkZGj58uVyOBxauXKlLBaLhg0bJrvdruHDh6tGjRpmR4QboVACQBWye/duderUSUuWLNHo0aPNjgMPkZSUpIULF8rhcGjLli3y9/fXrbfeKrvdrv79+8vLy8vsiDAZhRIAqpAnnnhCEREROnr0qKpVq2Z2HHig2NhYRUREyOFw6MCBA2rcuLEmTJggu92uzp07mx0PJqFQAkAVkZOTo8aNG+uuu+7SnDlzzI4DD2cYhqKiouRwOPTFF18oJSVFHTt2lN1u14QJE9S0aVOzI6ICUSgBoBK7cOGCsrOzVbNmTS1ZskRjxozR7t271aFDB7OjoRK5cOGCVq5cKYfDoWXLlik7O1v9+/eX3W7XmDFjVLt2bbMjopxRKAGgEnv88cf11ltvafTo0Tp8+LAkKSoqytxQqNTS0tK0dOlSORwOrVmzRt7e3ho+fLgmTpyoYcOGycfHx+yIKAdWswMAAMqPxWKR0+nUV199pS1btig6OlpTpkxRQkKC2dFQSQUEBOiuu+7S999/r4SEBL300kuKi4vT6NGjFRwcrAcffFAbN26Uy+UyOyrKECOUAFCJzZ49W3//+9+Vm5ubb3vXrl21fft2k1KhKtq9e7ciIiK0YMECxcfHq3nz5po4caImTpyodu3amR0PpcQIJQBUYsHBwfnKpJeXl3x9fZmUgwrXsWNHvfzyyzp06JDWr1+vwYMHa968eWrfvr26d++u1157TSdOnDA7Jq4QI5QAUImtWbNGgwYNkiRZrVbVqlVLq1ev1tVXX21yMkDKysrSf/7zHzkcDn3zzTdyOp0aNGiQ7Ha7Ro8erZo1a5odEcVEoQSASmzv3r15M7qbNGmitWvXKiwszORUQEGpqalavHixIiIi9MMPP8jX11ejRo2S3W7X4MGDZbPZzI6IIlAoAaCSyMh26nBKhnKcLnnbrGoe6KcL58+pTp06ql+/vnbu3KlGjRqZHRO4rCNHjmjBggWaP3++9u3bp6CgII0fP14TJ05Ujx49ZLFYzI6IP6BQAoAHi01MV0RUvCKjkxSfmqnff6FbJIXU9VWd88f17Ng+urZdM7NiAlfEMAzt3LlTDodDCxYs0MmTJxUWFia73a6JEyeqVatWZkfEf1EoAcADJaRmavLSXdoQlywvq0W5rsK/yi/u7xtaTzNGd1LTur4VmBQoG7m5uYqMjJTD4dBXX32lc+fO6brrrpPdbte4ceNUr169Cs1zqTsCfj5V97Y8hRIAPMwXW+M1ZfkeOV1GkUXyj7ysFtmsFk0b0UHje4SUY0KgfGVmZmr58uVyOBz67rvvZLFYNGzYMNntdg0fPlw1atQol+sW545AeJsgTewZorAG/uWSwV1RKAHAg8yLjNWcVTGlPs+TQ1rr4XAm58DzJSUl6csvv5TD4VBUVJT8/f01ZswY2e12DRgwQF5eXqW+BncELo9CCQAe4out8Xp2ya4yO9+sWzrpNkYqUYnExsYqIiJCERERiouLU6NGjTRhwgTZ7XZ17tw532SeqKgo3XHHHVqwYIG6d+9e6Dm5I1A8FEoA8AAJqZka9Np6ZTsLvq4u59QRnd24QDkn45SbcUaWaj6qFthUAT1vkW9Yz0LP6WOzavXj/avMCAqqDsMwtGXLFjkcDn3xxRdKTk5Wx44dZbfbNWHCBDVt2lR33nmn5s+fr1q1amnjxo3q2LFjgfNwR6D4KJQA4AHu+DBKmw6mXHKE5PyBrUr7eYV8GreVV826Mi5kKzN6k7KP7lHdGx6Wf5cbLnlOL6tFvVoGav59hZdOwNNduHBBq1atksPh0Ndff62srCz16dNHUVFRunDhgry8vFS7dm1t2rRJrVu3zvtcYXcEso78qsTPJ1/yWg3vmCOfxm0vua+y3xGgUAKAm4tNTNfg138o0WcMV65OfPKYDOcFNf7Tu0Ueu/rxfgoNqloTCFA1paWlaenSpZozZ452796dt91qtap+/fravHmzmjdvXuQdgYuF0r/bcHkHt863r0bLq+XlW+uS167sdwR4lzcAuLmIqHh5WUu2kLPF6iWbfz25ss8VeZyX1SLH5vjSxAM8RkBAgO666y41adJEVuv/KpDL5VJiYqI6deqkw4cPa/LSXXJe5nlJn6YdVLNjeL5/CiuTkuR0GZq8tOyegXY3FEoAcHOR0UnFmgzgyslSbuZZXTh9Qmlbvtb5g9tUvdlVRX4m12UoMiaprKICbu/MmTNatWqVDMMo8Mad8+fP698//KwNccnF+99cdqYMV26xrpvrMrQhLllxSelXlNvdVd0VOAHAA5zLdio+NbNYx55e+4HO7fzutz9YrPJtfZ3qDvnLZT8Xn5KpjGxnlV6UGVXH0aNH1aJFC3Xt2lUDBw5Us2bN1KxZM4WEhMjf319Tl++Rl/XIZQtlyn/+KSPnvGSxyqdpB9UJv1c+wUVPvLl4R2DqiA5l+SO5Bb49AMCNHUnJUHEfdA/oMVK+bfsoNz1Fmfs3yjBcUu6Fy37OkPTvdVFqWddbFotFVqtVVqs17z9falth/7m0x/KOZpS3nTt36sCBAzpw4IAOHjyoGTNmqH379nn/3bvsHQGvavJt00s1WnaX1beWLiTHK23LUiVGPKOG9tnyblj46yAv3hGYqspXKJmUAwBubEf8aY1+Z9MVfTbxi+flyj6nhne+etmiduLTJ5RzovTLo5SFKymqFVl6r/TYsijblekzZuX4/PPPdd9990mSvLy8lJubq549e+qll17SNb37qfO0VcX+S9xFF04f14kP/yafph3U4LbpRf/3W9LuqUMr3R2ByvXTAEAl42278kfdfdv2Vu
p38+RMPaZqgU2KPPbLLxaoeS2bXC6XDMOQy+XK958vtc3s/e6YyeVyKTc3t8Sfv9R2M/JXNbm5vz3/GBUVpUGDBunWBx6TETioxOepVqeRaoT1VGbMJhmuXFmshb+dx5B0OCVDHRoVPoHHE1EoAcCNNQ/0k0Uq8YiJJBkXsiVJruyMIo+zSBrU86pKN2KCkjMMo8wKq9nluLD969ev1wcffJD3M1utVrlcLnXp0kW33DpOWyNTr+h3ZwuoJ+U6ZVzIlsWn6KWBci6xHJGn49sDANyYn49NIXV9daSIiTm5GWfk5Vc73zYj16mM3WtlsfmoWr2iF1MOCfSlTELSb48bXLw9XFnl5OTogw8+kM1mU25ursaMGaMXXnhBHTt21J7jZ6XIjVd0XueZk7LYvGXxrn7ZY0tz58Fd8Q0CAG4uvE2Q5kcVPus05bt5MnIy5dO0o7z8A5V77rQy9q6TM+Wo6gy8T1bvGoWe28tqUXjroPKKDridwMBAWa1WjR07Vs8//7zatWuXt684dwRyM88WWG8yJ/GgMmO3qEbLbrJYii6Llv9ep7KhUAKAm5vYM0Sf/HS40P1+7frq3K/fK33Hf+Q6ny6rdw15NwxVnQH3FPkub+m3Waf2ayvv6+CAPxoxYoTOnj2rmjVrFthXnDsCp76eJWs1b/k0bvffWd4JOvfLd7JU81GdAXdf9vqV9Y5A5fuJAKCSCWvgr76h9Qp9l7df+/7ya9+/xOe9+C5vXruIqsRisVyyTF50uTsCvq2vVcaedUrb8rVcOZny8q0l39a9VKvP7apWp1GR167MdwRYNggAPEBR7xa+UpX93cLAlYhNTNfg138ot/OvfrxfpfxLXOV7KhQAKqGmdX01rYzfrjF9RAfKJPAHF+8IeFnLdpF9L6tFfUPrVcoyKVEoAcBjjO8RoieHtC6Tcz01pI1u68Gzk8ClzBjdSbYyLpQ2q0UzRncq03O6EwolAHiQh8PDNPOWTvKxWUs8guJltcjHZtWsWzrpofDQckoIeD7uCJQcz1ACgAdKSM3U5KW7tCEuWV5WS5HvHr64v29oPc0Y3alS/0sNKEvzImM1Z1XpX0n61JA2lf4vcRRKAPBgsYnpioiKV2RMkuJTMvOtn2fRb0uUhLcOkv3akEr77BZQnr7YGq8py/fI6TKK/IvbH3lZLbJZLZo+okOVeLyEQgkAHi47O1tt2rTRvX/6i8bc/aBynC5526xqHuhXKde7AyoadwQuj0IJAB7u0Ucf1RtvvKG2bdtq3759ZscBKi3uCBSOQgkAHmzt2rW6/vrrJUlWq1VJSUkKDAw0ORVQ+WVkO3U4JYM7Av9FoQQAD3X69Gm1b99eiYmJuvhV/uGHH+ree+81ORmAqoZlgwDAQ/3lL3/RqVOn8sqk1WrVl19+aXIqAFURI5QA4IG+++47DRs2rMB2m82mU6dOqXbt2hUfCkCVxQglAHigli1bavz48erYsaMslv8tcO50OrVhwwYTkwGoihihBAAPFxgYqPvuu0/Dhg3T0aNHNWLECNWqVcvsWACqkKo7HQkAKoGUlBSlpqaqe/fuCg8PNzsOgCqKW94A4MFiYn57LVybNm1MTgKgKqNQAoAHu1goQ0Mr93uCAbg3CiUAeLCYmBg1adJEfn5+ZkcBUIVRKAHAg0VHR3O7G4DpKJQA4MFiYmLUunVrs2MAqOIolADgoVwul2JjYymUAExHoQQAD5WQkKCsrCxueQMwHYUSADzUxRnejFACMBuFEgA8VExMjKpVq6ZmzZqZHQVAFUehBAAPFR0drdDQUNlsvPQMgLkolADgoZjhDcBdUCgBwENRKAG4CwolAHig7OxsHT58mEIJwC1QKAHAA8XFxckwDJYMAuAWKJQA4IFYMgiAO6FQAoAHiomJUUBAgIKCgsyOAgAUSgDwRNHR0WrTpo0sFovZUQCAQgkAnogZ3gDcCYUSADwQhRKAO6FQAoCHOX36tE6dOsUMbwBug0IJAB6GGd4A3A2FEgA8zMVCGRYWZnISAPgNhRIAPExMTIwaN26smjVrmh0FACRRKAHA40RHR3O7G4BboVACgIdhhjcAd0OhBAAP4nK5FBsbywxvAG6FQgkAHuTYsWPKzMxkhBKAW6FQAoAHYckgAO6IQgkAHiQmJkY2m00tWrQwOwoA5KFQAoAHiY6OVqtWrWSz2cyOAgB5KJQA4EGY4Q3AHVEoAcCDUCgBuCMKJQB4iJycHB06dIglgwC4HQolAHiIAwcOyOVyMUIJwO1QKAHAQ7BkEAB3RaEEAA8RExMjf39/NWzY0OwoAJAPhRIAPER0dLRat24ti8VidhQAyIdCCQAeghneANwVhRIAPERMTAwzvAG4JQolAHiAs2fPKjExkRFKAG6JQgkAHoAZ3gDcGYUSADwAhRKAO6NQAoAHiImJUXBwsPz9/c2OAgAFUCgBwANcXDIIANwRhRIAPAAzvAG4MwolALg5wzBYgxKAW6NQAoCbO378uDIyMiiUANwWhRIA3BwzvAG4OwolALi5mJgYeXl5qWXLlmZHAYBLolACgJuLjo5Wy5YtVa1aNbOjAMAlUSgBwM0xIQeAu6NQAoCbY8kgAO6OQgkAbuzChQs6ePAgI5QA3BqFEgDc2MGDB5Wbm0uhBODWKJQA4MYuLhnELW8A7oxCCQBuLCYmRn5+fgoODjY7CgAUikIJAG4sOjparVu3lsViMTsKABSKQgkAbowZ3gA8AYUSANwYa1AC8AQUSgBwU2lpaTpx4gSFEoDbo1ACgJuKjY2VJAolALdHoQQAN3VxySAKJQB3R6EEADcVExOjBg0aqFatWmZHAYAiUSgBwE1dXDIIANwdhRIA3BRLBgHwFBRKAHBDhmGwZBAAj0GhBAA3dPLkSaWnp1MoAXgECiUAuKGLM7y55Q3AE1AoAcANxcTEyGq1qmXLlmZHAYDLolACgBuKjo5WixYt5O3tbXYUALgsCiUAuCFmeAPwJBRKAHBDzPAG4EkolADgZi5cuKADBw5QKAF4DAolALiZw4cPy+l0UigBeAwKJQC4GZYMAuBpKJQA4GZiYmLk6+urRo0amR0FAIqFQgkAbiY6OlphYWGyWvmKBuAZ+LYCADfDkkEAPA2FEgDcDEsGAfA0FEoAcCPnzp3TsWPHKJQAPAqFEgDcSGxsrCRRKAF4FAolALiRi0sGUSgBeBIKJQC4kejoaNWvX1916tQxOwoAFBuFEgDcCBNyAHgiCiUAuBGWDALgiSiUAOAmDMNQdHQ0I5QAPA6FEgDcRFJSktLS0iiUADwOhRIA3MTFGd7c8gbgaSiUAOAmYmJiZLFY1KpVK7OjAECJUCgBwE1ER0erefPm8vHxMTsKAJQIhRIA3ARLBgHwVBRKAHATLBkEwFNRKAHADTidTsXFxTFCCcAjUSgBwA0cOXJEFy5coFAC8EgUSgBwAywZBMCTUSgBwA1ER0erevXqatKkidlRAKDEKJQA4AZiYmIUFhYmq5WvZQCeh28uAHADzPAG4MkolADgBqKjo5mQA8BjUSgBwGQZGRk6evQohRKAx6JQA
oDJ4uLiJIlCCcBjUSgBwGTR0dGSWDIIgOeiUAKAyWJiYhQYGKi6deuaHQUArgiFEgBMFhMTw+1uAB6NQgkAJmPJIACejkIJACYyDIMlgwB4PAolAJgoOTlZZ86coVAC8GgUSgAwUUxMjCSWDALg2SiUAGCi6OhoWSwWhYaGmh0FAK4YhRIATBQTE6OQkBDVqFHD7CgAcMUolABgIpYMAlAZUCgBwETR0dEsGQTA41EoAcAkubm5iouLY4QSgMejUAKASeLj45WTk0OhBODxKJQAYJLo6GhJ4pY3AI9HoQQAk8TExMjHx0dNmzY1OwoAlAqFEgBMEhMTo9DQUHl5eZkdBQBKhUIJACZhySAAlQWFEgBMwpJBACoLCiUAmOD8+fOKj49nhBJApUChBAATxMXFSRKFEkClQKEEABOwZBCAyoRCCQAmiImJUZ06dRQYGGh2FAAoNQolAJjg4gxvi8VidhQAKDUKJQCYIDo6mucnAVQaFEoAMEFMTAzPTwKoNCiUAFDBUlJSlJqaygglgEqDQgkAFeziDG8KJYDKgkIJABUsJiZGkhQWFmZyEgAoGzazAwBAVZCR7dThlAzlOF2Kij6mpi1C5evra3YsACgTFsMwDLNDAEBlFJuYroioeEVGJyk+NVP5vmwNQ80C/RTeJkgTe4YorIG/WTEBoNQolABQxhJSMzV56S5tiEuWl9WiXFfhX7MX9/cNracZozupaV1GLQF4HgolAJShL7bGa8ryPXK6jCKL5B95WS2yWS2aNqKDxvcIKceEAFD2KJQAUEbmRcZqzqqYUp/nySGt9XA4E3YAeA4m5QBAGfhia3yhZdJwXtCZDQ5l7ImUK+ucqtVvrtr97lCNFl0vefycVTGqX9NHtzFSCcBDsGwQAJRSQmqmpizfU+j+5G9eU9rWr+XXfoDqDPqTLFarkhZNVVZC4Z95YfkeJaRmlkdcAChzFEoAKKXJS3fJWcjzktnHo5W57wfV7n+X6gy8V/5dblCD22fIFhCkM+s+LvScTpehyUt3lVdkAChTFEoAKIXYxHRtiEsudAJOZvSPksUq/y435G2z2LxV86rByj62X860U5f8XK7L0Ia4ZMUlpZdLbgAoSxRKACiFiKh4eVkthe7PSTyoanUby+qTfzkg7+DWefsL42W1yLE5vmyCAkA5olACQClERicVuTxQ7rlUedWsU2C7V826efsL/azLUGRMUulDAkA5o1ACwBU6l+1U/GUmzhjOHMmrWoHtFpv3//YXIT4lUxnZzisPCQAVgEIJAFfoSEqGLreQr8XmLeVeKLD9YpG8WCwLY0g6nJJxhQkBoGJQKAHgCuU4XZc9xqtmXeWeO11g+8Vb3RdvfZf2OgBgJgolAFwhb9vlv0K9g1rqQuoxubLz3xrPOf7bIujeDVqWyXUAwEx8SwHAFWoe6KfC53f/xrdtb8lwKX3nd3nbDOcFndv1vbwbtZEtoH6Rn7f89zoA4M549SIAXCE/H5tC6vrqSBETc3watZFv2z46s/5TuTLPyFankTJ2rZHzbJIaDHv0stcICfSVnw9f1QDcGyOUAFAK4W2CilyHUpLq3fyEArqPVMbuSKV+/54Ml1NBt76g6iEdi/ycl9Wi8NZBZRkXAMqFxTCMy01SBAAUIjYxXYNf/6Hczr/68X4KDfIvt/MDQFlghBIASiGsgb/6hta77ChlSXlZLeobWo8yCcAjUCgBoJRmjO4kWxkXSpvVohmjO5XpOQGgvFAoAaCUmtb11bQRHcr0nNNHdFDTur6XPxAA3ACFEgDKwPgeIXpySOsyOddTQ9roth4hZXIuAKgITMoBgDL0xdZ4TVm+R06XoVxX8b9evawW2awWTR/RgTIJwONQKAGgjCWkZmry0l3aEJcsL6ulyGJ5cX/f0HqaMboTt7kBeCQKJQCUk9jEdEVExSsyJknxKZn6/ZetRb8tWh7eOkj2a0OYzQ3Ao1EoAaACZGQ7dTglQzlOl7xtVjUP9OMNOAAqDQolAAAASoVZ3gAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACgVCiUAAABKhUIJAACAUqFQAgAAoFQolAAAACiV/wdYMUawDlSMlwAAAABJRU5ErkJggg==", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAApQAAAIfCAYAAADOuEwnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/P9b71AAAACXBIWXMAAA9hAAAPYQGoP6dpAABlLklEQVR4nO3deVyVdf7//+c5hx0FBcUdcQNNLU0dTXPB0lIThcp9O9g0/VqmLJupZiatvp/Kcsoas1VwSbNSXCs1C1NbzDRNyQVNxRUFVAQEPJzr94cjI7IocuAc4HG/3bgV1/o6TAPP631dr/dlMgzDEAAAAHCDzM4uAAAAAJUbgRIAAABlQqAEAABAmRAoAQAAUCYESgAAAJQJgRIAAABlQqAEAABAmRAoAQAAUCYESgAAAJQJgRIAKpDJZNLUqVOdXQYAOBSBEoBDzZkzRyaTKf/Lzc1NjRo10oQJE3Ts2LFyPffvv/+uqVOn6tChQ9e1/dSpU2UymVSvXj1lZWUVWh8SEqJ77rnHwVVWjCv/N7jy69VXX3V2aQCqIDdnFwCganrxxRfVrFkzZWdn66efftKcOXO0adMm7dq1S15eXuVyzt9//10vvPCC+vTpo5CQkOve79SpU3r33Xf11FNPlUtdztKvXz+NGzeuwLKOHTs6qRoAVRmBEkC5GDBggDp37ixJeuCBB1SnTh1NmzZNK1as0LBhw5xcXUEdOnTQ66+/rocfflje3t7OLsdhQkNDNWbMGGeXAaAa4JY3gArRs2dPSdKBAwcKLP/222/Vs2dP+fr6qlatWhoyZIh2795daP9ff/1VAwYMkJ+fn2rUqKE77rhDP/30U/76OXPm6P7775ckhYeH59/iXb9+/TVre/7555WcnKx33333mttmZmbqqaeeUpMmTeTp6amwsDBNnz5dhmEU2C4nJ0eTJk1S3bp1VbNmTUVEROjo0aNFHvPYsWOKjo5WvXr15OnpqbZt2yomJqbQdklJSdqzZ881a7zShQsXlJ2dXap9AKC0CJQAKsTl5xpr166dv2zdunW66667dOrUKU2dOlVPPvmkfvjhB/Xo0aPAc5AJCQnq2bOnduzYob/97W/617/+pYMHD6pPnz7avHmzJKlXr17661//Kkl67rnnNH/+fM2fP19t2rS5Zm09e/ZU37599dprr+nChQvFbmcYhiIiIvTmm2/q7rvv1htvvKGwsDA9/fTTevLJJwts+8ADD2jGjBnq37+/Xn31Vbm7u2vQoEGFjpmcnKxu3bpp3bp1evTRR/XWW2+pZcuWmjhxombMmFFg23Hjxl3X57lszpw58vX1lbe3t2666SYtXLjwuvcFgFIxAMCBYmNjDUnGunXrjNOnTxtHjhwxFi9ebNStW9fw9PQ0jhw5kr9thw4djKCgICM1NTV/2Y4dOwyz2WyMGzcuf9nQoUMNDw8P48CBA/nLjh8/btSsWdPo1atX/rLPP//ckGTEx8dfV61TpkwxJBmnT582vvvuO0OS8cYbb+Svb9q0qTFo0KD875ctW2ZIMv7f//t/BY5z3333GSaTydi/f79hGIaxfft2Q5Lx8MMPF9hu1KhRhiRjypQp+csmTpxoNGjQwEhJSSmw7YgRIwx/f38jKysrf1nv3r2N6/213b17d2PGjBnG8uXLjXfffddo166dIcmYNWvWde0PAKXBCCWAcnHnnXeqbt26atKkie677z75+vpqxYoVaty4sSTpxIkT2r59uyZMmKCAgID8/W6++Wb169dPX375pSQpLy9Pa9eu1dChQ9W8efP87Ro0aKBRo0Zp06ZNSk9PL3O9vXr1Unh4eImjlF9++aUsFkv+SOhlTz31lAzD0FdffZW/naRC2z3xxBMFvjcMQ0uWLNHgwYNlGIZSUlLyv+666y6dO3dO27Zty99+/fr1hW6tF+f777/X448/roiICD300EPaunWr2rVrp+eee67EUVgAuBEESgDl4p133tHXX3+txYsXa+DAgUpJSZGnp2f++sOHD0uSwsLCCu3bpk0bpaSkKDMzU6dPn1ZWVlax29ntdh05csQhNU+dOlUnT57Ue++9V+T6w4cPq2HDhqpZs2ahOi6vv/xPs9msFi1aFNju6s9w+vRpnT17Vh988IHq1q1b4MtqtUq61IHuCB4eHnr00Ud19uxZbd261SHHBIDL6PIGUC7+9Kc/5Xd5Dx06VLfffrtGjRqlvXv3qkaNGk6urmi9evVSnz599Nprr+mhhx4q9/PZ7XZJ0pgxYzR+/Pgit7n55psddr4mTZpIktLS0hx2TACQCJQAKoDFYtErr7yi8PBwzZw5U88884yaNm0qSdq7d2+h7ffs2aM6derI19dXXl5e8vHxKXY7s9mcH5RMJlOZa506dar69Omj999/v9C6pk2bat26dTp//nyBUcrLndeXP1PTpk1lt9t14MCBAqOSV3+Gyx3geXl5uvPOO8tc+7X88ccf+ecFAEfiljeACtGnTx/96U9/0owZM5Sdna0GDRqoQ4cOmjt3rs6ePZu/3a5du7R27VoNHDhQ0qUw2r9/fy1fvrxA53dycrIWLlyo22+/XX5+fpIkX19fSSpwvNLq3bu3+vTpo2nTphWabmfgwIHKy8vTzJkzCyx/8803ZTKZNGDAAEnK/+fbb79dYLuru7YtFovuvfdeLVmyRLt27SpUy+nTpwt8f73TBl29nySdP39eM2bMUJ06ddSpU6drHgMASoMRSgAV5umnn9b999+vOXPm6KGHHtLrr7+uAQMG6LbbbtPEiRN14cIF/ec//5G/v3+B913/v//3//T111/r9ttv18MPPyw3Nze9//77ysnJ0WuvvZa/XYcOHWSxWDRt2jSdO3dOnp6e6tu3r4KCgkpV55QpUxQeHl5o+eDBgxUeHq5//OMfOnTokG655RatXbtWy5cv1xNPPJH/zGSHDh00cuRIzZo1S+fOnVP37t31zTffaP/+/YWO+eqrryo+Pl5du3bVn//8Z910001KS0vTtm3btG7dugK3p8eNG6fvvvvumo0577zzjpYtW6bBgwcrODhYJ06cUExMjJKSkjR//nx5eHiU6ucBANfk1B5zAFXO5WmDtmzZUmhdXl6e0aJFC6NFixaGzWYzDMMw1q1bZ/To0cPw9vY2/Pz8jMGDBxu///57oX23bdtm3HXXXUaNGjUMHx8fIzw83Pjhhx8Kbffhhx8azZs3NywWyzWnELpy2qCrXZ6i58ppgwzDMM6fP29MmjTJaNiwoeHu7m60atXKeP311w273V5guwsXLhh//etfjcDAQMPX19cYPHiwceTIkULTBhmGYSQnJxuPPPKI0aRJE8Pd3d2oX7++cccddxgffPBBkTVdy9q1a41+/foZ9evXN9zd3Y1atWoZ/fv3N7755ptr7gsAN8JkGNc5BwUAAABQBJ6hBAAAQJkQKAEAAFAmBEoAAACUCYESA
AAAZUKgBAAAQJkQKAEAAFAmBEoAlU5ISIgmTJhwXdseOXJEXl5e+v7778u3qAqyfv16mUwmrV+/vtT7vvfeewoODlZOTo7jCwNQrREoAZSbN954QyaTSevWrSt2mw8//FAmk0krVqwolxpefPFFde3aVT169CiwfOXKlerdu7eCgoLk4+Oj5s2ba9iwYVq9enW51OEKJkyYoNzc3CLfUw4AZUGgBFBuRowYIbPZrIULFxa7zcKFCxUYGJj//mtHOn36tObOnauHHnqowPLp06crIiJCJpNJzz77rN58803de++9SkxM1KJFixxeh6vw8vLS+PHj9cYbb1zz9Y0AUBq8yxtAuWnYsKHCw8MVFxend999V56engXWHzt2TBs2bNCDDz4od3d3h5//448/lpubmwYPHpy/zGaz6aWXXlK/fv20du3aQvucOnXK4XW4kmHDhum1115TfHy8+vbt6+xyAFQRjFACKFdjxozRuXPn9MUXXxRat2jRItntdo0ePVrSpZHD7t27KzAwUN7e3urUqZMWL158w+detmyZunbtqho1auQvS0lJUXp6eqFb4JcFBQXl/3tubq6ef/55derUSf7+/vL19VXPnj0VHx9fYJ9Dhw7JZDJp+vTpeuedd9S8eXP5+Piof//+OnLkiAzD0EsvvaTGjRvL29tbQ4YMUVpaWoFjhISE6J577tHatWvVoUMHeXl56aabblJcXNx1fdbNmzfr7rvvlr+/v3x8fNS7d+8inxvt1KmTAgICtHz58us6LgBcDwIlgHIVFRUlLy+vIm97L1y4UE2bNs0Pd2+99ZY6duyoF198US+//LLc3Nx0//33FxlGr+XixYvasmWLbr311gLLg4KC5O3trZUrVxYKdVdLT0/XRx99pD59+mjatGmaOnWqTp8+rbvuukvbt28vtP2CBQs0a9YsPfbYY3rqqaf03XffadiwYfrnP/+p1atX6+9//7sefPBBrVy5UpMnTy60f2JiooYPH64BAwbolVdeyf/8X3/9dYl1fvvtt+rVq5fS09M1ZcoUvfzyyzp79qz69u2rn3/+udD2t956a5VpUgLgIgwAKGf333+/4eXlZZw7dy5/2Z49ewxJxrPPPpu/LCsrq8B+ubm5Rrt27Yy+ffsWWN60aVNj/PjxJZ5z//79hiTjP//5T6F1zz//vCHJ8PX1NQYMGGD83//9n7F169ZC29lsNiMnJ6fAsjNnzhj16tUzoqOj85cdPHjQkGTUrVvXOHv2bP7yZ5991pBk3HLLLcbFixfzl48cOdLw8PAwsrOzC3wmScaSJUvyl507d85o0KCB0bFjx/xl8fHxhiQjPj7eMAzDsNvtRqtWrYy77rrLsNvt+dtlZWUZzZo1M/r161focz344IOGt7d3kT83ALgRjFACKHdjxoxRdnZ2gdu3l0csL9/uliRvb+/8fz9z5ozOnTunnj17atu2baU+Z2pqqiSpdu3ahda98MILWrhwoTp27Kg1a9boH//4hzp16qRbb71Vu3fvzt/OYrHIw8NDkmS325WWliabzabOnTsXWdP9998vf3///O+7du2a//nd3NwKLM/NzdWxY8cK7N+wYUNFRkbmf+/n56dx48bp119/1cmTJ4v8nNu3b1diYqJGjRql1NRUpaSkKCUlRZmZmbrjjju0YcMG2e32AvvUrl1bFy5cUFZWVtE/PAAoJQIlgHI3YMAABQQEFLjt/cknn+iWW25R27Zt85etWrVK3bp1k5eXlwICAlS3bl29++67Onfu3A2f2yimm3nkyJHauHGjzpw5o7Vr12rUqFH69ddfNXjwYGVnZ+dvN3fuXN18883y8vJSYGCg6tatqy+++KLImoKDgwt8fzlcNmnSpMjlZ86cKbC8ZcuWMplMBZaFhoZKuvScZlESExMlSePHj1fdunULfH300UfKyckpVOvln8nV5wKAG0WXN4By5+7urmHDhunDDz9UcnKykpKSlJiYqNdeey1/m40bNyoiIkK9evXSrFmz1KBBA7m7uys2NrbEaYeKExgYKKlwaLuan5+f+vXrp379+snd3V1z587V5s2b1bt3b3388ceaMGGChg4dqqefflpBQUGyWCx65ZVXdODAgULHslgsRZ6juOXFhd3SuDz6+Prrr6tDhw5FbnNlU5J06Wfi4+NTYEQYAMqCQAmgQowePVrvvfeePv30Ux08eFAmk0kjR47MX79kyRJ5eXlpzZo1BaYXio2NvaHzBQcHy9vbWwcPHrzufTp37qy5c+fqxIkTkqTFixerefPmiouLKzCaN2XKlBuq6Vr2798vwzAKnGvfvn2SLnWBF6VFixaSLgXjO++887rOc/DgQbVp06ZsxQLAFbjlDaBC9OjRQyEhIfr444/16aefqnfv3mrcuHH+eovFIpPJpLy8vPxlhw4d0rJly27ofO7u7urcubN++eWXAsuzsrL0448/FrnPV199JUkKCwvLr0kqOJK4efPmYvcvq+PHj2vp0qX536enp2vevHnq0KGD6tevX+Q+nTp1UosWLTR9+nRlZGQUWn/69OlCy7Zt26bu3bs7rnAA1R4jlAAqhMlk0qhRo/Tyyy9LuvRKxCsNGjRIb7zxhu6++26NGjVKp06d0jvvvKOWLVvqt99+u6FzDhkyRP/4xz+Unp4uPz8/SZcCZffu3dWtWzfdfffdatKkic6ePatly5Zp48aNGjp0qDp27ChJuueeexQXF6fIyEgNGjRIBw8e1HvvvaebbrqpyPBWVqGhoZo4caK2bNmievXqKSYmRsnJySWO0prNZn300UcaMGCA2rZtK6vVqkaNGunYsWOKj4+Xn5+fVq5cmb/91q1blZaWpiFDhji8fgDVFyOUACrM5Y5uT09P3XfffQXW9e3bV7Nnz9bJkyf1xBNP6JNPPtG0adMKdD2X1tixY5WXl1fgPeG1atXShx9+qPr16ys2NlYPP/yw/vWvfykjI0Ovv/66Pv300/xtJ0yYoJdfflk7duzQX//6V61Zs0Yff/yxOnfufMM1laRVq1b69NNP9eWXX+qZZ57RxYsX9emnn+quu+4qcb8+ffroxx9/VOfOnTVz5kw99thjmjNnjurXr69JkyYV2Pbzzz9XcHAwb8kB4FAmwxFPhQOAi5o4caL27dunjRs3OruUEoWEhKhdu3ZatWpVuZ0jJydHISEheuaZZ/T444+X23kAVD+MUAKo0qZMmaItW7bwZhhdanByd3fXQw895OxSAFQxjFACgAuoiBFKACgvjFACAACgTBihBAAAQJkwQgkAAIAyIVACAACgTAiUAAAAKBMCJQAAAMqEQAkAAIAyIVACAACgTAiUAAAAKBMCJQAAAMqEQAkAAIAyIVACAACgTAiUAAAAKBMCJQAAAMqEQAkAAIAyIVACAACgTAiUAAAAKBMCJQAAAMqEQAkAAIAyIVACAACgTAiUAAAAKBMCJQAAAMqEQAkAAIAyIVACAACgTAiUAAAAKBMCJQAAAMqEQAkAAIAyIVACAACgTAiUAAAAKBMCJQAAAMqEQAkAAIAyIVACAACgTAiU
AAAAKBMCJQAAAMrEzdkFAAAAVDaZOTYdSs1Urs0uDzezQgJ95etZfWNV9f3kAAAApZCYfF4LNicpfu8pJaVlybhinUlScICPwsOCNLprsFrVq+msMp3CZBiGce3NAAAAqqcjaVl6bulObdyfIovZpDx78dHp8vqeLevo5cj2ahLgU4GVOg+BEgAAoBiLtiRpyooE2exGiUHyahazSW5mk16IaKsRXYLLsULXQKAEAAAowsz4RE1fu6/Mx5ncP1SPhrdyQEWui2coAQAArrJoS1KxYdKee0Hpm+OUc3yvck/skz07Q4EDn1CNm+8scvvpa/epbg1PDa/CI5VMGwQAAHCFI2lZmrIiodj19qx0nfv+E11MPSL3oGbXdcznVyToSFqWo0p0OQRKAACAKzy3dKdsJTXe1AhQ40fnq/HDsaodHn1dx7TZDT23dKejSnQ5BEoAAID/Skw+r437U0pswDG5uctSo3apjptnN7Rxf4r2nzpf1hJdEoESAADgvxZsTpLFbCqXY1vMJn38U1K5HNvZCJQAAAD/Fb/3VKmmByqNPLuh+H2nyuXYzkagBAAAkJSRY1NSOTfOJKVmKTPHVq7ncAYCJQAAgKTDqZkq78m5DUmHUjPL+SwVj0AJAAAgKddmr1LnqUgESgAAAEkebhUTiyrqPBWp6n0iAACAGxAS6Kvy6e/+H9N/z1PV8OpFAAAASb6ebgoO8NHh62jMSd+6UvbsTOVlpEmSLuz/WbbzKZIkv06DZfYqOjQGB/rI17Pqxa+q94kAAABKYBiGTp06JbPZLJPJVOCfLX1zdDjVLplKvombvnmp8tL/NwVQ1r4fpH0/SJJqtA0vMlBazCaFhwY59sO4CJNhGOXd0AQAAOAyXnzxRU2ZMqXIde6BTdTwz++W27nXTeqllkE1y+34zsIzlAAAoFq57bbbil3Xo30L3d6yjsPflmMxm9SzZZ0qGSYlAiUAAKhm+vbtqwYNGhRYZjKZFB4ernXr1umVyPZyc3CgdDOb9HJke4ce05UQKAEAQLVgt9v13HPPqWbNmjpx4kT+crPZrLp162rRokWyWCxqEuCjFyLaOvTcL0a0VZMAH4ce05UQKAEAQJVms9k0efJk+fr66pVXXpGHh4emT5+uW265JX+bJUuWKCjofw0zI7oEa3L/UIec/+n+YRreJdghx3JVdHkDAIAqKTc3V5MnT9YHH3ygnJwcBQQE6LXXXtNjjz0mSQoNDVVERISmTZum22+/vdD+j4a3Up0anpqyIkE2u6E8+/X3MVvMJrmZTXoxom2VD5MSXd4AAKCKyc7O1qRJkxQTE6Pc3FzVqVNH//d//6cHH3yw0Lb79u1Tq1atZDIV/8zkkbQsPbd0pzbuT5HFbCoxWF5e37NlHb0c2b5K3+a+EoESAABUCVlZWXrsscc0f/58Xbx4UfXq1dOrr76qCRMmOOT4icnntWBzkuL3nVJSapauDFAmXZq0PDw0SGO6BVfZbu7iECgBAECllpGRoYcffliffPKJbDabGjRooNdff12jR48ut3Nm5th0KDVTuTa7PNzMCgn0rZJvwLleBEoAAFAppaen6y9/+Ys+//xz5eXlqXHjxnrzzTd13333Obu0aqf6RmkAAFAppaWl6cEHH9TSpUtlt9vVtGlTvf3224qIiHB2adUWgRIAAFQKp06d0p///GetWrVKdrtdzZs318yZMzVgwABnl1btESgBAIBLO3nypKKjo7V69WoZhqFWrVrp3Xff1R133OHs0vBfBEoAAOCSjh49qujoaK1bt06GYahNmzZ6//331bNnT2eXhqsQKAEAgEs5dOiQrFarvvvuOxmGofbt2+uDDz5Qt27dnF0aikGgBAAALiExMVHR0dHatGmTJKlDhw768MMP1blzZydXhmshUAIAAKfavXu3oqOj9dNPP0mSOnfurNmzZ+vmm292cmW4XgRKAADgFL/99puio6O1detWSVK3bt0UExOjNm3aOLkylBaBEgAAVKht27Zp4sSJ2r59uyTp9ttvV0xMjFq1auXcwnDDzM4uAAAAVA+bN2/WzTffrE6dOmnHjh3q06ePDh48qI0bNxImKzkCJQAAKFebNm3STTfdpG7dumnXrl3q16+fDh8+rPj4eIWEhDi7PDgAgRIAAJSLb7/9VqGhoerZs6f27NmjAQMG6OjRo1q7dq2aNGni7PLgQARKAADgUF999ZVatGihO+64QwcOHFBERIROnjypL7/8Ug0bNnR2eSgHBEoAAOAQK1asUEhIiAYOHKhDhw7p3nvv1enTp7V8+XIFBQU5uzyUIwIlAAAok8WLF6tJkyYaMmSIjh49qhEjRig1NVWLFy9WQECAs8tDBSBQAgCAG/LJJ5+oYcOGuv/++3Xy5EmNHTtWZ8+e1SeffKJatWo5uzxUIAIlAAAolblz56p+/foaNWqUUlJSFB0drXPnzmnevHmqUaOGs8uDExAoAQDAdfnggw9Ut25dTZgwQWfOnNFDDz2k9PR0zZ49Wz4+Ps4uD05EoAQAACWaOXOmAgMD9Ze//EXnz5/XY489pvPnz+vdd9+Vl5eXs8uDCzAZhmE4uwgAAOBa7Ha7ZsyYoZdeeklnz56Vl5eXHn74YU2bNk1ubry5GQURKAEAQD673a5p06bplVde0fnz5+Xt7a0nnnhCL774IkESxSJQAgAA2e12vfTSS3r99deVmZkpX19fTZ48Wc8//7zMZp6QQ8kIlAAAVGN2u13/+te/NGPGDGVlZalmzZp65pln9MwzzxAkcd0YuwYAoBqy2Wx69tlnNXPmTGVnZ8vf31/Tp0/XpEmTCJIoNQIlAADVSG5urp5++mm9//77ysnJUe3atfXqq6/q8ccfd3ZpqMQIlAAAVAPZ2dmaNGmSYmJilJubqzp16mjGjBl66KGHnF0aqgACJQAAVVhWVpb++te/at68ebp48aKCgoL06quvymq1Ors0VCEESgAAqqCMjAw98sgjWrhwoWw2mxo0aKDXXntNY8aMcXZpqIIIlAAAVCHp6el66KGH9NlnnykvL0+NGjXSG2+8oWHDhjm7NFRhBEoAAKqAM2fO6MEHH1RcXJzsdruCg4P11ltvaejQoc4uDdUAgRIAgEosJSVFDzzwgFauXCm73a7mzZvrP//5jwYOHOjs0lCNECgBAKiETp48qYkTJ+qrr76SYRhq1aqV3n33Xd1xxx3OLg3VEIESAIBK5OjRo5o4caK+/vprGYah1q1b6/3331evXr2cXRqqMQIlAACVwOHDh2W1WrV+/XoZhqF27drpgw8+0G233ebs0gACJQAANyozx6ZDqZnKtdnl4WZWSKCvfD0d+6f1wIEDslqt2rhxoyTplltu0UcffaTOnTs79DxAWRAoAQAohcTk81qwOUnxe08pKS1LxhXrTJKCA3wUHhak0V2D1apezWse7/jx4zp58qRuvfXWAsv37t0rq9WqH3/8UZLUqVMnzZ49W7fccosDPw3gGCbDMIxrbwYAQPV2JC1Lzy3dqY37U2Qxm5RnL/7P5+X1PVvW0cuR7dUkwKfI7dL
T09WxY0edPHlSSUlJCgwM1K5duxQdHa0tW7ZIkrp27arZs2erbdu25fK5AEcgUAIAcA2LtiRpyooE2exGiUHyahazSW5mk16IaKsRXYILrDMMQ8OGDdPSpUtlGIasVqu2bdumX3/9VZLUo0cPzZ49W2FhYQ79LEB5IFACAFCCmfGJmr52X5mPM7l/qB4Nb5X//TvvvKNHH3200Ha9e/dWbGysmjVrVuZzAhWFQAkAQDEWbUnSM3E7Cy3PObFPmTu/UXbSTtnOJcvs7SfPhmGq1Wus3AMaFXu8aVHtNbxLsLZs2aJu3brJbrcXWP/EE0/ozTffdPjnAMobgRIAgCIcScvSnW9+pxybvdC600tfVs7R3fJpfbvcg0KUl3FG57etkpGbrfrjpsujbkiRx/R0M+vffWtpaL+eysvLK7S+Zs2aOnLkiPz9/R39cYByRaAEAKAIY2dv1g9/pBb5zGT20d3ybNBSJot7/rKLacd0fPaj8m3dQ3UGTy7ymCYZunBwu5I//ZcsFou8vLwkSXa7XYZhyNvbW1u2bFGLFi3K50MB5YRpgwAAuEpi8nlt3J9S7Hqvxm0KLXMPaCSPOsG6mHKk2P0MmeTVrKN2Hj6tdsF1HFIr4ArMzi4AAABXs2BzkixmU6n2MQxDeVlnZfbxK3E7i9mkxduTy1Ie4HIIlAAAXCV+76lSTQ8kSZkJ65V3PlW+rXuWuF2e3VD8vlNlKQ9wOQRKAACukJFjU1JaVqn2uZh6RGlfvyvPRq3l2/6Oa26flJqlzBzbjZYIuBwCJQAAVzicmqnSjE3mZZzRqc9fkNnTV3WGPiuT2XLNfQxJh1Izb7hGwNXQlAMAwBVyi5gmqDj27EwlfzZF9uxM1RszTW41A8vlPICrY4QSAIAreLhd359Gw5arU4tflO3MMQXd/7w86gRfe6cbOA9QGfBfMwAAVwgJ9NW1+rsNe55OL5umnON7VHfoM/JsVHgaoZKY/nseoKrgljcAAFfw9XRTcICPDpfQmHPm29m6sH+zvFv+SXkXMpSxK77A+hrtwks8R3Cgj3w9+ROMqoP/mgEAuEp4WJDmbz5c7NRBucl/SJIu7P9ZF/b/XGh9SYHSYjYpPDTIMYUCLoJACQDAVdp5nSlxHsr6o1+94WPn2Q2N6Va65y0BV8czlACAamnx4sXq0KGDQkNDFRwcrKCgIPn4+MhkMun+u3rKcjpRltK9LOeaLGaTeraso5ZBNR17YMDJGKEEAFRLp0+f1o4dO4pcd8stt2jZKxN119ublOfA6X3czCa9HNneYccDXAUjlACAamnixIlq1KhRgWUmk0ktW7bUTz/9pJC6NfVCRFuHnvPFiLZqEuDj0GMCroBACQCodrKysjR+/HgdP368wHKLxaLFixfLy8tLkjSiS7Am9w91yDmf7h+m4V14dhJVE4ESAFBt2O12/e1vf1OtWrW0aNEihYSEKDQ0VGbzpT+H//d//6dbbrmlwD6PhrfSq1Ht5elmlsVcuocqLWaTPN3MmhbVXo+Et3TY5wBcjckwjNK8shQAgEpp1qxZ+vvf/66MjAwFBgZq1qxZGjZsmNavX6/w8HB1795dGzZskMVS9Lu4j6Rl6bmlO7Vxf4osZlOJXeCX1/dsWUcvR7bnNjeqPAIlAKBKW7Vqlf785z/r5MmT8vb21tSpU/W3v/2twDZLlizR7bffrnr16l3zeInJ57Vgc5Li951SUmqWrvwjatKlScvDQ4M0plsw3dyoNgiUAIAqadu2bRo9erT27NkjNzc3Pfjgg3rrrbfk5ua4CU4yc2w6lJqpXJtdHm5mhQT68gYcVEsESgBAlXL06FGNGDFC33//vUwmk4YMGaK5c+fKz8/P2aUBVRaXUQCAKiEjI0MTJkxQXFycDMNQt27dtGjRIjVt2tTZpQFVHl3eAIBKzW636/HHH1ft2rW1ZMkStWzZUj///LN+/PFHwiRQQQiUAIBK64033lDNmjX19ttvKyAgQHFxcdq3b5+6dOni7NKAaoVACQCodJYsWaKgoCA99dRTMplMmjFjhpKTkxUZGens0oBqiaYcAEClsXnzZo0ZM0b79++Xm5ubHnvsMU2fPj1/YnIAzkFTDgDA5R0+fFjDhw/X5s2bZTKZdN999yk2NlY1atRwdmkARKAEALiw9PR0jR07VitXrpRhGOrZs6cWLlyoxo0bO7s0AFfgHgEAwOXYbDY9/PDDCggI0IoVKxQWFqZff/1VGzZsIEwCLohACQBwKa+++qr8/Pz07rvvKigoSF988YV2796tDh06OLs0AMUgUAIAXMKiRYtUp04dPfvss7JYLJo1a5aOHz+ugQMHOrs0ANdAlzcAwKm+//57jR07VgcPHpSHh4eeeOIJvfLKK3RuA5UIgRIA4BQHDhzQ8OHDtXXrVpnNZo0YMUKzZ8+Wl5eXs0sDUEp0eQMAKlRaWprGjBmj1atXyzAMhYeHa+HChapfv76zSwNwg7ifAACoELm5uXrggQcUFBSkr776Su3atdPOnTv17bffEiaBSo5ACQAoV3a7XS+++KL8/f01e/Zs1a9fX2vXrtVvv/2mdu3aObs8AA7ALW8AQLmZN2+eHn/8cZ09e1b+/v6aOXOmJk6c6OyyADgYI5QAAIdbv369mjZtqvHjx+vChQv65z//qbS0NMIkUEXR5Q0AcJjdu3dr5MiR2rFjh8xms8aOHav33nuPzm2giuOWNwCgzFJSUjRy5EitW7dOktS/f38tWLBAderUcXJlACoCt7wBADcsOztb48ePV7169bRu3Tp16NBBe/bs0Zo1awiTQDVCoAQAlJrdbtc//vEP1apVS/PmzVOTJk0UHx+vX3/9VWFhYc4uD0AF45Y3AKBUZs+erSeffFLp6emqXbu2PvroI40ZM8bZZQFwIkYoAQDXZc2aNWrcuLEeeOAB5ebm6qWXXlJKSgphEgBd3gCAku3atUsjR47Url27ZLFYZLVa9c4778jDw8PZpQFwEdzyBgAU6eTJkxo5cqTWr18vk8mkgQMHasGCBapVq5azSwPgYrjlDQAoICsrS6NGjVKjRo20fv16de7cWYmJifriiy8IkwCKRKAEAEi61Ln9t7/9TbVq1dInn3yikJAQbdq0SVu2bFGLFi2cXR4AF0agBABo1qxZ8vf31+uvvy4/Pz99+umnOnDggHr06OHs0gBUAgRKAKjGVq1apQYNGuiRRx5RXl6epk2bppSUFA0bNszZpQGoROjyBoBqaNu2bRo9erT27NkjNzc3Pfjgg3rrrbfk5kavJoDS4zcHAFQjR48e1ciRI7Vp0yaZTCYNHTpUc+fOlZ+fn7NLA1CJESgBoBrIyMjQhAkTFBcXJ8Mw1K1bNy1atEhNmzZ1dmkAqgCeoQSAKsxut2vSpEmqXbu2lixZopYtW+rnn3/Wjz/+SJgE4DAESgCoombMmCE/Pz/NmDFDAQEBiouL0759+9SlSxdnlwagiiFQAkAVExcXp6CgIE2aNEnSpWCZnJysyMhIJ1cGoKqiyxsAqoiff/5Zo0eP1v79++Xu7q5HH31U06dPl9
[... remainder of base64-encoded PNG image data for the removed notebook output elided ...]", - "text/plain": [ - "[figure repr elided]" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "[figure repr elided]
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "sgn_vis.visualize_main_data_output(5)" ] diff --git a/python/tests/test_assets/distributed/constants.py b/python/tests/test_assets/distributed/constants.py index 7e0dffc..3ae6fbf 100644 --- a/python/tests/test_assets/distributed/constants.py +++ b/python/tests/test_assets/distributed/constants.py @@ -96,8 +96,9 @@ ).to(torch.int64) ## Edge features -# Edge features are set to be the corresponding source node ids value divided by 10, repeated twice for u2u nodes and once for u2i nodes. -# Each edge feature tensor is of shape [num_edges_on_rank, edge_feat_dim]. +# Edge features for U2U edge are set to be the corresponding source node ids value divided by 10. +# U2I edge has no edge features. +# Each U2U edge feature tensor is of shape [num_edges_on_rank, edge_feat_dim]. MOCKED_U2U_EDGE_FEATURES_ON_RANK_ZERO: Final[torch.Tensor] = torch.Tensor( [[0, 0], [0.1, 0.1], [0.2, 0.2], [0.3, 0.3]] ) @@ -105,13 +106,6 @@ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6], [0.7, 0.7]] ) -MOCKED_U2I_EDGE_FEATURES_ON_RANK_ZERO: Final[torch.Tensor] = torch.Tensor( - [[0], [0.1], [0.2], [0.3]] -) -MOCKED_U2I_EDGE_FEATURES_ON_RANK_ONE: Final[torch.Tensor] = torch.Tensor( - [[0.4], [0.5], [0.6], [0.7]] -) - ## Labeled Edges # There are 2 positive labels and 2 negative labels per rank for the u2u edge. For u2i edge, we have 2 labels on rank 0 and 0 labels on rank 1. # Each label tensor is of shape [2, num_labels_on_rank], where, for an outgoing graph, the 0th and 1st rows correspond to source and destination nodes, respectfully. @@ -188,7 +182,6 @@ class TestGraphData: }, edge_features={ USER_TO_USER_EDGE_TYPE: MOCKED_U2U_EDGE_FEATURES_ON_RANK_ZERO, - USER_TO_ITEM_EDGE_TYPE: MOCKED_U2I_EDGE_FEATURES_ON_RANK_ZERO, }, positive_labels={ USER_TO_USER_EDGE_TYPE: MOCKED_U2U_POS_EDGE_INDEX_ON_RANK_ZERO, @@ -214,7 +207,6 @@ class TestGraphData: }, edge_features={ USER_TO_USER_EDGE_TYPE: MOCKED_U2U_EDGE_FEATURES_ON_RANK_ONE, - USER_TO_ITEM_EDGE_TYPE: MOCKED_U2I_EDGE_FEATURES_ON_RANK_ONE, }, positive_labels={ USER_TO_USER_EDGE_TYPE: MOCKED_U2U_POS_EDGE_INDEX_ON_RANK_ONE, @@ -280,13 +272,6 @@ class TestGraphData: ), dim=0, ), - USER_TO_ITEM_EDGE_TYPE: torch.cat( - ( - MOCKED_U2I_EDGE_FEATURES_ON_RANK_ZERO, - MOCKED_U2I_EDGE_FEATURES_ON_RANK_ONE, - ), - dim=0, - ), }, positive_labels={ USER_TO_USER_EDGE_TYPE: torch.cat( diff --git a/python/tests/test_assets/distributed/run_distributed_dataset.py b/python/tests/test_assets/distributed/run_distributed_dataset.py new file mode 100644 index 0000000..fa2be62 --- /dev/null +++ b/python/tests/test_assets/distributed/run_distributed_dataset.py @@ -0,0 +1,101 @@ +from typing import Literal, MutableMapping, Optional + +from gigl.common.data.load_torch_tensors import TFDatasetOptions +from gigl.common.utils.vertex_ai_context import DistributedContext +from gigl.distributed.dataset_factory import _build_dataset_process, build_dataset +from gigl.distributed.dist_link_prediction_data_partitioner import ( + DistLinkPredictionDataPartitioner, +) +from gigl.distributed.dist_link_prediction_dataset import DistLinkPredictionDataset +from gigl.distributed.utils.serialized_graph_metadata_translator import ( + convert_pb_to_serialized_graph_metadata, +) +from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper +from gigl.src.mocking.lib.mocked_dataset_resources import MockedDatasetInfo +from gigl.src.mocking.lib.versioning import ( + MockedDatasetArtifactMetadata, + 
get_mocked_dataset_artifact_metadata, +) +from gigl.utils.data_splitters import NodeAnchorLinkSplitter + + +def run_distributed_dataset( + rank: int, + world_size: int, + mocked_dataset_info: MockedDatasetInfo, + output_dict: MutableMapping[int, DistLinkPredictionDataset], + should_load_tensors_in_parallel: bool, + master_ip_address: str, + master_port: int, + partitioner: Optional[DistLinkPredictionDataPartitioner] = None, + dataset: Optional[DistLinkPredictionDataset] = None, + splitter: Optional[NodeAnchorLinkSplitter] = None, +) -> DistLinkPredictionDataset: + """ + Runs DistLinkPredictionDataset's __init__() and load() functions given a mocked dataset info. + Args: + rank (int): Rank of the current process + world_size (int): World size of the current process + mocked_dataset_info (MockedDatasetInfo): Mocked Dataset Metadata for current run + output_dict (MutableMapping[int, DistLinkPredictionDataset]): Dict initialized by mp.Manager().dict() to which outputs will be written + should_load_tensors_in_parallel (bool): Whether tensors should be loaded from serialized information in parallel or in sequence across the [node, edge, pos_label, neg_label] entity types. + master_ip_address (str): Master IP Address for performing distributed operations. + master_port (int): Master Port for performing distributed operations + partitioner (Optional[DistLinkPredictionDataPartitioner]): Optional initialized partitioner class to pass into `build_dataset` + dataset (Optional[DistLinkPredictionDataset]): Optional initialized dataset class to pass into `build_dataset` + splitter (Optional[NodeAnchorLinkSplitter]): Optional initialized splitter to pass into `build_dataset` + Returns: + DistLinkPredictionDataset: The built dataset on the current rank + """ + mocked_dataset_artifact_metadata: MockedDatasetArtifactMetadata = ( + get_mocked_dataset_artifact_metadata()[mocked_dataset_info.name] + ) + gbml_config_pb_wrapper = GbmlConfigPbWrapper.get_gbml_config_pb_wrapper_from_uri( + gbml_config_uri=mocked_dataset_artifact_metadata.frozen_gbml_config_uri + ) + preprocessed_metadata_pb_wrapper = ( + gbml_config_pb_wrapper.preprocessed_metadata_pb_wrapper + ) + graph_metadata_pb_wrapper = gbml_config_pb_wrapper.graph_metadata_pb_wrapper + + serialized_graph_metadata = convert_pb_to_serialized_graph_metadata( + preprocessed_metadata_pb_wrapper=preprocessed_metadata_pb_wrapper, + graph_metadata_pb_wrapper=graph_metadata_pb_wrapper, + ) + + distributed_context = DistributedContext( + main_worker_ip_address=master_ip_address, + global_rank=rank, + global_world_size=world_size, + ) + + sample_edge_direction: Literal["in", "out"] = "out" + + if partitioner is None and dataset is None: + dataset = build_dataset( + serialized_graph_metadata=serialized_graph_metadata, + distributed_context=distributed_context, + sample_edge_direction=sample_edge_direction, + should_load_tensors_in_parallel=should_load_tensors_in_parallel, + partitioner=partitioner, + dataset=dataset, + splitter=splitter, + ) + output_dict[rank] = dataset + return dataset + else: + # In testing, we pass in a Partitioner or Dataset class with a NotImplementedError in order to ensure + # that custom logic from child partitioners and datasets is exercised by the DatasetFactory. As a result, + # we must call `_build_dataset_process` directly instead of `build_dataset` so that a spawned process + # is not launched; a spawned process would raise the expected errors immediately instead of letting them be caught by the unit test. 
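+ # NOTE: the call below runs the dataset-building entrypoint inline on the current process + # (process 0 on this machine, a throwaway output_dict, and the default TFDatasetOptions()), + # so errors raised by the injected partitioner/dataset surface here for the test to catch.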
+ _build_dataset_process( + process_number_on_current_machine=0, + output_dict={}, + serialized_graph_metadata=serialized_graph_metadata, + distributed_context=distributed_context, + dataset_building_port=master_port, + sample_edge_direction=sample_edge_direction, + should_load_tensors_in_parallel=should_load_tensors_in_parallel, + partitioner=partitioner, + dataset=dataset, + tf_dataset_options=TFDatasetOptions(), + ) + assert dataset is not None + return dataset diff --git a/python/tests/test_assets/distributed/run_distributed_partitioner.py b/python/tests/test_assets/distributed/run_distributed_partitioner.py index ad18b69..2950947 100644 --- a/python/tests/test_assets/distributed/run_distributed_partitioner.py +++ b/python/tests/test_assets/distributed/run_distributed_partitioner.py @@ -6,7 +6,7 @@ from gigl.distributed import DistLinkPredictionDataPartitioner from gigl.src.common.types.graph_data import EdgeType, NodeType -from gigl.types.distributed import EdgeAssignStrategy, PartitionOutput +from gigl.types.distributed import PartitionOutput from tests.test_assets.distributed.constants import ( MOCKED_NUM_PARTITIONS, USER_NODE_TYPE, @@ -26,7 +26,7 @@ def run_distributed_partitioner( output_dict: Dict[int, PartitionOutput], is_heterogeneous: bool, rank_to_input_graph: Dict[int, TestGraphData], - edge_assign_strategy: EdgeAssignStrategy, + should_assign_edges_by_src_node: bool, master_addr: str, master_port: int, input_data_strategy: InputDataStrategy, @@ -38,7 +38,7 @@ def run_distributed_partitioner( output_dict: Dict[int, PartitionOutput]: Dict initialized by mp.Manager().dict() in which outputs of partitioner will be written to. This is a mapping of rank to Partition output. is_heterogeneous (bool): Whether homogeneous or heterogeneous inputs should be used rank_to_input_graph (Dict[int, TestGraphData]): Mapping of rank to mocked input graph for testing partitioning - edge_assign_strategy (EdgeAssignStrategy): Whether to partion edges according to the partition book of the source node or destination node, + should_assign_edges_by_src_node (bool): Whether to partion edges according to the partition book of the source node or destination node master_addr (str): Master address for initializing rpc for partitioning master_port (int): Master port for initializing rpc for partitioning input_data_strategy (InputDataStrategy): Strategy for registering inputs to the partitioner @@ -73,7 +73,7 @@ def run_distributed_partitioner( if input_data_strategy == InputDataStrategy.REGISTER_ALL_ENTITIES_SEPARATELY: dist_partitioner = DistLinkPredictionDataPartitioner( - edge_assign_strategy=edge_assign_strategy, + should_assign_edges_by_src_node=should_assign_edges_by_src_node, ) # We call del to mimic the real use case for handling these input tensors dist_partitioner.register_node_ids(node_ids=node_ids) @@ -125,7 +125,7 @@ def run_distributed_partitioner( ) elif input_data_strategy == InputDataStrategy.REGISTER_MINIMAL_ENTITIES_SEPARATELY: dist_partitioner = DistLinkPredictionDataPartitioner( - edge_assign_strategy=edge_assign_strategy, + should_assign_edges_by_src_node=should_assign_edges_by_src_node, ) # We call del to mimic the real use case for handling these input tensors @@ -151,7 +151,7 @@ def run_distributed_partitioner( else: dist_partitioner = DistLinkPredictionDataPartitioner( - edge_assign_strategy=edge_assign_strategy, + should_assign_edges_by_src_node=should_assign_edges_by_src_node, node_ids=node_ids, node_features=node_features, edge_index=edge_index, diff --git 
a/python/tests/unit/common_tests/translators_tests/__init__.py b/python/tests/unit/common/__init__.py similarity index 100% rename from python/tests/unit/common_tests/translators_tests/__init__.py rename to python/tests/unit/common/__init__.py diff --git a/python/tests/unit/common_tests/utils_tests/__init__.py b/python/tests/unit/common/collections/__init__.py similarity index 100% rename from python/tests/unit/common_tests/utils_tests/__init__.py rename to python/tests/unit/common/collections/__init__.py diff --git a/python/tests/unit/common_tests/collections_tests/frozen_dict_test.py b/python/tests/unit/common/collections/frozen_dict_test.py similarity index 100% rename from python/tests/unit/common_tests/collections_tests/frozen_dict_test.py rename to python/tests/unit/common/collections/frozen_dict_test.py diff --git a/python/tests/unit/common_tests/collections_tests/itertools_test.py b/python/tests/unit/common/collections/itertools_test.py similarity index 100% rename from python/tests/unit/common_tests/collections_tests/itertools_test.py rename to python/tests/unit/common/collections/itertools_test.py diff --git a/python/tests/unit/common_tests/utils_tests/compute/__init__.py b/python/tests/unit/common/data/__init__.py similarity index 100% rename from python/tests/unit/common_tests/utils_tests/compute/__init__.py rename to python/tests/unit/common/data/__init__.py diff --git a/python/tests/unit/common_tests/data/dataloaders_test.py b/python/tests/unit/common/data/dataloaders_test.py similarity index 83% rename from python/tests/unit/common_tests/data/dataloaders_test.py rename to python/tests/unit/common/data/dataloaders_test.py index 65e2132..148bf3f 100644 --- a/python/tests/unit/common_tests/data/dataloaders_test.py +++ b/python/tests/unit/common/data/dataloaders_test.py @@ -16,6 +16,17 @@ ) from gigl.src.data_preprocessor.lib.types import FeatureSpecDict +_FEATURE_SPEC_WITH_ENTITY_KEY: FeatureSpecDict = { + "node_id": tf.io.FixedLenFeature([], tf.int64), + "feature_0": tf.io.FixedLenFeature([], tf.float32), + "feature_1": tf.io.FixedLenFeature([], tf.float32), +} + +_FEATURE_SPEC_WITHOUT_ENTITY_KEY: FeatureSpecDict = { + "feature_0": tf.io.FixedLenFeature([], tf.float32), + "feature_1": tf.io.FixedLenFeature([], tf.float32), +} + def _get_mock_node_examples() -> List[tf.train.Example]: """Generate mock examples for testing. 
@@ -56,12 +67,6 @@ def setUp(self): for example in examples: writer.write(example.SerializeToString()) - self.feature_spec: FeatureSpecDict = { - "node_id": tf.io.FixedLenFeature([], tf.int64), - "feature_0": tf.io.FixedLenFeature([], tf.float32), - "feature_1": tf.io.FixedLenFeature([], tf.float32), - } - def tearDown(self): super().tearDown() self.temp_dir.cleanup() @@ -70,6 +75,7 @@ def tearDown(self): [ param( "No features", + feature_spec=_FEATURE_SPEC_WITH_ENTITY_KEY, feature_keys=[], feature_dim=0, expected_id_tensor=torch.tensor(range(100)), @@ -77,6 +83,7 @@ def tearDown(self): ), param( "One feature", + feature_spec=_FEATURE_SPEC_WITH_ENTITY_KEY, feature_keys=["feature_0"], feature_dim=0, expected_id_tensor=torch.tensor(range(100)), @@ -87,6 +94,23 @@ def tearDown(self): ), param( "Two features", + feature_spec=_FEATURE_SPEC_WITH_ENTITY_KEY, + feature_keys=["feature_0", "feature_1"], + feature_dim=0, + expected_id_tensor=torch.tensor(range(100)), + expected_feature_tensor=torch.concat( + ( + torch.tensor(range(100), dtype=torch.float32).reshape(100, 1) + * 10, + torch.tensor(range(100), dtype=torch.float32).reshape(100, 1) + * 0.1, + ), + dim=1, + ), + ), + param( + "Two features, no entity key in feature schema", + feature_spec=_FEATURE_SPEC_WITHOUT_ENTITY_KEY, feature_keys=["feature_0", "feature_1"], feature_dim=0, expected_id_tensor=torch.tensor(range(100)), @@ -105,6 +129,7 @@ def tearDown(self): def test_load_as_torch_tensors( self, _, + feature_spec: FeatureSpecDict, feature_keys: List[str], feature_dim: int, expected_id_tensor: torch.Tensor, @@ -114,7 +139,7 @@ def test_load_as_torch_tensors( node_ids, feature_tensor = loader.load_as_torch_tensors( serialized_tf_record_info=SerializedTFRecordInfo( tfrecord_uri_prefix=UriFactory.create_uri(self.data_dir), - feature_spec=self.feature_spec, + feature_spec=feature_spec, feature_keys=feature_keys, feature_dim=feature_dim, entity_key="node_id", @@ -128,9 +153,9 @@ def test_load_as_torch_tensors( assert_close(feature_tensor, expected_feature_tensor) def test_build_dataset_for_uris(self): - dataset = TFRecordDataLoader.build_dataset_for_uris( + dataset = TFRecordDataLoader._build_dataset_for_uris( uris=[UriFactory.create_uri(self.data_dir / "100.tfrecord")], - feature_spec=self.feature_spec, + feature_spec=_FEATURE_SPEC_WITH_ENTITY_KEY, # Feature Spec is guaranteed to have entity key when this function is called ).unbatch() nodes = {r["node_id"].numpy() for r in dataset} diff --git a/python/tests/unit/common_tests/data/export_test.py b/python/tests/unit/common/data/export_test.py similarity index 95% rename from python/tests/unit/common_tests/data/export_test.py rename to python/tests/unit/common/data/export_test.py index 82b57ad..4618db0 100644 --- a/python/tests/unit/common_tests/data/export_test.py +++ b/python/tests/unit/common/data/export_test.py @@ -269,9 +269,10 @@ def mock_upload(uri: Uri, buffer: io.BytesIO, content_type: str): records = list(avro_reader) self.assertEqual(records, expected_records) + @patch("time.sleep") @patch("gigl.common.data.export.GcsUtils") def test_write_embeddings_to_gcs_upload_retries_and_fails( - self, mock_gcs_utils_class + self, mock_gcs_utils_class, mock_sleep ): # Mock inputs gcs_base_uri = GcsUri("gs://test-bucket/test-folder") @@ -284,7 +285,6 @@ def test_write_embeddings_to_gcs_upload_retries_and_fails( "GCS upload failed" ) mock_gcs_utils_class.return_value = mock_gcs_utils - exporter = EmbeddingExporter(export_dir=gcs_base_uri) exporter.add_embedding(id_batch, embedding_batch, 
embedding_type) @@ -304,8 +304,7 @@ def test_skips_flush_if_empty(self, mock_gcs_utils_class): exporter.flush_embeddings() @patch("gigl.common.data.export.bigquery.Client") - @patch("gigl.common.data.export.FileLoader") - def test_load_embedding_to_bigquery(self, mock_file_loader, mock_bigquery_client): + def test_load_embedding_to_bigquery(self, mock_bigquery_client): # Mock inputs gcs_folder = GcsUri("gs://test-bucket/test-folder") project_id = "test-project" @@ -317,21 +316,13 @@ def test_load_embedding_to_bigquery(self, mock_file_loader, mock_bigquery_client mock_client.load_table_from_uri.return_value.output_rows = 1000 mock_bigquery_client.return_value = mock_client - # Mock FileLoader - mock_loader = MagicMock() - mock_file_loader.return_value = mock_loader - mock_loader.list_children.return_value = [ - GcsUri("gs://test-bucket/test-path/file1.avro") - ] - # Call the function load_embeddings_to_bigquery(gcs_folder, project_id, dataset_id, table_id) # Assertions mock_bigquery_client.assert_called_once_with(project=project_id) - mock_loader.list_children.assert_called_once_with(gcs_folder, pattern=".*avro") mock_client.load_table_from_uri.assert_called_once_with( - source_uris=["gs://test-bucket/test-path/file1.avro"], + source_uris=f"{gcs_folder.uri}/*.avro", destination=mock_client.dataset.return_value.table.return_value, job_config=ANY, ) diff --git a/python/tests/unit/distributed_tests/__init__.py b/python/tests/unit/common/translators/__init__.py similarity index 100% rename from python/tests/unit/distributed_tests/__init__.py rename to python/tests/unit/common/translators/__init__.py diff --git a/python/tests/unit/common_tests/translators_tests/gbml_protos_translator_test.py b/python/tests/unit/common/translators/gbml_protos_translator_test.py similarity index 100% rename from python/tests/unit/common_tests/translators_tests/gbml_protos_translator_test.py rename to python/tests/unit/common/translators/gbml_protos_translator_test.py diff --git a/python/tests/unit/gnn_library_tests/__init__.py b/python/tests/unit/common/types/__init__.py similarity index 100% rename from python/tests/unit/gnn_library_tests/__init__.py rename to python/tests/unit/common/types/__init__.py diff --git a/python/tests/unit/common/types/uri_test.py b/python/tests/unit/common/types/uri_test.py new file mode 100644 index 0000000..a332cab --- /dev/null +++ b/python/tests/unit/common/types/uri_test.py @@ -0,0 +1,15 @@ +import unittest + +from gigl.common.types.uri.uri_factory import UriFactory + + +class UriTest(unittest.TestCase): + def test_can_get_basename(self): + file_name = "file.txt" + gcs_uri_full = UriFactory.create_uri(f"gs://bucket/path/to/{file_name}") + local_uri_full = UriFactory.create_uri(f"/path/to/{file_name}") + http_uri_full = UriFactory.create_uri(f"http://abc.com/xyz/{file_name}") + + self.assertEqual(file_name, gcs_uri_full.get_basename()) + self.assertEqual(file_name, local_uri_full.get_basename()) + self.assertEqual(file_name, http_uri_full.get_basename()) diff --git a/python/tests/unit/src_tests/__init__.py b/python/tests/unit/common/utils/__init__.py similarity index 100% rename from python/tests/unit/src_tests/__init__.py rename to python/tests/unit/common/utils/__init__.py diff --git a/python/tests/unit/src_tests/common_tests/__init__.py b/python/tests/unit/common/utils/compute/__init__.py similarity index 100% rename from python/tests/unit/src_tests/common_tests/__init__.py rename to python/tests/unit/common/utils/compute/__init__.py diff --git 
a/python/tests/unit/common_tests/utils_tests/compute/fast_serialize_np_test.py b/python/tests/unit/common/utils/compute/fast_serialize_np_test.py similarity index 100% rename from python/tests/unit/common_tests/utils_tests/compute/fast_serialize_np_test.py rename to python/tests/unit/common/utils/compute/fast_serialize_np_test.py diff --git a/python/tests/unit/common_tests/utils_tests/retry_test.py b/python/tests/unit/common/utils/retry_test.py similarity index 100% rename from python/tests/unit/common_tests/utils_tests/retry_test.py rename to python/tests/unit/common/utils/retry_test.py diff --git a/python/tests/unit/common_tests/utils_tests/timeout_test.py b/python/tests/unit/common/utils/timeout_test.py similarity index 100% rename from python/tests/unit/common_tests/utils_tests/timeout_test.py rename to python/tests/unit/common/utils/timeout_test.py diff --git a/python/tests/unit/common/utils/vertex_ai_context_test.py b/python/tests/unit/common/utils/vertex_ai_context_test.py new file mode 100644 index 0000000..b21d533 --- /dev/null +++ b/python/tests/unit/common/utils/vertex_ai_context_test.py @@ -0,0 +1,102 @@ +import os +import unittest +from unittest.mock import call, patch + +from gigl.common import GcsUri +from gigl.common.services.vertex_ai import LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY +from gigl.common.utils.vertex_ai_context import ( + DistributedContext, + connect_worker_pool, + get_host_name, + get_leader_hostname, + get_leader_port, + get_rank, + get_vertex_ai_job_id, + get_world_size, + is_currently_running_in_vertex_ai_job, +) + + +class TestVertexAIContext(unittest.TestCase): + @patch.dict(os.environ, {"CLOUD_ML_JOB_ID": "test_job_id"}) + def test_is_currently_running_in_vertex_ai_job(self): + self.assertTrue(is_currently_running_in_vertex_ai_job()) + + @patch.dict(os.environ, {"CLOUD_ML_JOB_ID": "test_job_id"}) + def test_get_vertex_ai_job_id(self): + self.assertEqual(get_vertex_ai_job_id(), "test_job_id") + + @patch.dict(os.environ, {"HOSTNAME": "test_hostname"}) + def test_get_host_name(self): + self.assertEqual(get_host_name(), "test_hostname") + + @patch.dict(os.environ, {"MASTER_ADDR": "test_leader_hostname"}) + def test_get_leader_hostname(self): + self.assertEqual(get_leader_hostname(), "test_leader_hostname") + + @patch.dict(os.environ, {"MASTER_PORT": "12345"}) + def test_get_leader_port(self): + self.assertEqual(get_leader_port(), 12345) + + @patch.dict(os.environ, {"WORLD_SIZE": "4"}) + def test_get_world_size(self): + self.assertEqual(get_world_size(), 4) + + @patch.dict(os.environ, {"RANK": "1"}) + def test_get_rank(self): + self.assertEqual(get_rank(), 1) + + @patch("subprocess.check_output", return_value=b"127.0.0.1") + @patch("time.sleep", return_value=None) + @patch("gigl.common.utils.gcs.GcsUtils.upload_from_string") + @patch.dict( + os.environ, + { + "RANK": "0", + "WORLD_SIZE": "2", + LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY: "gs://FAKE BUCKET DNE/some-file.txt", + "CLOUD_ML_JOB_ID": "test_job_id", + }, + ) + def test_connect_worker_pool_leader(self, mock_upload, mock_sleep, mock_subprocess): + distributed_context: DistributedContext = connect_worker_pool() + self.assertEqual(distributed_context.main_worker_ip_address, "127.0.0.1") + self.assertEqual(distributed_context.global_rank, 0) + self.assertEqual(distributed_context.global_world_size, 2) + mock_upload.assert_called_once_with( + gcs_path=GcsUri("gs://FAKE BUCKET DNE/some-file.txt"), content="127.0.0.1" + ) + + 
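# NOTE: a sketch of the rendezvous flow exercised by the leader test above and the worker + # test below, as mocked in these tests (a description of the mocked behavior, not of the + # real GCS service): the leader (rank 0) resolves its own IP via subprocess.check_output + # and publishes it to the GCS file named by LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY; + # worker ranks read that file and ping the leader until it is reachable before building + # their DistributedContext. + + 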
@patch("gigl.common.utils.vertex_ai_context._ping_host_ip") + @patch("subprocess.check_output", return_value=b"127.0.0.1") + @patch("time.sleep", return_value=None) + @patch("gigl.common.utils.gcs.GcsUtils.read_from_gcs", return_value="127.0.0.1") + @patch("gigl.common.utils.gcs.GcsUtils.upload_from_string") + @patch.dict( + os.environ, + { + "RANK": "1", + "WORLD_SIZE": "2", + LEADER_WORKER_INTERNAL_IP_FILE_PATH_ENV_KEY: "gs://FAKE BUCKET DNE/some-file.txt", + "CLOUD_ML_JOB_ID": "test_job_id", + }, + ) + def test_connect_worker_pool_worker( + self, mock_upload, mock_read, mock_sleep, mock_subprocess, mock_ping_host + ): + mock_ping_host.side_effect = [False, True] + distributed_context: DistributedContext = connect_worker_pool() + self.assertEqual(distributed_context.main_worker_ip_address, "127.0.0.1") + self.assertEqual(distributed_context.global_rank, 1) + self.assertEqual(distributed_context.global_world_size, 2) + mock_read.assert_has_calls( + [ + call(GcsUri("gs://FAKE BUCKET DNE/some-file.txt")), + call(GcsUri("gs://FAKE BUCKET DNE/some-file.txt")), + ] + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/unit/src_tests/common_tests/graph_builder_tests/__init__.py b/python/tests/unit/distributed/__init__.py similarity index 100% rename from python/tests/unit/src_tests/common_tests/graph_builder_tests/__init__.py rename to python/tests/unit/distributed/__init__.py diff --git a/python/tests/unit/distributed_tests/dataset_input_metadata_translator_test.py b/python/tests/unit/distributed/dataset_input_metadata_translator_test.py similarity index 77% rename from python/tests/unit/distributed_tests/dataset_input_metadata_translator_test.py rename to python/tests/unit/distributed/dataset_input_metadata_translator_test.py index 61cbde7..f4a80b9 100644 --- a/python/tests/unit/distributed_tests/dataset_input_metadata_translator_test.py +++ b/python/tests/unit/distributed/dataset_input_metadata_translator_test.py @@ -5,8 +5,8 @@ from parameterized import param, parameterized from gigl.common.data.dataloaders import SerializedTFRecordInfo -from gigl.distributed.utils.dataset_input_metadata_translator import ( - convert_pb_to_dataset_input_metadata, +from gigl.distributed.utils.serialized_graph_metadata_translator import ( + convert_pb_to_serialized_graph_metadata, ) from gigl.src.common.types.graph_data import EdgeType, NodeType from gigl.src.common.types.pb_wrappers.gbml_config import GbmlConfigPbWrapper @@ -22,11 +22,11 @@ ) -class DistRandomPartitionerTestCase(unittest.TestCase): +class TranslatorTestCase(unittest.TestCase): def setUp(self): - self._name_to_mocked_dataset_map: Dict[str, MockedDatasetArtifactMetadata] = ( - get_mocked_dataset_artifact_metadata() - ) + self._name_to_mocked_dataset_map: Dict[ + str, MockedDatasetArtifactMetadata + ] = get_mocked_dataset_artifact_metadata() def _assert_data_type_correctness( self, @@ -43,7 +43,7 @@ def _assert_data_type_correctness( expected_entity_types: Union[List[EdgeType], List[NodeType]], ): """ - Checks that each item in the provided dataset input metadata is correctly typed and, if heterogeneous, that edge types and node types are as expected. + Checks that each item in the provided serialized graph metadata is correctly typed and, if heterogeneous, that edge types and node types are as expected. 
Args: entity_info: Optional[ Union[ @@ -95,7 +95,7 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) ) graph_metadata_pb_wrapper = gbml_config_pb_wrapper.graph_metadata_pb_wrapper - dataset_input_metadata = convert_pb_to_dataset_input_metadata( + serialized_graph_metadata = convert_pb_to_serialized_graph_metadata( preprocessed_metadata_pb_wrapper=preprocessed_metadata_pb_wrapper, graph_metadata_pb_wrapper=graph_metadata_pb_wrapper, ) @@ -103,17 +103,17 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) ## Node Entity Info Correctness self._assert_data_type_correctness( - dataset_input_metadata.node_entity_info, + serialized_graph_metadata.node_entity_info, is_heterogeneous=graph_metadata_pb_wrapper.is_heterogeneous, expected_entity_types=graph_metadata_pb_wrapper.node_types, ) - if isinstance(dataset_input_metadata.node_entity_info, abc.Mapping): + if isinstance(serialized_graph_metadata.node_entity_info, abc.Mapping): serialized_node_info_iterable = list( - dataset_input_metadata.node_entity_info.values() + serialized_graph_metadata.node_entity_info.values() ) else: - serialized_node_info_iterable = [dataset_input_metadata.node_entity_info] + serialized_node_info_iterable = [serialized_graph_metadata.node_entity_info] self.assertEqual( len(graph_metadata_pb_wrapper.node_types), @@ -128,6 +128,17 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) node_type ] ) + + node_id_key = preprocessed_metadata_pb_wrapper.preprocessed_metadata_pb.condensed_node_type_to_preprocessed_metadata[ + condensed_node_type + ].node_id_key + + target_node_feature_spec = preprocessed_metadata_pb_wrapper.condensed_node_type_to_feature_schema_map[ + condensed_node_type + ].feature_spec + + self.assertEqual(seralized_node_info.entity_key, node_id_key) + self.assertEqual( seralized_node_info.feature_dim, preprocessed_metadata_pb_wrapper.condensed_node_type_to_feature_dim_map[ @@ -148,25 +159,23 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) ) self.assertEqual( seralized_node_info.feature_spec, - preprocessed_metadata_pb_wrapper.condensed_node_type_to_feature_schema_map[ - condensed_node_type - ].feature_spec, + target_node_feature_spec, ) ## Edge Entity Info Correctness self._assert_data_type_correctness( - dataset_input_metadata.edge_entity_info, + serialized_graph_metadata.edge_entity_info, is_heterogeneous=graph_metadata_pb_wrapper.is_heterogeneous, expected_entity_types=graph_metadata_pb_wrapper.edge_types, ) - if isinstance(dataset_input_metadata.edge_entity_info, abc.Mapping): + if isinstance(serialized_graph_metadata.edge_entity_info, abc.Mapping): serialized_edge_info_iterable = list( - dataset_input_metadata.edge_entity_info.values() + serialized_graph_metadata.edge_entity_info.values() ) else: - serialized_edge_info_iterable = [dataset_input_metadata.edge_entity_info] + serialized_edge_info_iterable = [serialized_graph_metadata.edge_entity_info] self.assertEqual( len(graph_metadata_pb_wrapper.edge_types), @@ -186,6 +195,15 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) condensed_edge_type ] + target_edge_feature_spec = preprocessed_metadata_pb_wrapper.condensed_edge_type_to_feature_schema_map[ + condensed_edge_type + ].feature_spec + + self.assertEqual( + seralized_edge_info.entity_key, + (edge_info.src_node_id_key, edge_info.dst_node_id_key), + ) + self.assertEqual( seralized_edge_info.feature_dim, 
preprocessed_metadata_pb_wrapper.condensed_edge_type_to_feature_dim_map[ @@ -205,9 +223,7 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) ) self.assertEqual( seralized_edge_info.feature_spec, - preprocessed_metadata_pb_wrapper.condensed_edge_type_to_feature_schema_map[ - condensed_edge_type - ].feature_spec, + target_edge_feature_spec, ) ## Positive Label Entity Info Correctness @@ -222,19 +238,19 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) ) if has_positive_labels: self._assert_data_type_correctness( - dataset_input_metadata.positive_label_entity_info, + serialized_graph_metadata.positive_label_entity_info, is_heterogeneous=graph_metadata_pb_wrapper.is_heterogeneous, expected_entity_types=graph_metadata_pb_wrapper.edge_types, ) if isinstance( - dataset_input_metadata.positive_label_entity_info, abc.Mapping + serialized_graph_metadata.positive_label_entity_info, abc.Mapping ): serialized_positive_label_info_iterable = list( - dataset_input_metadata.positive_label_entity_info.values() + serialized_graph_metadata.positive_label_entity_info.values() ) else: serialized_positive_label_info_iterable = [ - dataset_input_metadata.positive_label_entity_info + serialized_graph_metadata.positive_label_entity_info ] self.assertEqual( @@ -257,6 +273,15 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) condensed_edge_type ] + target_pos_edge_feature_spec = preprocessed_metadata_pb_wrapper.condensed_edge_type_to_pos_edge_feature_schema_map[ + condensed_edge_type + ].feature_spec + + self.assertEqual( + seralized_positive_label_info.entity_key, + (edge_info.src_node_id_key, edge_info.dst_node_id_key), + ) + self.assertEqual( seralized_positive_label_info.feature_dim, preprocessed_metadata_pb_wrapper.condensed_edge_type_to_pos_edge_feature_dim_map[ @@ -276,14 +301,12 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) ) self.assertEqual( seralized_positive_label_info.feature_spec, - preprocessed_metadata_pb_wrapper.condensed_edge_type_to_pos_edge_feature_schema_map[ - condensed_edge_type - ].feature_spec, + target_pos_edge_feature_spec, ) else: self.assertIsNone(seralized_positive_label_info) else: - self.assertIsNone(dataset_input_metadata.positive_label_entity_info) + self.assertIsNone(serialized_graph_metadata.positive_label_entity_info) ## Negative Label Entity Info Correctness @@ -297,19 +320,19 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) ) if has_negative_labels: self._assert_data_type_correctness( - dataset_input_metadata.negative_label_entity_info, + serialized_graph_metadata.negative_label_entity_info, is_heterogeneous=graph_metadata_pb_wrapper.is_heterogeneous, expected_entity_types=graph_metadata_pb_wrapper.edge_types, ) if isinstance( - dataset_input_metadata.negative_label_entity_info, abc.Mapping + serialized_graph_metadata.negative_label_entity_info, abc.Mapping ): serialized_negative_label_info_iterable = list( - dataset_input_metadata.negative_label_entity_info.values() + serialized_graph_metadata.negative_label_entity_info.values() ) else: serialized_negative_label_info_iterable = [ - dataset_input_metadata.negative_label_entity_info + serialized_graph_metadata.negative_label_entity_info ] self.assertEqual( @@ -332,6 +355,15 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) condensed_edge_type ] + target_hard_neg_edge_feature_spec = 
preprocessed_metadata_pb_wrapper.condensed_edge_type_to_hard_neg_edge_feature_schema_map[ + condensed_edge_type + ].feature_spec + + self.assertEqual( + serialized_negative_label_info.entity_key, + (edge_info.src_node_id_key, edge_info.dst_node_id_key), + ) + self.assertEqual( serialized_negative_label_info.feature_dim, preprocessed_metadata_pb_wrapper.condensed_edge_type_to_hard_neg_edge_feature_dim_map[ @@ -351,11 +383,9 @@ def test_translator_correctness(self, _, mocked_dataset_info: MockedDatasetInfo) ) self.assertEqual( serialized_negative_label_info.feature_spec, - preprocessed_metadata_pb_wrapper.condensed_edge_type_to_hard_neg_edge_feature_schema_map[ - condensed_edge_type - ].feature_spec, + target_hard_neg_edge_feature_spec, ) else: self.assertIsNone(serialized_negative_label_info) else: - self.assertIsNone(dataset_input_metadata.negative_label_entity_info) + self.assertIsNone(serialized_graph_metadata.negative_label_entity_info) diff --git a/python/tests/unit/distributed/distributed_neighborloader_test.py b/python/tests/unit/distributed/distributed_neighborloader_test.py new file mode 100644 index 0000000..2eb054e --- /dev/null +++ b/python/tests/unit/distributed/distributed_neighborloader_test.py @@ -0,0 +1,105 @@ +import unittest +from collections import abc +from typing import MutableMapping + +import graphlearn_torch as glt +import torch +import torch.distributed.rpc +from torch.multiprocessing import Manager +from torch_geometric.data import Data, HeteroData + +from gigl.distributed.dist_context import DistributedContext +from gigl.distributed.dist_link_prediction_dataset import DistLinkPredictionDataset +from gigl.distributed.distributed_neighborloader import DistNeighborLoader +from gigl.src.common.types.graph_data import NodeType +from gigl.src.mocking.mocking_assets.mocked_datasets_for_pipeline_tests import ( + CORA_NODE_ANCHOR_MOCKED_DATASET_INFO, + DBLP_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, +) +from tests.test_assets.distributed.run_distributed_dataset import ( + run_distributed_dataset, +) + + +class DistributedNeighborLoaderTest(unittest.TestCase): + def setUp(self): + self._master_ip_address = "localhost" + self._world_size = 1 + self._num_rpc_threads = 4 + + self._context = DistributedContext( + main_worker_ip_address=self._master_ip_address, + global_rank=0, + global_world_size=self._world_size, + ) + + def test_distributed_neighbor_loader(self): + master_port = glt.utils.get_free_port(self._master_ip_address) + manager = Manager() + output_dict: MutableMapping[int, DistLinkPredictionDataset] = manager.dict() + + dataset = run_distributed_dataset( + rank=0, + world_size=self._world_size, + mocked_dataset_info=CORA_NODE_ANCHOR_MOCKED_DATASET_INFO, + output_dict=output_dict, + should_load_tensors_in_parallel=True, + master_ip_address=self._master_ip_address, + master_port=master_port, + ) + + loader = DistNeighborLoader( + dataset=dataset, + num_neighbors=[2, 2], + context=self._context, + local_process_rank=0, + local_process_world_size=1, + pin_memory_device=torch.device("cpu"), + ) + + count = 0 + for datum in loader: + self.assertIsInstance(datum, Data) + count += 1 + + # Cora has 2708 nodes, make sure we go over all of them. 
+ # https://paperswithcode.com/dataset/cora + self.assertEqual(count, 2708) + + @unittest.skip("Failing on Google Cloud Build - skipping for now") + def test_distributed_neighbor_loader_heterogeneous(self): + master_port = glt.utils.get_free_port(self._master_ip_address) + manager = Manager() + output_dict: MutableMapping[int, DistLinkPredictionDataset] = manager.dict() + + dataset = run_distributed_dataset( + rank=0, + world_size=self._world_size, + mocked_dataset_info=DBLP_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, + output_dict=output_dict, + should_load_tensors_in_parallel=True, + master_ip_address=self._master_ip_address, + master_port=master_port, + ) + + assert isinstance(dataset.node_ids, abc.Mapping) + loader = DistNeighborLoader( + dataset=dataset, + input_nodes=(NodeType("author"), dataset.node_ids[NodeType("author")]), + num_neighbors=[2, 2], + context=self._context, + local_process_rank=0, + local_process_world_size=1, + pin_memory_device=torch.device("cpu"), + ) + + count = 0 + for datum in loader: + self.assertIsInstance(datum, HeteroData) + count += 1 + + # DBLP has 4057 author nodes, make sure we go over all of them. + self.assertEqual(count, 4057) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/unit/distributed_tests/distributed_partitioner_test.py b/python/tests/unit/distributed/distributed_partitioner_test.py similarity index 82% rename from python/tests/unit/distributed_tests/distributed_partitioner_test.py rename to python/tests/unit/distributed/distributed_partitioner_test.py index aa41764..cf4ac39 100644 --- a/python/tests/unit/distributed_tests/distributed_partitioner_test.py +++ b/python/tests/unit/distributed/distributed_partitioner_test.py @@ -2,21 +2,22 @@ import unittest from collections import abc, defaultdict -from typing import Dict, Iterable, List, MutableMapping, Tuple, Union +from typing import Dict, Iterable, List, MutableMapping, Optional, Tuple, Union import graphlearn_torch as glt import torch import torch.multiprocessing as mp from graphlearn_torch.distributed import init_rpc, init_worker_group +from graphlearn_torch.partition import PartitionBook from parameterized import param, parameterized from torch.multiprocessing import Manager -from gigl.distributed.partitioner.dist_link_prediction_data_partitioner import ( +from gigl.distributed.dist_link_prediction_data_partitioner import ( DistLinkPredictionDataPartitioner, ) +from gigl.distributed.utils import get_process_group_name from gigl.src.common.types.graph_data import EdgeType, NodeType from gigl.types.distributed import ( - EdgeAssignStrategy, FeaturePartitionData, GraphPartitionData, PartitionOutput, @@ -51,6 +52,9 @@ def _assert_data_type_correctness( torch.Tensor, Dict[NodeType, torch.Tensor], Dict[EdgeType, torch.Tensor], + PartitionBook, + Dict[NodeType, PartitionBook], + Dict[EdgeType, PartitionBook], FeaturePartitionData, Dict[NodeType, FeaturePartitionData], Dict[EdgeType, FeaturePartitionData], @@ -67,6 +71,9 @@ torch.Tensor, Dict[NodeType, torch.Tensor], Dict[EdgeType, torch.Tensor], + PartitionBook, + Dict[NodeType, PartitionBook], + Dict[EdgeType, PartitionBook], FeaturePartitionData, Dict[NodeType, FeaturePartitionData], Dict[EdgeType, FeaturePartitionData], @@ -89,9 +96,9 @@ def _assert_graph_outputs( self, rank: int, is_heterogeneous: bool, - edge_assign_strategy: EdgeAssignStrategy, + should_assign_edges_by_src_node: bool, - output_node_partition_book: Union[torch.Tensor, Dict[NodeType, torch.Tensor]], - output_edge_partition_book: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]], + 
diff --git a/python/tests/unit/distributed_tests/distributed_partitioner_test.py b/python/tests/unit/distributed/distributed_partitioner_test.py
similarity index 82%
rename from python/tests/unit/distributed_tests/distributed_partitioner_test.py
rename to python/tests/unit/distributed/distributed_partitioner_test.py
index aa41764..cf4ac39 100644
--- a/python/tests/unit/distributed_tests/distributed_partitioner_test.py
+++ b/python/tests/unit/distributed/distributed_partitioner_test.py
@@ -2,21 +2,22 @@
 import unittest
 from collections import abc, defaultdict
-from typing import Dict, Iterable, List, MutableMapping, Tuple, Union
+from typing import Dict, Iterable, List, MutableMapping, Optional, Tuple, Union

 import graphlearn_torch as glt
 import torch
 import torch.multiprocessing as mp
 from graphlearn_torch.distributed import init_rpc, init_worker_group
+from graphlearn_torch.partition import PartitionBook
 from parameterized import param, parameterized
 from torch.multiprocessing import Manager

-from gigl.distributed.partitioner.dist_link_prediction_data_partitioner import (
+from gigl.distributed.dist_link_prediction_data_partitioner import (
     DistLinkPredictionDataPartitioner,
 )
+from gigl.distributed.utils import get_process_group_name
 from gigl.src.common.types.graph_data import EdgeType, NodeType
 from gigl.types.distributed import (
-    EdgeAssignStrategy,
     FeaturePartitionData,
     GraphPartitionData,
     PartitionOutput,
@@ -51,6 +52,9 @@ def _assert_data_type_correctness(
             torch.Tensor,
             Dict[NodeType, torch.Tensor],
             Dict[EdgeType, torch.Tensor],
+            PartitionBook,
+            Dict[NodeType, PartitionBook],
+            Dict[EdgeType, PartitionBook],
             FeaturePartitionData,
             Dict[NodeType, FeaturePartitionData],
             Dict[EdgeType, FeaturePartitionData],
@@ -67,6 +71,9 @@ def _assert_data_type_correctness(
                 torch.Tensor,
                 Dict[NodeType, torch.Tensor],
                 Dict[EdgeType, torch.Tensor],
+                PartitionBook,
+                Dict[NodeType, PartitionBook],
+                Dict[EdgeType, PartitionBook],
                 FeaturePartitionData,
                 Dict[NodeType, FeaturePartitionData],
                 Dict[EdgeType, FeaturePartitionData],
@@ -89,9 +96,9 @@ def _assert_graph_outputs(
         self,
         rank: int,
         is_heterogeneous: bool,
-        edge_assign_strategy: EdgeAssignStrategy,
-        output_node_partition_book: Union[torch.Tensor, Dict[NodeType, torch.Tensor]],
-        output_edge_partition_book: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]],
+        should_assign_edges_by_src_node: bool,
+        output_node_partition_book: Union[PartitionBook, Dict[NodeType, PartitionBook]],
+        output_edge_partition_book: Union[PartitionBook, Dict[EdgeType, PartitionBook]],
         output_edge_index: Union[
             GraphPartitionData, Dict[EdgeType, GraphPartitionData]
         ],
@@ -103,9 +110,9 @@ def _assert_graph_outputs(
         Args:
             rank (int): Rank from current output
             is_heterogeneous (bool): Whether the output is expected to be homogeneous or heterogeneous
-            edge_assign_strategy (EdgeAssignStrategy): Whether to partion edges according to the partition book of the source node or destination node
-            output_node_partition_book (Union[torch.Tensor, Dict[NodeType, torch.Tensor]]): Node Partition Book from partitioning, either a Tensor if homogeneous or a Dict[NodeType, Tensor] if heterogeneous
-            output_edge_partition_book (Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]): Edge Partition Book from partitioning, either a Tensor if homogeneous or a Dict[EdgeType, Tensor] if heterogeneous
+            should_assign_edges_by_src_node (bool): Whether to partition edges according to the partition book of the source node or destination node
+            output_node_partition_book (Union[PartitionBook, Dict[NodeType, PartitionBook]]): Node Partition Book from partitioning, either a PartitionBook if homogeneous or a Dict[NodeType, PartitionBook] if heterogeneous
+            output_edge_partition_book (Union[PartitionBook, Dict[EdgeType, PartitionBook]]): Edge Partition Book from partitioning, either a PartitionBook if homogeneous or a Dict[EdgeType, PartitionBook] if heterogeneous
             output_edge_index: (Union[GraphPartitionData, Dict[EdgeType, GraphPartitionData]]): Output edge indices and ids from partitioning, either a GraphPartitionData if homogeneous or a Dict[EdgeType, GraphPartitionData] if heterogeneous
             expected_node_types (List[NodeType]): Expected node types for heterogeneous input
             expected_edge_types (List[EdgeType]): Expected edge types for heterogeneous input
@@ -127,10 +134,11 @@ def _assert_graph_outputs(
         )
         # To unify logic between homogeneous and heterogeneous cases, we define an iterable which we'll loop over.
         # Each iteration contains an EdgeType, an edge partition book, and a graph consisting of edge indices and ids.
-        entity_iterable: Iterable[Tuple[EdgeType, torch.Tensor, GraphPartitionData]]
+        entity_iterable: Iterable[Tuple[EdgeType, PartitionBook, GraphPartitionData]]
         if isinstance(output_edge_partition_book, abc.Mapping) and isinstance(
             output_edge_index, abc.Mapping
         ):
+            # TODO (mkolodner-sc): Implement heterogeneous range-based partitioning
             entity_iterable = [
                 (
                     edge_type,
@@ -146,19 +154,23 @@ def _assert_graph_outputs(
             entity_iterable = [
                 (USER_TO_USER_EDGE_TYPE, output_edge_partition_book, output_edge_index)
             ]
+        elif isinstance(output_edge_partition_book, PartitionBook):
+            raise NotImplementedError(
+                "TODO(mkolodner-sc): Implement range based partitioning"
+            )
         else:
             raise ValueError(
                 f"The output edge partition book of type {type(output_edge_partition_book)} and the output graph of type {type(output_edge_index)} are not compatible."
             )
         for edge_type, edge_partition_book, graph in entity_iterable:
-            node_partition_book: torch.Tensor
+            node_partition_book: PartitionBook
             node_ids: torch.Tensor
             self.assertEqual(graph.edge_index.size(0), 2)
             # We take the unique items in either source or destination nodes, as source/destination node ids which
             # repeat across multiple edges will still only take up one slot in the partition book.
-            if edge_assign_strategy == EdgeAssignStrategy.BY_SOURCE_NODE:
+            if should_assign_edges_by_src_node:
                 target_node_type = edge_type.src_node_type
                 node_ids = torch.unique(graph.edge_index[0])
             else:
@@ -219,7 +231,7 @@ def _assert_node_feature_outputs(
         self,
         rank: int,
         is_heterogeneous: bool,
-        edge_assign_strategy: EdgeAssignStrategy,
+        should_assign_edges_by_src_node: bool,
         output_graph: Union[GraphPartitionData, Dict[EdgeType, GraphPartitionData]],
         output_node_feat: Union[
             FeaturePartitionData, Dict[NodeType, FeaturePartitionData]
         ],
@@ -232,7 +244,7 @@ def _assert_node_feature_outputs(
         Args:
             rank (int): Rank from current output
             is_heterogeneous (bool): Whether the output is expected to be homogeneous or heterogeneous
-            edge_assign_strategy (EdgeAssignStrategy): Whether to partion edges according to the partition book of the source node or destination node
+            should_assign_edges_by_src_node (bool): Whether to partition edges according to the partition book of the source node or destination node
             output_graph: (Union[GraphPartitionData, Dict[EdgeType, GraphPartitionData]]): Output edge indices and ids from partitioning, either a GraphPartitionData if homogeneous or a Dict[EdgeType, GraphPartitionData] if heterogeneous
             output_node_feat (Union[FeaturePartitionData, Dict[NodeType, FeaturePartitionData]]): Output node features from partitioning, either a FeaturePartitionData if homogeneous or a Dict[NodeType, FeaturePartitionData] if heterogeneous
             expected_node_types (List[NodeType]): Expected node types for heterogeneous input
@@ -264,9 +276,9 @@ def _assert_node_feature_outputs(
             entity_iterable = [(USER_TO_USER_EDGE_TYPE, output_graph)]

         for edge_type, graph in entity_iterable:
-            node_feat: FeaturePartitionData
+            node_feat: Optional[FeaturePartitionData]
             node_ids: torch.Tensor
-            if edge_assign_strategy == EdgeAssignStrategy.BY_SOURCE_NODE:
+            if should_assign_edges_by_src_node:
                 target_node_type = edge_type.src_node_type
                 node_ids = torch.unique(graph.edge_index[0])
             else:
@@ -318,7 +330,7 @@ def _assert_edge_feature_outputs(
         self,
         rank: int,
         is_heterogeneous: bool,
-        edge_assign_strategy: EdgeAssignStrategy,
+        should_assign_edges_by_src_node: bool,
         output_graph: Union[GraphPartitionData, Dict[EdgeType, GraphPartitionData]],
         output_edge_feat: Union[
             FeaturePartitionData, Dict[EdgeType, FeaturePartitionData]
         ],
@@ -330,7 +342,7 @@ def _assert_edge_feature_outputs(
         Args:
             rank (int): Rank from current output
             is_heterogeneous (bool): Whether the output is expected to be homogeneous or heterogeneous
-            edge_assign_strategy (EdgeAssignStrategy): Whether to partion edges according to the partition book of the source node or destination node
+            should_assign_edges_by_src_node (bool): Whether to partition edges according to the partition book of the source node or destination node
             output_graph: (Union[GraphPartitionData, Dict[EdgeType, GraphPartitionData]]): Output edge indices and ids from partitioning, either a GraphPartitionData if homogeneous or a Dict[EdgeType, GraphPartitionData] if heterogeneous
             output_edge_feat (Union[FeaturePartitionData, Dict[EdgeType, FeaturePartitionData]]): Output node features from partitioning, either a FeaturePartitionData if homogeneous or a Dict[EdgeType, FeaturePartitionData] if heterogeneous
             expected_edge_types (List[EdgeType]): Expected edge types for heterogeneous input
@@ -343,13 +355,13 @@ def _assert_edge_feature_outputs(
         self._assert_data_type_correctness(
             output_data=output_edge_feat,
             is_heterogeneous=is_heterogeneous,
-            expected_entity_types=expected_edge_types,
+            expected_entity_types=[USER_TO_USER_EDGE_TYPE],
         )
         # To unify logic between homogeneous and heterogeneous cases, we define an iterable which we'll loop over.
         # Each iteration contains an EdgeType, a feature object containing edge features and edge ids, and a graph consisting of edge indices and ids.
         entity_iterable: Iterable[
-            Tuple[EdgeType, FeaturePartitionData, GraphPartitionData]
+            Tuple[EdgeType, Optional[FeaturePartitionData], GraphPartitionData]
         ]
         if is_heterogeneous:
             assert isinstance(
@@ -359,12 +371,12 @@ def _assert_edge_feature_outputs(
                 output_graph, abc.Mapping
             ), "Homogeneous output detected from graph for heterogeneous input"
             entity_iterable = [
-                item
-                for item in zip(
-                    MOCKED_HETEROGENEOUS_EDGE_TYPES,
-                    output_edge_feat.values(),
-                    output_graph.values(),
+                (
+                    edge_type,
+                    output_edge_feat.get(edge_type, None),
+                    output_graph[edge_type],
                 )
+                for edge_type in MOCKED_HETEROGENEOUS_EDGE_TYPES
             ]
         else:
             assert isinstance(
@@ -377,54 +389,60 @@ def _assert_edge_feature_outputs(
             entity_iterable = [(USER_TO_USER_EDGE_TYPE, output_edge_feat, output_graph)]

         for edge_type, edge_feat, graph in entity_iterable:
-            if edge_assign_strategy == EdgeAssignStrategy.BY_SOURCE_NODE:
-                target_node_type = edge_type.src_node_type
+            if edge_feat is None:
+                self.assertTrue(
+                    edge_type not in RANK_TO_MOCKED_GRAPH[rank].edge_features
+                )
             else:
-                target_node_type = edge_type.dst_node_type
+                if should_assign_edges_by_src_node:
+                    target_node_type = edge_type.src_node_type
+                else:
+                    target_node_type = edge_type.dst_node_type

-            num_nodes_on_rank: int = RANK_TO_NODE_TYPE_TYPE_TO_NUM_NODES[rank][
-                target_node_type
-            ]
+                num_nodes_on_rank: int = RANK_TO_NODE_TYPE_TYPE_TO_NUM_NODES[rank][
+                    target_node_type
+                ]

-            # We expect the number of edge feats on the current rank to be the same as the number of input nodes to the partitioner on the current rank. This is because
-            # the number of edges should be equal to the number of target nodes assigned to each rank and the number of edge feats should be equal to the number of edges.
+                # We expect the number of edge feats on the current rank to be the same as the number of input nodes to the partitioner on the current rank. This is because
+                # the number of edges should be equal to the number of target nodes assigned to each rank and the number of edge feats should be equal to the number of edges.

-            if target_node_type == ITEM_NODE_TYPE:
-                # If the target_node_type is ITEM, we expect there to be twice as many edges as nodes since item node is the destination node of two edges,
-                # and therefore twice as many edge features
-                self.assertEqual(edge_feat.feats.size(0), num_nodes_on_rank * 2)
-            else:
-                # Otherwise, we expect there to be the same number of edge features and nodes, as each user node is the source and destination node of exactly one edge.
-                self.assertEqual(edge_feat.feats.size(0), num_nodes_on_rank)
+                if target_node_type == ITEM_NODE_TYPE:
+                    # If the target_node_type is ITEM, we expect there to be twice as many edges as nodes since item node is the destination node of two edges,
+                    # and therefore twice as many edge features
+                    self.assertEqual(edge_feat.feats.size(0), num_nodes_on_rank * 2)
+                else:
+                    # Otherwise, we expect there to be the same number of edge features and nodes, as each user node is the source and destination node of exactly one edge.
+                    self.assertEqual(edge_feat.feats.size(0), num_nodes_on_rank)

-            # We expect the edge ids on the current rank from the graph to be the same as the edge ids on the current rank from the features
-            assert_tensor_equality(
-                tensor_a=graph.edge_ids, tensor_b=edge_feat.ids, dim=0
-            )
+                # We expect the edge ids on the current rank from the graph to be the same as the edge ids on the current rank from the features
+                assert_tensor_equality(
+                    tensor_a=graph.edge_ids, tensor_b=edge_feat.ids, dim=0
+                )

-            # We expect the shape of the edge features to be equal to the expected edge feature dimension
-            self.assertEqual(
-                edge_feat.feats.size(1), EDGE_TYPE_TO_FEATURE_DIMENSION_MAP[edge_type]
-            )
+                # We expect the shape of the edge features to be equal to the expected edge feature dimension
+                self.assertEqual(
+                    edge_feat.feats.size(1),
+                    EDGE_TYPE_TO_FEATURE_DIMENSION_MAP[edge_type],
+                )

-            # We expect the value of each edge feature to be equal to its corresponding edge id / 10 on the currently mocked input
-            for idx, e_id in enumerate(edge_feat.ids):
-                assert_tensor_equality(
-                    tensor_a=edge_feat.feats[idx],
-                    tensor_b=torch.ones(
-                        EDGE_TYPE_TO_FEATURE_DIMENSION_MAP[edge_type],
-                        dtype=torch.float32,
+                # We expect the value of each edge feature to be equal to its corresponding edge id / 10 on the currently mocked input
+                for idx, e_id in enumerate(edge_feat.ids):
+                    assert_tensor_equality(
+                        tensor_a=edge_feat.feats[idx],
+                        tensor_b=torch.ones(
+                            EDGE_TYPE_TO_FEATURE_DIMENSION_MAP[edge_type],
+                            dtype=torch.float32,
+                        )
+                        * e_id
+                        * 0.1,
                     )
-                    * e_id
-                    * 0.1,
-                )

     def _assert_label_outputs(
         self,
         rank: int,
         is_heterogeneous: bool,
-        edge_assign_strategy: EdgeAssignStrategy,
-        output_node_partition_book: Union[torch.Tensor, Dict[NodeType, torch.Tensor]],
+        should_assign_edges_by_src_node: bool,
+        output_node_partition_book: Union[PartitionBook, Dict[NodeType, PartitionBook]],
         output_labeled_edge_index: Union[torch.Tensor, Dict[EdgeType, torch.Tensor]],
         expected_edge_types: List[EdgeType],
     ) -> None:
@@ -433,8 +451,8 @@ def _assert_label_outputs(
        Args:
            rank (int): Rank from current output
            is_heterogeneous (bool): Whether the output is expected to be homogeneous or heterogeneous
-            edge_assign_strategy (EdgeAssignStrategy): Whether to partion edges according to the partition book of the source node or destination node
-            output_node_partition_book: (Union[torch.Tensor, Dict[NodeType, torch.Tensor]]): Node Partition Book from partitioning, either a Tensor if homogeneous or a Dict[NodeType, Tensor] if heterogeneous
+            should_assign_edges_by_src_node (bool): Whether to partition edges according to the partition book of the source node or destination node
+            output_node_partition_book: (Union[PartitionBook, Dict[NodeType, PartitionBook]]): Node Partition Book from partitioning, either a PartitionBook if homogeneous or a Dict[NodeType, PartitionBook] if heterogeneous
            output_labeled_edge_index (Union[torch.Tensor, Dict[EdgeType, torch.Tensor]]): Output labeled edges from partitioning, either a FeaturePartitionData if homogeneous or a Dict[EdgeType, FeaturePartitionData] if heterogeneous
            expected_edge_types (List[EdgeType]): Expected edge types for heterogeneous input
        """
@@ -458,9 +476,9 @@ def _assert_label_outputs(
            entity_iterable = [(USER_TO_USER_EDGE_TYPE, output_labeled_edge_index)]

        for edge_type, labeled_edge_index in entity_iterable:
-            node_partition_book: torch.Tensor
+            node_partition_book: PartitionBook

-            if edge_assign_strategy == EdgeAssignStrategy.BY_SOURCE_NODE:
+            if should_assign_edges_by_src_node:
                target_node_type = edge_type.src_node_type
                target_nodes = labeled_edge_index[0]
            else:
@@ -474,7 +492,7 @@
                node_partition_book = output_node_partition_book[target_node_type]
            else:
                assert isinstance(
-                    output_node_partition_book, torch.Tensor
+                    output_node_partition_book, (torch.Tensor, PartitionBook)
                ), "Heterogeneous node partition book detected for homogeneous input"
                node_partition_book = output_node_partition_book
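The `should_assign_edges_by_src_node` flag replaces the old `EdgeAssignStrategy` enum: each edge is placed on the rank that owns one chosen endpoint. A minimal sketch of the idea (illustrative only; `assign_edges` is a hypothetical helper, assuming a tensor-based partition book that maps node id to owning rank):

import torch

def assign_edges(
    edge_index: torch.Tensor,  # shape [2, num_edges]
    node_partition_book: torch.Tensor,  # entry i = rank owning node i
    should_assign_edges_by_src_node: bool,
) -> torch.Tensor:
    # Look up the owning rank of each edge's source (or destination) node.
    anchor = edge_index[0] if should_assign_edges_by_src_node else edge_index[1]
    return node_partition_book[anchor]

# With node_partition_book = torch.tensor([0, 1, 1, 0]) and edges 0->1 and 2->3,
# assigning by source node yields ranks [0, 1]; by destination node, [1, 0].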
@@ -497,53 +515,53 @@
             "Homogeneous Partitioning by Source Node - Register All Entites together through Constructor",
             is_heterogeneous=False,
             input_data_strategy=InputDataStrategy.REGISTER_ALL_ENTITIES_TOGETHER,
-            edge_assign_strategy=EdgeAssignStrategy.BY_SOURCE_NODE,
+            should_assign_edges_by_src_node=True,
         ),
         param(
             "Heterogeneous Partitioning By Source Node - Register All Entites together through Constructor",
             is_heterogeneous=True,
             input_data_strategy=InputDataStrategy.REGISTER_ALL_ENTITIES_TOGETHER,
-            edge_assign_strategy=EdgeAssignStrategy.BY_SOURCE_NODE,
+            should_assign_edges_by_src_node=True,
         ),
         param(
             "Homogeneous Partitioning By Dest Node- Register All Entites together through Constructor",
             is_heterogeneous=False,
             input_data_strategy=InputDataStrategy.REGISTER_ALL_ENTITIES_TOGETHER,
-            edge_assign_strategy=EdgeAssignStrategy.BY_DESTINATION_NODE,
+            should_assign_edges_by_src_node=False,
         ),
         param(
             "Heterogeneous Partitioning By Dest Node- Register All Entites together through Constructor",
             is_heterogeneous=True,
             input_data_strategy=InputDataStrategy.REGISTER_ALL_ENTITIES_TOGETHER,
-            edge_assign_strategy=EdgeAssignStrategy.BY_DESTINATION_NODE,
+            should_assign_edges_by_src_node=False,
         ),
         param(
             "Homogeneous Partitioning By Source Node - Register All Entites separately through register functions",
             is_heterogeneous=False,
             input_data_strategy=InputDataStrategy.REGISTER_ALL_ENTITIES_SEPARATELY,
-            edge_assign_strategy=EdgeAssignStrategy.BY_SOURCE_NODE,
+            should_assign_edges_by_src_node=True,
         ),
         param(
             "Homogeneous Partitioning By Source Node - Register minimal entities separately through register functions",
             is_heterogeneous=False,
             input_data_strategy=InputDataStrategy.REGISTER_MINIMAL_ENTITIES_SEPARATELY,
-            edge_assign_strategy=EdgeAssignStrategy.BY_SOURCE_NODE,
+            should_assign_edges_by_src_node=True,
         ),
     ]
     )
-    def test_partitioning_correctness(
+    def _test_partitioning_correctness(
         self,
         _,
         is_heterogeneous: bool,
         input_data_strategy: InputDataStrategy,
-        edge_assign_strategy: EdgeAssignStrategy,
+        should_assign_edges_by_src_node: bool,
     ) -> None:
         """
         Tests partitioning functionality and correctness on mocked inputs
         Args:
             is_heterogeneous (bool): Whether homogeneous or heterogeneous inputs should be used
             input_data_strategy (InputDataStrategy): Strategy for registering inputs to the partitioner
-            edge_assign_strategy (EdgeAssignStrategy): Whether to partion edges according to the partition book of the source node or destination node
+            should_assign_edges_by_src_node (bool): Whether to partition edges according to the partition book of the source node or destination node
         """
         master_port = glt.utils.get_free_port(self._master_ip_address)
@@ -558,7 +576,7 @@
                 output_dict,
                 is_heterogeneous,
                 mocked_input_graph,
-                edge_assign_strategy,
+                should_assign_edges_by_src_node,
                 self._master_ip_address,
                 master_port,
                 input_data_strategy,
@@ -580,21 +598,23 @@
         unified_output_neg_label: Dict[EdgeType, List[torch.Tensor]] = defaultdict(list)
         for rank, partition_output in
output_dict.items(): + partitioned_edge_index = partition_output.partitioned_edge_index + assert partitioned_edge_index is not None self._assert_graph_outputs( rank=rank, is_heterogeneous=is_heterogeneous, - edge_assign_strategy=edge_assign_strategy, + should_assign_edges_by_src_node=should_assign_edges_by_src_node, output_node_partition_book=partition_output.node_partition_book, output_edge_partition_book=partition_output.edge_partition_book, - output_edge_index=partition_output.partitioned_edge_index, + output_edge_index=partitioned_edge_index, expected_node_types=MOCKED_HETEROGENEOUS_NODE_TYPES, expected_edge_types=MOCKED_HETEROGENEOUS_EDGE_TYPES, ) - if isinstance(partition_output.partitioned_edge_index, abc.Mapping): - for edge_type, graph in partition_output.partitioned_edge_index.items(): + if isinstance(partitioned_edge_index, abc.Mapping): + for edge_type, graph in partitioned_edge_index.items(): unified_output_edge_index[edge_type].append(graph.edge_index) else: - graph = partition_output.partitioned_edge_index + graph = partitioned_edge_index unified_output_edge_index[USER_TO_USER_EDGE_TYPE].append( graph.edge_index ) @@ -626,8 +646,8 @@ def test_partitioning_correctness( self._assert_node_feature_outputs( rank=rank, is_heterogeneous=is_heterogeneous, - edge_assign_strategy=edge_assign_strategy, - output_graph=partition_output.partitioned_edge_index, + should_assign_edges_by_src_node=should_assign_edges_by_src_node, + output_graph=partitioned_edge_index, output_node_feat=partition_output.partitioned_node_features, expected_node_types=MOCKED_HETEROGENEOUS_NODE_TYPES, expected_edge_types=MOCKED_HETEROGENEOUS_EDGE_TYPES, @@ -647,8 +667,8 @@ def test_partitioning_correctness( self._assert_edge_feature_outputs( rank=rank, is_heterogeneous=is_heterogeneous, - edge_assign_strategy=edge_assign_strategy, - output_graph=partition_output.partitioned_edge_index, + should_assign_edges_by_src_node=should_assign_edges_by_src_node, + output_graph=partitioned_edge_index, output_edge_feat=partition_output.partitioned_edge_features, expected_edge_types=MOCKED_HETEROGENEOUS_EDGE_TYPES, ) @@ -669,7 +689,7 @@ def test_partitioning_correctness( rank=rank, is_heterogeneous=is_heterogeneous, output_node_partition_book=partition_output.node_partition_book, - edge_assign_strategy=EdgeAssignStrategy.BY_SOURCE_NODE, + should_assign_edges_by_src_node=True, output_labeled_edge_index=partition_output.partitioned_positive_labels, expected_edge_types=MOCKED_HETEROGENEOUS_EDGE_TYPES, ) @@ -691,7 +711,7 @@ def test_partitioning_correctness( rank=rank, is_heterogeneous=is_heterogeneous, output_node_partition_book=partition_output.node_partition_book, - edge_assign_strategy=EdgeAssignStrategy.BY_SOURCE_NODE, + should_assign_edges_by_src_node=True, output_labeled_edge_index=partition_output.partitioned_negative_labels, expected_edge_types=MOCKED_HETEROGENEOUS_EDGE_TYPES, ) @@ -728,13 +748,14 @@ def test_partitioning_correctness( for node_type in unified_output_node_feat: # First, we get the expected node features from the mocked input for this node type expected_node_feat = MOCKED_UNIFIED_GRAPH.node_features[node_type] + partitioned_node_feat_list = unified_output_node_feat[node_type] # We combine the output node features across all the ranks - output_node_feat = torch.cat(unified_output_node_feat[node_type], dim=0) + partitioned_node_feat = torch.cat(partitioned_node_feat_list, dim=0) # Finally, we check that the expected tensor and output tensor have the same rows, which is achieved by setting the shuffle dimension 
to 0 assert_tensor_equality( - tensor_a=expected_node_feat, tensor_b=output_node_feat, dim=0 + tensor_a=expected_node_feat, tensor_b=partitioned_node_feat, dim=0 ) ## Checking for the union of edge features across all ranks equals to the full set from the input @@ -742,13 +763,14 @@ def test_partitioning_correctness( for edge_type in unified_output_edge_feat: # First, we get the expected edge features from the mocked input for this edge type expected_edge_feat = MOCKED_UNIFIED_GRAPH.edge_features[edge_type] + partitioned_edge_feat_list = unified_output_edge_feat[edge_type] # We combine the output edge features across all the ranks - output_edge_feat = torch.cat(unified_output_edge_feat[edge_type], dim=0) + partitioned_edge_feat = torch.cat(partitioned_edge_feat_list, dim=0) # Finally, we check that the expected tensor and output tensor have the same rows, which is achieved by setting the shuffle dimension to 0 assert_tensor_equality( - tensor_a=expected_edge_feat, tensor_b=output_edge_feat, dim=0 + tensor_a=expected_edge_feat, tensor_b=partitioned_edge_feat, dim=0 ) for edge_type in unified_output_pos_label: @@ -784,7 +806,7 @@ def test_partitioning_failure(self) -> None: node_ids = input_graph.node_ids[USER_NODE_TYPE] node_features = input_graph.node_features[USER_NODE_TYPE] - init_worker_group(world_size=1, rank=rank) + init_worker_group(world_size=1, rank=rank, group_name=get_process_group_name(0)) init_rpc( master_addr=self._master_ip_address, master_port=master_port, @@ -792,7 +814,7 @@ def test_partitioning_failure(self) -> None: ) partitioner = DistLinkPredictionDataPartitioner( - edge_assign_strategy=EdgeAssignStrategy.BY_SOURCE_NODE, + should_assign_edges_by_src_node=True, ) # Assert that calling partition without any registering raises error @@ -823,7 +845,7 @@ def test_partitioning_failure(self) -> None: partitioner.partition_node() partitioner = DistLinkPredictionDataPartitioner( - edge_assign_strategy=EdgeAssignStrategy.BY_SOURCE_NODE, + should_assign_edges_by_src_node=True, ) empty_node_ids = torch.empty(0) empty_edge_index = torch.empty((2, 0)) diff --git a/python/tests/unit/distributed/load_and_build_dataset_test.py b/python/tests/unit/distributed/load_and_build_dataset_test.py new file mode 100644 index 0000000..3541758 --- /dev/null +++ b/python/tests/unit/distributed/load_and_build_dataset_test.py @@ -0,0 +1,460 @@ +import unittest +from collections import abc +from typing import Any, MutableMapping, Optional, Union + +import graphlearn_torch as glt +import torch +from parameterized import param, parameterized +from torch.multiprocessing import Manager +from torch.testing import assert_close + +from gigl.distributed import ( + DistLinkPredictionDataPartitioner, + DistLinkPredictionDataset, +) +from gigl.src.common.types.graph_data import EdgeType, NodeType +from gigl.src.mocking.mocking_assets.mocked_datasets_for_pipeline_tests import ( + HETEROGENEOUS_TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, + TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, +) +from gigl.types.distributed import PartitionOutput +from gigl.utils.data_splitters import NodeAnchorLinkSplitter +from tests.test_assets.distributed.run_distributed_dataset import ( + run_distributed_dataset, +) + +_PARTITIONER_ERROR_MESSAGE = "Test partitioner subclass not implemented" +_DATASET_ERROR_MESSAGE = "Test dataset subclass not implemented" + + +class _PartitionerTestingSubclass(DistLinkPredictionDataPartitioner): + def partition(self) -> PartitionOutput: + # This subclass raises a NotImplementedError so that we can 
identify this custom logic being used in place of
+        # the base DistLinkPredictionDataPartitioner.partition() logic.
+        raise NotImplementedError(_PARTITIONER_ERROR_MESSAGE)
+
+
+class _DatasetTestingSubclass(DistLinkPredictionDataset):
+    def build(
+        self,
+        partition_output: PartitionOutput,
+        splitter: Optional[NodeAnchorLinkSplitter] = None,
+    ) -> None:
+        # This subclass raises a NotImplementedError so that we can identify this custom logic being used in place of
+        # the base DistLinkPredictionDataset.build() logic.
+        raise NotImplementedError(_DATASET_ERROR_MESSAGE)
+
+
+class _FakeSplitter:
+    def __init__(
+        self,
+        splits: Union[
+            tuple[torch.Tensor, torch.Tensor, torch.Tensor],
+            dict[EdgeType, tuple[torch.Tensor, torch.Tensor, torch.Tensor]],
+        ],
+    ):
+        self.splits = splits
+
+    def __call__(self, edge_index):
+        return self.splits
+
+
+_USER = NodeType("user")
+_STORY = NodeType("story")
+
+
+class LoadAndBuildDatasetTestCase(unittest.TestCase):
+    def setUp(self):
+        self._master_ip_address = "localhost"
+        self._world_size = 1
+        self._num_rpc_threads = 4
+
+    def assert_tensor_equal(
+        self,
+        actual: Optional[Union[torch.Tensor, abc.Mapping[Any, torch.Tensor]]],
+        expected: Optional[Union[torch.Tensor, abc.Mapping[Any, torch.Tensor]]],
+    ):
+        if type(actual) != type(expected):
+            self.fail(f"Expected type {type(expected)} but got {type(actual)}")
+        if isinstance(actual, dict) and isinstance(expected, dict):
+            self.assertEqual(actual.keys(), expected.keys())
+            for key in actual.keys():
+                assert_close(actual[key], expected[key], atol=0, rtol=0)
+        elif isinstance(actual, torch.Tensor) and isinstance(expected, torch.Tensor):
+            assert_close(actual, expected, atol=0, rtol=0)
+
+    def test_load_and_build_dataset(self):
+        master_port = glt.utils.get_free_port(self._master_ip_address)
+        manager = Manager()
+        output_dict: MutableMapping[int, DistLinkPredictionDataset] = manager.dict()
+
+        dataset = run_distributed_dataset(
+            rank=0,
+            world_size=self._world_size,
+            mocked_dataset_info=TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO,
+            output_dict=output_dict,
+            should_load_tensors_in_parallel=True,
+            master_ip_address=self._master_ip_address,
+            master_port=master_port,
+        )
+
+        self.assertIsNone(dataset.train_node_ids)
+        self.assertIsNone(dataset.val_node_ids)
+        self.assertIsNone(dataset.test_node_ids)
+        self.assertIsInstance(dataset.node_ids, torch.Tensor)
+
+    def test_load_build_and_split_dataset(self):
+        master_port = glt.utils.get_free_port(self._master_ip_address)
+        manager = Manager()
+        output_dict: MutableMapping[int, DistLinkPredictionDataset] = manager.dict()
+        dataset = run_distributed_dataset(
+            rank=0,
+            world_size=self._world_size,
+            mocked_dataset_info=TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO,
+            output_dict=output_dict,
+            should_load_tensors_in_parallel=True,
+            master_ip_address=self._master_ip_address,
+            master_port=master_port,
+            splitter=_FakeSplitter(
+                (
+                    torch.tensor([1000]),
+                    torch.tensor([2000, 3000]),
+                    torch.tensor([3000, 4000, 5000]),
+                ),
+            ),
+        )
+
+        self.assert_tensor_equal(dataset.train_node_ids, torch.tensor([1000]))
+        self.assert_tensor_equal(dataset.val_node_ids, torch.tensor([2000, 3000]))
+        self.assert_tensor_equal(
+            dataset.test_node_ids, torch.tensor([3000, 4000, 5000])
+        )
+        # Check that the node ids have *all* node ids, including nodes not included in train, val, and test.
+ self.assert_tensor_equal( + dataset.node_ids, + torch.tensor( + [ + 1000, + 2000, + 3000, + 3000, + 4000, + 5000, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + ] + ), + ) + + @parameterized.expand( + [ + param( + "One supervision edge type", + splits={ + _USER: ( + torch.tensor([1000]), + torch.tensor([2000]), + torch.tensor([3000]), + ) + }, + expected_train_node_ids={_USER: torch.tensor([1000])}, + expected_val_node_ids={_USER: torch.tensor([2000])}, + expected_test_node_ids={_USER: torch.tensor([3000])}, + expected_node_ids={ + _USER: torch.tensor( + [ + 1000, + 2000, + 3000, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + ] + ), + _STORY: torch.tensor( + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + ] + ), + }, + ), + param( + "One supervision edge type - different numbers of train-test-val", + splits={ + _USER: ( + torch.tensor([1000]), + torch.tensor([2000, 3000]), + torch.tensor([3000, 4000, 5000]), + ) + }, + expected_train_node_ids={_USER: torch.tensor([1000])}, + expected_val_node_ids={_USER: torch.tensor([2000, 3000])}, + expected_test_node_ids={_USER: torch.tensor([3000, 4000, 5000])}, + expected_node_ids={ + _USER: torch.tensor( + [ + 1000, + 2000, + 3000, + 3000, + 4000, + 5000, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + ] + ), + _STORY: torch.tensor( + [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + ] + ), + }, + ), + param( + "Two supervision edge types - two target node types", + splits={ + _USER: ( + torch.tensor([1000]), + torch.tensor([2000]), + torch.tensor([3000]), + ), + _STORY: ( + torch.tensor([4000]), + torch.tensor([5000]), + torch.tensor([6000]), + ), + }, + expected_train_node_ids={ + _USER: torch.tensor([1000]), + _STORY: torch.tensor([4000]), + }, + expected_val_node_ids={ + _USER: torch.tensor([2000]), + _STORY: torch.tensor([5000]), + }, + expected_test_node_ids={ + _USER: torch.tensor([3000]), + _STORY: torch.tensor([6000]), + }, + expected_node_ids={ + _USER: torch.tensor( + [ + 1000, + 2000, + 3000, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + ] + ), + _STORY: torch.tensor( + [ + 4000, + 5000, + 6000, + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + ] + ), + }, + ), + ] + ) + def test_load_build_and_split_dataset_heterogeneous( + self, + _, + splits, + expected_train_node_ids, + expected_val_node_ids, + expected_test_node_ids, + expected_node_ids, + ): + master_port = glt.utils.get_free_port(self._master_ip_address) + manager = Manager() + output_dict: MutableMapping[int, DistLinkPredictionDataset] = manager.dict() + dataset = run_distributed_dataset( + rank=0, + world_size=self._world_size, + mocked_dataset_info=HETEROGENEOUS_TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO, + output_dict=output_dict, + should_load_tensors_in_parallel=True, + master_ip_address=self._master_ip_address, + master_port=master_port, + splitter=_FakeSplitter(splits), + ) + + self.assert_tensor_equal(dataset.train_node_ids, expected_train_node_ids) + self.assert_tensor_equal(dataset.val_node_ids, expected_val_node_ids) + self.assert_tensor_equal(dataset.test_node_ids, expected_test_node_ids) + # Check that the node ids have *all* node ids, including nodes not included in train, val, and test. 
+        self.assert_tensor_equal(dataset.node_ids, expected_node_ids)
+
+    @parameterized.expand(
+        [
+            param(
+                "Tests custom Partitioner subclasses can be used in `load_and_build_partitioned_dataset`",
+                should_use_custom_partitioner=True,
+                should_use_custom_dataset=False,
+                expected_error_message=_PARTITIONER_ERROR_MESSAGE,
+            ),
+            param(
+                "Tests custom Dataset subclasses can be used in `load_and_build_partitioned_dataset`",
+                should_use_custom_partitioner=False,
+                should_use_custom_dataset=True,
+                expected_error_message=_DATASET_ERROR_MESSAGE,
+            ),
+        ]
+    )
+    def test_load_and_build_subclasses(
+        self,
+        _,
+        should_use_custom_partitioner: bool,
+        should_use_custom_dataset: bool,
+        expected_error_message: str,
+    ) -> None:
+        master_port = glt.utils.get_free_port(self._master_ip_address)
+
+        manager = Manager()
+        output_dict: MutableMapping[int, DistLinkPredictionDataset] = manager.dict()
+
+        if should_use_custom_partitioner:
+            partitioner = _PartitionerTestingSubclass()
+        else:
+            partitioner = None
+
+        if should_use_custom_dataset:
+            dataset = _DatasetTestingSubclass(rank=0, world_size=1, edge_dir="in")
+        else:
+            dataset = None
+
+        with self.assertRaisesRegex(NotImplementedError, expected_error_message):
+            run_distributed_dataset(
+                rank=0,
+                world_size=self._world_size,
+                mocked_dataset_info=TOY_GRAPH_NODE_ANCHOR_MOCKED_DATASET_INFO,
+                output_dict=output_dict,
+                should_load_tensors_in_parallel=True,
+                master_ip_address=self._master_ip_address,
+                master_port=master_port,
+                partitioner=partitioner,
+                dataset=dataset,
+            )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/tests/unit/distributed/partition_book_test.py b/python/tests/unit/distributed/partition_book_test.py
new file mode 100644
index 0000000..ab99cc0
--- /dev/null
+++ b/python/tests/unit/distributed/partition_book_test.py
@@ -0,0 +1,51 @@
+import unittest
+from typing import Dict
+
+import torch
+from graphlearn_torch.partition import RangePartitionBook
+from parameterized import param, parameterized
+
+from gigl.distributed.utils.partition_book import get_ids_on_rank
+
+
+class PartitionBookTest(unittest.TestCase):
+    @parameterized.expand(
+        [
+            param(
+                "Test getting ids for tensor-based partition book",
+                partition_book=torch.Tensor([0, 1, 1, 0, 3, 3, 2, 0, 1, 1]),
+                rank_to_expected_ids={
+                    0: torch.Tensor([0, 3, 7]).to(torch.int64),
+                    1: torch.Tensor([1, 2, 8, 9]).to(torch.int64),
+                    2: torch.Tensor([6]).to(torch.int64),
+                    3: torch.Tensor([4, 5]).to(torch.int64),
+                },
+            ),
+            param(
+                "Test getting ids for range-based partition book",
+                partition_book=RangePartitionBook(
+                    partition_ranges=[(0, 4), (4, 5), (5, 10), (10, 13)],
+                    partition_idx=0,
+                ),
+                rank_to_expected_ids={
+                    0: torch.Tensor([0, 1, 2, 3]).to(torch.int64),
+                    1: torch.Tensor([4]).to(torch.int64),
+                    2: torch.Tensor([5, 6, 7, 8, 9]).to(torch.int64),
+                    3: torch.Tensor([10, 11, 12]).to(torch.int64),
+                },
+            ),
+        ]
+    )
+    def test_getting_ids_on_rank(
+        self,
+        _,
+        partition_book: torch.Tensor,
+        rank_to_expected_ids: Dict[int, torch.Tensor],
+    ):
+        for rank, expected_ids in rank_to_expected_ids.items():
+            output_ids = get_ids_on_rank(partition_book=partition_book, rank=rank)
+            torch.testing.assert_close(actual=output_ids, expected=expected_ids)
+
+
+if __name__ == "__main__":
+    unittest.main()
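The two partition-book layouts exercised above admit a simple mental model: a tensor-based book stores, for every id, the rank that owns it, while a range-based book stores one contiguous id range per rank. A sketch of how each could be resolved (illustrative only; these hypothetical helpers are not gigl's `get_ids_on_rank` implementation):

import torch

def ids_on_rank_from_tensor(partition_book: torch.Tensor, rank: int) -> torch.Tensor:
    # Entry i holds the rank owning id i, so select the matching positions.
    return torch.nonzero(partition_book == rank, as_tuple=False).flatten()

def ids_on_rank_from_ranges(partition_ranges: list, rank: int) -> torch.Tensor:
    # Rank r owns the half-open range partition_ranges[r] = (start, end).
    start, end = partition_ranges[rank]
    return torch.arange(start, end, dtype=torch.int64)

# ids_on_rank_from_tensor(torch.tensor([0, 1, 1, 0, 3, 3, 2, 0, 1, 1]), 0) -> tensor([0, 3, 7])
# ids_on_rank_from_ranges([(0, 4), (4, 5), (5, 10), (10, 13)], 2) -> tensor([5, 6, 7, 8, 9])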
diff --git a/python/tests/unit/src_tests/common_tests/modeling_task_spec_utils_tests/__init__.py b/python/tests/unit/gnn_library/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/modeling_task_spec_utils_tests/__init__.py
rename to python/tests/unit/gnn_library/__init__.py
diff --git a/python/tests/unit/gnn_library_tests/pyg_training_test.py b/python/tests/unit/gnn_library/pyg_training_test.py
similarity index 100%
rename from python/tests/unit/gnn_library_tests/pyg_training_test.py
rename to python/tests/unit/gnn_library/pyg_training_test.py
diff --git a/python/tests/unit/src_tests/common_tests/models_tests/__init__.py b/python/tests/unit/orchestration/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/models_tests/__init__.py
rename to python/tests/unit/orchestration/__init__.py
diff --git a/python/tests/unit/src_tests/common_tests/models_tests/layers_tests/__init__.py b/python/tests/unit/orchestration/kubeflow/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/models_tests/layers_tests/__init__.py
rename to python/tests/unit/orchestration/kubeflow/__init__.py
diff --git a/python/tests/unit/orchestration/kubeflow/kfp_orchestrator_test.py b/python/tests/unit/orchestration/kubeflow/kfp_orchestrator_test.py
new file mode 100644
index 0000000..a8deb77
--- /dev/null
+++ b/python/tests/unit/orchestration/kubeflow/kfp_orchestrator_test.py
@@ -0,0 +1,32 @@
+import unittest
+from unittest.mock import ANY, patch
+
+from gigl.common import GcsUri
+from gigl.common.logger import Logger
+from gigl.orchestration.kubeflow.kfp_orchestrator import KfpOrchestrator
+
+logger = Logger()
+
+
+class KfpOrchestratorTest(unittest.TestCase):
+    @patch("gigl.orchestration.kubeflow.kfp_orchestrator.FileLoader")
+    def test_compile_uploads_compiled_yaml(self, MockFileLoader):
+        mock_file_loader = MockFileLoader.return_value
+        mock_file_loader.load_file.return_value = None
+
+        dst_compiled_pipeline_path = GcsUri(
+            "gs://SOME NON EXISTING BUCKET/ NON EXISTING FILE"
+        )
+        KfpOrchestrator.compile(
+            cuda_container_image="SOME NONEXISTENT IMAGE 1",
+            cpu_container_image="SOME NONEXISTENT IMAGE 2",
+            dataflow_container_image="SOME NONEXISTENT IMAGE 3",
+            dst_compiled_pipeline_path=dst_compiled_pipeline_path,
+        )
+        mock_file_loader.load_file.assert_called_once_with(
+            file_uri_src=ANY, file_uri_dst=dst_compiled_pipeline_path
+        )
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/python/tests/unit/orchestration/kubeflow/kfp_runner_test.py b/python/tests/unit/orchestration/kubeflow/kfp_runner_test.py
new file mode 100644
index 0000000..8589b17
--- /dev/null
+++ b/python/tests/unit/orchestration/kubeflow/kfp_runner_test.py
@@ -0,0 +1,34 @@
+import unittest
+
+from gigl.common.logger import Logger
+from gigl.orchestration.kubeflow.runner import _parse_additional_job_args
+from gigl.src.common.constants.components import GiGLComponents
+
+logger = Logger()
+
+
+class KFPRunnerTest(unittest.TestCase):
+    def test_parse_additional_job_args(
+        self,
+    ):
+        args = [
+            "subgraph_sampler.additional_spark35_jar_file_uris=gs://path/to/jar",
+            "subgraph_sampler.arg_2=value=10.243,123",
+            "split_generator.some_other_arg=value",
+        ]
+
+        expected_parsed_args = {
+            GiGLComponents.SubgraphSampler: {
+                "additional_spark35_jar_file_uris": "gs://path/to/jar",
+                "arg_2": "value=10.243,123",
+            },
+            GiGLComponents.SplitGenerator: {
+                "some_other_arg": "value",
+            },
+        }
+        parsed_args = _parse_additional_job_args(args)
+        self.assertEqual(parsed_args, expected_parsed_args)
+
+
+if __name__ == "__main__":
+    unittest.main()
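The runner test above pins down the additional-args grammar: each entry is "<component>.<key>=<value>", and only the first "." and the first "=" act as separators, so values may themselves contain "=" or ",". A sketch of parsing under that contract (illustrative only; the real `_parse_additional_job_args` keys on GiGLComponents enum members rather than plain strings):

from collections import defaultdict

def parse_additional_job_args(args: list) -> dict:
    parsed: dict = defaultdict(dict)
    for arg in args:
        component, remainder = arg.split(".", 1)
        key, value = remainder.split("=", 1)  # split on the first "=" only
        parsed[component][key] = value
    return dict(parsed)

# parse_additional_job_args(["subgraph_sampler.arg_2=value=10.243,123"])
# -> {"subgraph_sampler": {"arg_2": "value=10.243,123"}}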
diff --git a/python/tests/unit/src_tests/common_tests/types_tests/__init__.py b/python/tests/unit/src/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/types_tests/__init__.py
rename to python/tests/unit/src/__init__.py
diff --git a/python/tests/unit/src_tests/common_tests/types_tests/pb_wrappers_tests/__init__.py b/python/tests/unit/src/common/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/types_tests/pb_wrappers_tests/__init__.py
rename to python/tests/unit/src/common/__init__.py
diff --git a/python/tests/unit/src_tests/common_tests/utils_tests/__init__.py b/python/tests/unit/src/common/graph_builder/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/utils_tests/__init__.py
rename to python/tests/unit/src/common/graph_builder/__init__.py
diff --git a/python/tests/unit/src_tests/common_tests/graph_builder_tests/gbml_graph_protocol_test.py b/python/tests/unit/src/common/graph_builder/gbml_graph_protocol_test.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/graph_builder_tests/gbml_graph_protocol_test.py
rename to python/tests/unit/src/common/graph_builder/gbml_graph_protocol_test.py
diff --git a/python/tests/unit/src_tests/common_tests/graph_builder_tests/pyg_graph_builder_test.py b/python/tests/unit/src/common/graph_builder/pyg_graph_builder_test.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/graph_builder_tests/pyg_graph_builder_test.py
rename to python/tests/unit/src/common/graph_builder/pyg_graph_builder_test.py
diff --git a/python/tests/unit/src_tests/common_tests/graph_builder_tests/pyg_graph_data_test.py b/python/tests/unit/src/common/graph_builder/pyg_graph_data_test.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/graph_builder_tests/pyg_graph_data_test.py
rename to python/tests/unit/src/common/graph_builder/pyg_graph_data_test.py
diff --git a/python/tests/unit/src_tests/config_populator_tests/__init__.py b/python/tests/unit/src/common/modeling_task_spec_utils/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/config_populator_tests/__init__.py
rename to python/tests/unit/src/common/modeling_task_spec_utils/__init__.py
diff --git a/python/tests/unit/src_tests/common_tests/modeling_task_spec_utils_tests/early_stop_test.py b/python/tests/unit/src/common/modeling_task_spec_utils/early_stop_test.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/modeling_task_spec_utils_tests/early_stop_test.py
rename to python/tests/unit/src/common/modeling_task_spec_utils/early_stop_test.py
diff --git a/python/tests/unit/src_tests/data_preprocessor_tests/__init__.py b/python/tests/unit/src/common/models/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/data_preprocessor_tests/__init__.py
rename to python/tests/unit/src/common/models/__init__.py
diff --git a/python/tests/unit/src_tests/inference_tests/__init__.py b/python/tests/unit/src/common/models/layers/__init__.py
similarity index 100%
rename from python/tests/unit/src_tests/inference_tests/__init__.py
rename to python/tests/unit/src/common/models/layers/__init__.py
diff --git a/python/tests/unit/src_tests/common_tests/models_tests/layers_tests/count_min_sketch_test.py b/python/tests/unit/src/common/models/layers/count_min_sketch_test.py
similarity index 100%
rename from python/tests/unit/src_tests/common_tests/models_tests/layers_tests/count_min_sketch_test.py
rename to python/tests/unit/src/common/models/layers/count_min_sketch_test.py
diff --git
a/python/tests/unit/src_tests/common_tests/models_tests/layers_tests/decoder_test.py b/python/tests/unit/src/common/models/layers/decoder_test.py similarity index 100% rename from python/tests/unit/src_tests/common_tests/models_tests/layers_tests/decoder_test.py rename to python/tests/unit/src/common/models/layers/decoder_test.py diff --git a/python/tests/unit/src_tests/common_tests/models_tests/layers_tests/loss_test.py b/python/tests/unit/src/common/models/layers/loss_test.py similarity index 100% rename from python/tests/unit/src_tests/common_tests/models_tests/layers_tests/loss_test.py rename to python/tests/unit/src/common/models/layers/loss_test.py diff --git a/python/tests/unit/src_tests/inference_tests/lib_tests/__init__.py b/python/tests/unit/src/common/types/__init__.py similarity index 100% rename from python/tests/unit/src_tests/inference_tests/lib_tests/__init__.py rename to python/tests/unit/src/common/types/__init__.py diff --git a/python/tests/unit/src_tests/common_tests/types_tests/model_eval_metrics_test.py b/python/tests/unit/src/common/types/model_eval_metrics_test.py similarity index 100% rename from python/tests/unit/src_tests/common_tests/types_tests/model_eval_metrics_test.py rename to python/tests/unit/src/common/types/model_eval_metrics_test.py diff --git a/python/tests/unit/src_tests/subgraph_sampler_tests/__init__.py b/python/tests/unit/src/common/types/pb_wrappers/__init__.py similarity index 100% rename from python/tests/unit/src_tests/subgraph_sampler_tests/__init__.py rename to python/tests/unit/src/common/types/pb_wrappers/__init__.py diff --git a/python/tests/unit/src_tests/common_tests/types_tests/pb_wrappers_tests/graph_data_types_test.py b/python/tests/unit/src/common/types/pb_wrappers/graph_data_types_test.py similarity index 100% rename from python/tests/unit/src_tests/common_tests/types_tests/pb_wrappers_tests/graph_data_types_test.py rename to python/tests/unit/src/common/types/pb_wrappers/graph_data_types_test.py diff --git a/python/tests/unit/src/common/types/pb_wrappers/graph_metadata_test.py b/python/tests/unit/src/common/types/pb_wrappers/graph_metadata_test.py new file mode 100644 index 0000000..c8a3d8b --- /dev/null +++ b/python/tests/unit/src/common/types/pb_wrappers/graph_metadata_test.py @@ -0,0 +1,110 @@ +import unittest + +from parameterized import param, parameterized + +from gigl.src.common.types.graph_data import EdgeType, NodeType, Relation +from gigl.src.common.types.pb_wrappers.graph_metadata import GraphMetadataPbWrapper +from snapchat.research.gbml import graph_schema_pb2 + +_NODE_TYPE_USER: str = "user" +_NODE_TYPE_ITEM: str = "item" +_DEFAULT_RELATION = "to" + +_EDGE_TYPE_USER_TO_USER_PB: graph_schema_pb2.EdgeType = graph_schema_pb2.EdgeType( + src_node_type=_NODE_TYPE_USER, + relation=_DEFAULT_RELATION, + dst_node_type=_NODE_TYPE_USER, +) +_EDGE_TYPE_USER_TO_ITEM_PB: graph_schema_pb2.EdgeType = graph_schema_pb2.EdgeType( + src_node_type=_NODE_TYPE_USER, + relation=_DEFAULT_RELATION, + dst_node_type=_NODE_TYPE_ITEM, +) + +_HOMOGENEOUS_GRAPH_METADATA_PB = graph_schema_pb2.GraphMetadata( + node_types=[_NODE_TYPE_USER], + edge_types=[_EDGE_TYPE_USER_TO_USER_PB], + condensed_node_type_map={0: _NODE_TYPE_USER}, + condensed_edge_type_map={0: _EDGE_TYPE_USER_TO_USER_PB}, +) +_HETEROGENEOUS_GRAPH_METADATA_PB = graph_schema_pb2.GraphMetadata( + node_types=[_NODE_TYPE_USER, _NODE_TYPE_ITEM], + edge_types=[_EDGE_TYPE_USER_TO_USER_PB, _EDGE_TYPE_USER_TO_ITEM_PB], + condensed_node_type_map={0: _NODE_TYPE_USER, 1: _NODE_TYPE_ITEM}, + 
condensed_edge_type_map={ + 0: _EDGE_TYPE_USER_TO_USER_PB, + 1: _EDGE_TYPE_USER_TO_ITEM_PB, + }, +) + +_HOMOGENEOUS_GRAPH_METADATA_PB_WRAPPER = GraphMetadataPbWrapper( + graph_metadata_pb=_HOMOGENEOUS_GRAPH_METADATA_PB +) +_HETEROGENEOUS_GRAPH_METADATA_PB_WRAPPER = GraphMetadataPbWrapper( + graph_metadata_pb=_HETEROGENEOUS_GRAPH_METADATA_PB +) + + +class GraphMetadataUnitTest(unittest.TestCase): + @parameterized.expand( + [ + param( + graph_metadata_pb_wrapper=_HOMOGENEOUS_GRAPH_METADATA_PB_WRAPPER, + expected_node_type=_NODE_TYPE_USER, + expected_edge_type=EdgeType( + src_node_type=NodeType(_NODE_TYPE_USER), + relation=Relation(_DEFAULT_RELATION), + dst_node_type=NodeType(_NODE_TYPE_USER), + ), + expected_condensed_node_type=0, + expected_condensed_edge_type=0, + ), + ] + ) + def test_homogeneous_property_correctness( + self, + graph_metadata_pb_wrapper: GraphMetadataPbWrapper, + expected_node_type: str, + expected_edge_type: EdgeType, + expected_condensed_node_type: int, + expected_condensed_edge_type: int, + ): + """ + Tests for success of homogeneous node, edge, condensed node, and condensed edge types with a homogeneous graph + """ + self.assertEqual( + graph_metadata_pb_wrapper.homogeneous_node_type, expected_node_type + ) + self.assertEqual( + graph_metadata_pb_wrapper.homogeneous_edge_type, expected_edge_type + ) + self.assertEqual( + graph_metadata_pb_wrapper.homogeneous_condensed_node_type, + expected_condensed_node_type, + ) + self.assertEqual( + graph_metadata_pb_wrapper.homogeneous_condensed_edge_type, + expected_condensed_edge_type, + ) + + @parameterized.expand( + [ + param( + graph_metadata_pb_wrapper=_HETEROGENEOUS_GRAPH_METADATA_PB_WRAPPER, + ), + ] + ) + def test_homogeneous_property_failure( + self, graph_metadata_pb_wrapper: GraphMetadataPbWrapper + ): + """ + Tests for failure of homogeneous node, edge, condensed node, and condensed edge types with a heterogeneous graph + """ + with self.assertRaises(ValueError): + graph_metadata_pb_wrapper.homogeneous_node_type + with self.assertRaises(ValueError): + graph_metadata_pb_wrapper.homogeneous_edge_type + with self.assertRaises(ValueError): + graph_metadata_pb_wrapper.homogeneous_condensed_node_type + with self.assertRaises(ValueError): + graph_metadata_pb_wrapper.homogeneous_condensed_edge_type diff --git a/python/tests/unit/src_tests/training_tests/__init__.py b/python/tests/unit/src/common/utils/__init__.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/__init__.py rename to python/tests/unit/src/common/utils/__init__.py diff --git a/python/tests/unit/src/common/utils/bq_test.py b/python/tests/unit/src/common/utils/bq_test.py new file mode 100644 index 0000000..7e3bd85 --- /dev/null +++ b/python/tests/unit/src/common/utils/bq_test.py @@ -0,0 +1,45 @@ +import unittest + +from parameterized import param, parameterized + +from gigl.src.common.utils.bq import BqUtils + + +class BqUtilsTest(unittest.TestCase): + @parameterized.expand( + [ + param( + bq_table_path="bq_project.bq_dataset.bq_table", + expected_project_id="bq_project", + expected_dataset_id="bq_dataset", + expected_table_name="bq_table", + ), + param( + bq_table_path="bq_project:bq_dataset.bq_table", + expected_project_id="bq_project", + expected_dataset_id="bq_dataset", + expected_table_name="bq_table", + ), + ] + ) + def test_parse_and_format_bq_path( + self, + bq_table_path, + expected_project_id, + expected_dataset_id, + expected_table_name, + ): + ( + parsed_project_id, + parsed_dataset_id, + parsed_table_name, + ) = 
BqUtils.parse_bq_table_path(bq_table_path=bq_table_path) + self.assertEqual(parsed_project_id, expected_project_id) + self.assertEqual(parsed_dataset_id, expected_dataset_id) + self.assertEqual(parsed_table_name, expected_table_name) + reconstructed_bq_table_path = BqUtils.join_path( + parsed_project_id, parsed_dataset_id, parsed_table_name + ) + self.assertEqual( + reconstructed_bq_table_path, BqUtils.format_bq_path(bq_table_path) + ) diff --git a/python/tests/unit/src_tests/common_tests/utils_tests/eval_metrics_test.py b/python/tests/unit/src/common/utils/eval_metrics_test.py similarity index 100% rename from python/tests/unit/src_tests/common_tests/utils_tests/eval_metrics_test.py rename to python/tests/unit/src/common/utils/eval_metrics_test.py diff --git a/python/tests/unit/src_tests/common_tests/utils_tests/gbml_config_test.py b/python/tests/unit/src/common/utils/gbml_config_test.py similarity index 100% rename from python/tests/unit/src_tests/common_tests/utils_tests/gbml_config_test.py rename to python/tests/unit/src/common/utils/gbml_config_test.py diff --git a/python/tests/unit/src_tests/training_tests/lib_tests/__init__.py b/python/tests/unit/src/config_populator/__init__.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/lib_tests/__init__.py rename to python/tests/unit/src/config_populator/__init__.py diff --git a/python/tests/unit/src_tests/config_populator_tests/config_populator_functionality_test.py b/python/tests/unit/src/config_populator/config_populator_functionality_test.py similarity index 100% rename from python/tests/unit/src_tests/config_populator_tests/config_populator_functionality_test.py rename to python/tests/unit/src/config_populator/config_populator_functionality_test.py diff --git a/python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/__init__.py b/python/tests/unit/src/data_preprocessor/__init__.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/__init__.py rename to python/tests/unit/src/data_preprocessor/__init__.py diff --git a/python/tests/unit/src_tests/data_preprocessor_tests/data_preprocessor_config_test.py b/python/tests/unit/src/data_preprocessor/data_preprocessor_config_test.py similarity index 100% rename from python/tests/unit/src_tests/data_preprocessor_tests/data_preprocessor_config_test.py rename to python/tests/unit/src/data_preprocessor/data_preprocessor_config_test.py diff --git a/python/tests/unit/src_tests/validation_tests/__init__.py b/python/tests/unit/src/inference/__init__.py similarity index 100% rename from python/tests/unit/src_tests/validation_tests/__init__.py rename to python/tests/unit/src/inference/__init__.py diff --git a/python/tests/unit/utils_tests/__init__.py b/python/tests/unit/src/inference/lib/__init__.py similarity index 100% rename from python/tests/unit/utils_tests/__init__.py rename to python/tests/unit/src/inference/lib/__init__.py diff --git a/python/tests/unit/src_tests/inference_tests/lib_tests/inference_output_schema_builder_test.py b/python/tests/unit/src/inference/lib/inference_output_schema_builder_test.py similarity index 100% rename from python/tests/unit/src_tests/inference_tests/lib_tests/inference_output_schema_builder_test.py rename to python/tests/unit/src/inference/lib/inference_output_schema_builder_test.py diff --git a/python/tests/unit/src_tests/inference_tests/lib_tests/node_anchor_based_link_prediction_inferencer_test.py 
b/python/tests/unit/src/inference/lib/node_anchor_based_link_prediction_inferencer_test.py similarity index 100% rename from python/tests/unit/src_tests/inference_tests/lib_tests/node_anchor_based_link_prediction_inferencer_test.py rename to python/tests/unit/src/inference/lib/node_anchor_based_link_prediction_inferencer_test.py diff --git a/python/tests/unit/src_tests/inference_tests/lib_tests/node_classification_inferencer_test.py b/python/tests/unit/src/inference/lib/node_classification_inferencer_test.py similarity index 100% rename from python/tests/unit/src_tests/inference_tests/lib_tests/node_classification_inferencer_test.py rename to python/tests/unit/src/inference/lib/node_classification_inferencer_test.py diff --git a/python/tests/unit/src/subgraph_sampler/__init__.py b/python/tests/unit/src/subgraph_sampler/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/unit/src_tests/subgraph_sampler_tests/subgraph_sampler_test.py b/python/tests/unit/src/subgraph_sampler/subgraph_sampler_test.py similarity index 96% rename from python/tests/unit/src_tests/subgraph_sampler_tests/subgraph_sampler_test.py rename to python/tests/unit/src/subgraph_sampler/subgraph_sampler_test.py index 479a0ad..1b0c795 100644 --- a/python/tests/unit/src_tests/subgraph_sampler_tests/subgraph_sampler_test.py +++ b/python/tests/unit/src/subgraph_sampler/subgraph_sampler_test.py @@ -9,12 +9,15 @@ import gigl.env.dep_constants as dep_constants import gigl.src.common.constants.gcs as gcs_constants from gigl.common import GcsUri, LocalUri, UriFactory +from gigl.common.constants import ( + SPARK_31_TFRECORD_JAR_GCS_PATH, + SPARK_35_TFRECORD_JAR_GCS_PATH, +) from gigl.src.common.types import AppliedTaskIdentifier from gigl.src.common.utils import metrics_service_provider from gigl.src.subgraph_sampler import subgraph_sampler from gigl.src.subgraph_sampler.lib.ingestion_protocol import BaseIngestion from snapchat.research.gbml import gbml_config_pb2, gigl_resource_config_pb2 -from gigl.common.constants import SPARK_35_TFRECORD_JAR_GCS_PATH, SPARK_31_TFRECORD_JAR_GCS_PATH # Class that's used as a dummy to be injected and then mocked out @@ -72,6 +75,7 @@ def setUp(self): self.mock_file_loader = MagicMock() self.mock_file_loader.load_file = MagicMock() self.mock_file_loader.delete_files = MagicMock() + self.mock_file_loader.load_files = MagicMock() self.mock_gcs_utils = MagicMock() self.mock_gcs_utils.upload_files_to_gcs = MagicMock() @@ -152,7 +156,7 @@ def test_subgraph_sampler_for_spark( applied_task_identifier=self.task_identifier, resource_config_uri=LocalUri(self.resource_config_path_local_path), task_config_uri=LocalUri(self.gbml_config_path_local_path), - additional_spark35_local_jar_file_paths=[ + additional_spark35_jar_file_uris=[ LocalUri("/does/not/exist/should/not/be/passed/in") ], ) @@ -191,14 +195,13 @@ def test_subgraph_sampler_for_spark( ) with self.subTest("ensure main jar and sidecar jars are uploaded"): - self.mock_gcs_utils.upload_files_to_gcs.assert_called_once_with( - { + self.mock_file_loader.load_files.assert_called_once_with( + source_to_dest_file_uri_map={ LocalUri(self.main_jar_local_path): GcsUri.join( subgraph_sampler_root, "subgraph_sampler.jar", ), }, - parallel=True, ) with self.subTest("correct jar file uris are passed to dataproc"): @@ -262,9 +265,7 @@ def test_subgraph_sampler_for_spark35_and_graphdb_ingestion( custom_worker_image_uri="gcr.io/test_project/test_image:latest", resource_config_uri=LocalUri(self.resource_config_path_local_path), 
task_config_uri=LocalUri(self.gbml_config_path_local_path), - additional_spark35_local_jar_file_paths=[ - LocalUri(self.sidecar_jar_local_path) - ], + additional_spark35_jar_file_uris=[LocalUri(self.sidecar_jar_local_path)], ) subgraph_sampler_root = gcs_constants.get_subgraph_sampler_root_dir( applied_task_identifier=self.task_identifier @@ -300,8 +301,8 @@ def test_subgraph_sampler_for_spark35_and_graphdb_ingestion( ] ) with self.subTest("ensure main jar and sidecar jars are uploaded"): - self.mock_gcs_utils.upload_files_to_gcs.assert_called_once_with( - { + self.mock_file_loader.load_files.assert_called_once_with( + source_to_dest_file_uri_map={ LocalUri(self.main_jar_local_path): GcsUri.join( subgraph_sampler_root, "subgraph_sampler.jar", @@ -311,7 +312,6 @@ def test_subgraph_sampler_for_spark35_and_graphdb_ingestion( "sidecar.jar", ), }, - parallel=True, ) with self.subTest("correct jar file uris are passed to dataproc"): diff --git a/python/tests/unit/src/training/__init__.py b/python/tests/unit/src/training/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/unit/src_tests/training_tests/gnn_trainer_test.py b/python/tests/unit/src/training/gnn_trainer_test.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/gnn_trainer_test.py rename to python/tests/unit/src/training/gnn_trainer_test.py diff --git a/python/tests/unit/src/training/lib/__init__.py b/python/tests/unit/src/training/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/unit/src/training/lib/data_loaders/__init__.py b/python/tests/unit/src/training/lib/data_loaders/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/combined_iterable_dataset_test.py b/python/tests/unit/src/training/lib/data_loaders/combined_iterable_dataset_test.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/combined_iterable_dataset_test.py rename to python/tests/unit/src/training/lib/data_loaders/combined_iterable_dataset_test.py diff --git a/python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/node_anchor_based_link_prediction_batching_test.py b/python/tests/unit/src/training/lib/data_loaders/node_anchor_based_link_prediction_batching_test.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/node_anchor_based_link_prediction_batching_test.py rename to python/tests/unit/src/training/lib/data_loaders/node_anchor_based_link_prediction_batching_test.py diff --git a/python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/rooted_node_neighborhood_batching_test.py b/python/tests/unit/src/training/lib/data_loaders/rooted_node_neighborhood_batching_test.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/rooted_node_neighborhood_batching_test.py rename to python/tests/unit/src/training/lib/data_loaders/rooted_node_neighborhood_batching_test.py diff --git a/python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/supervised_node_classification_batching_test.py b/python/tests/unit/src/training/lib/data_loaders/supervised_node_classification_batching_test.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/supervised_node_classification_batching_test.py rename to 
python/tests/unit/src/training/lib/data_loaders/supervised_node_classification_batching_test.py diff --git a/python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/tf_records_iterable_dataset_test.py b/python/tests/unit/src/training/lib/data_loaders/tf_records_iterable_dataset_test.py similarity index 100% rename from python/tests/unit/src_tests/training_tests/lib_tests/data_loaders_tests/tf_records_iterable_dataset_test.py rename to python/tests/unit/src/training/lib/data_loaders/tf_records_iterable_dataset_test.py diff --git a/python/tests/unit/src/validation/__init__.py b/python/tests/unit/src/validation/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/unit/src/validation/lib/__init__.py b/python/tests/unit/src/validation/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/unit/src/validation/lib/name_checks_test.py b/python/tests/unit/src/validation/lib/name_checks_test.py new file mode 100644 index 0000000..39bcf94 --- /dev/null +++ b/python/tests/unit/src/validation/lib/name_checks_test.py @@ -0,0 +1,42 @@ +import unittest + +from parameterized import param, parameterized + +from gigl.src.validation_check.libs.name_checks import ( + check_if_kfp_pipeline_job_name_valid, +) + + +class TestStringChecks(unittest.TestCase): + @parameterized.expand( + [ + param("valid_job_name", "valid_job_name"), + param("valid_job_name_with_numbers", "valid_job_name_123"), + ] + ) + def test_valid_job_names(self, name, job_name): + try: + check_if_kfp_pipeline_job_name_valid(job_name) + except ValueError: + self.fail( + f"check_if_kfp_pipeline_job_name_valid raised ValueError unexpectedly for {job_name}" + ) + + @parameterized.expand( + [ + param("empty_string", ""), + param("starts_with_number", "1invalid-job-name"), + param("contains_uppercase", "InvalidJobName"), + param("contains_special_characters", "invalid@job#name"), + param("too_long", "a" * 52), + param("ends_with_dash", "invalid-job-name-"), + param("ends_with_underscore", "invalid_job_name_"), + ] + ) + def test_invalid_job_names(self, name: str, job_name: str): + with self.assertRaises(ValueError): + check_if_kfp_pipeline_job_name_valid(job_name) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/unit/src_tests/validation_tests/sampling_op_validation_test.py b/python/tests/unit/src/validation/sampling_op_validation_test.py similarity index 100% rename from python/tests/unit/src_tests/validation_tests/sampling_op_validation_test.py rename to python/tests/unit/src/validation/sampling_op_validation_test.py diff --git a/python/tests/unit/src_tests/validation_tests/subgraph_sampling_strategy_validation_test.py b/python/tests/unit/src/validation/subgraph_sampling_strategy_validation_test.py similarity index 100% rename from python/tests/unit/src_tests/validation_tests/subgraph_sampling_strategy_validation_test.py rename to python/tests/unit/src/validation/subgraph_sampling_strategy_validation_test.py diff --git a/python/tests/unit/types_tests/__init__.py b/python/tests/unit/types_tests/__init__.py new file mode 100644 index 0000000..d1f45a4 --- /dev/null +++ b/python/tests/unit/types_tests/__init__.py @@ -0,0 +1,10 @@ +# This directory unfortunately needs to be named "types_tests" +# Otherwise we hit import naming collisions with the "types" module +# See: +# ImportError: Failed to import test module: types.data_test +# Traceback (most recent call last): +# File "/opt/conda/envs/gnn/lib/python3.9/unittest/loader.py", line 436, in 
_find_test_path +# module = self._get_module_from_name(name) +# File "/opt/conda/envs/gnn/lib/python3.9/unittest/loader.py", line 377, in _get_module_from_name +# __import__(name) +# ModuleNotFoundError: No module named 'types.data_test'; 'types' is not a package diff --git a/python/tests/unit/types_tests/data_test.py b/python/tests/unit/types_tests/data_test.py new file mode 100644 index 0000000..b405b91 --- /dev/null +++ b/python/tests/unit/types_tests/data_test.py @@ -0,0 +1,126 @@ +import unittest + +import torch +from parameterized import param, parameterized + +from gigl.src.common.types.graph_data import EdgeType, NodeType, Relation +from gigl.types.data import LoadedGraphTensors +from gigl.types.distributed import ( + DEFAULT_HOMOGENEOUS_EDGE_TYPE, + DEFAULT_HOMOGENEOUS_NODE_TYPE, + NEGATIVE_LABEL_RELATION, + POSITIVE_LABEL_RELATION, +) + + +class TestLoadedGraphTensors(unittest.TestCase): + @parameterized.expand( + [ + param( + "valid_inputs", + node_ids=torch.tensor([0, 1, 2]), + node_features=torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]), + edge_index=torch.tensor([[0, 1], [1, 2]]), + edge_features=torch.tensor([[0.1, 0.2], [0.3, 0.4]]), + positive_label=torch.tensor([[0, 2]]), + negative_label=torch.tensor([[1, 0]]), + expected_edge_index={ + DEFAULT_HOMOGENEOUS_EDGE_TYPE: torch.tensor([[0, 1], [1, 2]]), + EdgeType( + DEFAULT_HOMOGENEOUS_NODE_TYPE, + POSITIVE_LABEL_RELATION, + DEFAULT_HOMOGENEOUS_NODE_TYPE, + ): torch.tensor([[0, 2]]), + EdgeType( + DEFAULT_HOMOGENEOUS_NODE_TYPE, + NEGATIVE_LABEL_RELATION, + DEFAULT_HOMOGENEOUS_NODE_TYPE, + ): torch.tensor([[1, 0]]), + }, + ), + ] + ) + def test_treat_labels_as_edges_success( + self, + name, + node_ids, + node_features, + edge_index, + edge_features, + positive_label, + negative_label, + expected_edge_index, + ): + graph_tensors = LoadedGraphTensors( + node_ids=node_ids, + node_features=node_features, + edge_index=edge_index, + edge_features=edge_features, + positive_label=positive_label, + negative_label=negative_label, + ) + + graph_tensors.treat_labels_as_edges() + self.assertIsNone(graph_tensors.positive_label) + self.assertIsNone(graph_tensors.negative_label) + assert isinstance(graph_tensors.edge_index, dict) + self.assertEqual(graph_tensors.edge_index.keys(), expected_edge_index.keys()) + for edge_type, expected_tensor in expected_edge_index.items(): + torch.testing.assert_close( + graph_tensors.edge_index[edge_type], expected_tensor + ) + + @parameterized.expand( + [ + param( + "missing_labels", + node_ids=torch.tensor([0, 1, 2]), + node_features=torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]]), + edge_index=torch.tensor([[0, 1], [1, 2]]), + edge_features=torch.tensor([[0.1, 0.2], [0.3, 0.4]]), + positive_label=None, + negative_label=None, + raises=ValueError, + ), + param( + "heterogeneous_inputs", + node_ids={NodeType("type1"): torch.tensor([0, 1])}, + node_features=None, + edge_index={ + EdgeType( + NodeType("node1"), Relation("relation"), NodeType("node2") + ): torch.tensor([[0, 1]]) + }, + edge_features=None, + positive_label=torch.tensor([[0, 2]]), + negative_label=torch.tensor([[1, 0]]), + raises=ValueError, + ), + ] + ) + def test_treat_labels_as_edges_errors( + self, + name, + node_ids, + node_features, + edge_index, + edge_features, + positive_label, + negative_label, + raises, + ): + graph_tensors = LoadedGraphTensors( + node_ids=node_ids, + node_features=node_features, + edge_index=edge_index, + edge_features=edge_features, + positive_label=positive_label, + negative_label=negative_label, + ) + + with 
self.assertRaises(raises): + graph_tensors.treat_labels_as_edges() + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/unit/types_tests/distributed_test.py b/python/tests/unit/types_tests/distributed_test.py new file mode 100644 index 0000000..466d07e --- /dev/null +++ b/python/tests/unit/types_tests/distributed_test.py @@ -0,0 +1,80 @@ +import unittest + +from parameterized import param, parameterized + +from gigl.src.common.types.graph_data import EdgeType, NodeType, Relation +from gigl.types.distributed import ( + DEFAULT_HOMOGENEOUS_EDGE_TYPE, + DEFAULT_HOMOGENEOUS_NODE_TYPE, + to_heterogeneous_edge, + to_heterogeneous_node, + to_homogeneous, +) + + +class DistributedTypesTest(unittest.TestCase): + @parameterized.expand( + [ + param("none_input", None, None), + param( + "custom_node_type", + {"custom_node_type": "value"}, + {"custom_node_type": "value"}, + ), + param( + "default_node_type", "value", {DEFAULT_HOMOGENEOUS_NODE_TYPE: "value"} + ), + ] + ) + def test_to_hetergeneous_node(self, _, input_value, expected_output): + self.assertEqual(to_heterogeneous_node(input_value), expected_output) + + @parameterized.expand( + [ + param("none_input", None, None), + param( + "custom_edge_type", + {EdgeType(NodeType("src"), Relation("rel"), NodeType("dst")): "value"}, + {EdgeType(NodeType("src"), Relation("rel"), NodeType("dst")): "value"}, + ), + param( + "default_edge_type", "value", {DEFAULT_HOMOGENEOUS_EDGE_TYPE: "value"} + ), + ] + ) + def test_to_hetergeneous_edge(self, _, input_value, expected_output): + self.assertEqual(to_heterogeneous_edge(input_value), expected_output) + + @parameterized.expand( + [ + param("none_input", None, None), + param( + "single_value_input", + {EdgeType(NodeType("src"), Relation("rel"), NodeType("dst")): "value"}, + "value", + ), + param("direct_value_input", "value", "value"), + ] + ) + def test_from_heterogeneous(self, _, input_value, expected_output): + self.assertEqual(to_homogeneous(input_value), expected_output) + + @parameterized.expand( + [ + param( + "multiple_keys_input", + {NodeType("src"): "src_value", NodeType("dst"): "dst_value"}, + ), + param( + "empty_dict_input", + {}, + ), + ] + ) + def test_from_heterogeneous_invalid(self, _, input_value): + with self.assertRaises(ValueError): + to_homogeneous(input_value) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/unit/utils/__init__.py b/python/tests/unit/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/python/tests/unit/utils/data_splitters_test.py b/python/tests/unit/utils/data_splitters_test.py new file mode 100644 index 0000000..a42cd9e --- /dev/null +++ b/python/tests/unit/utils/data_splitters_test.py @@ -0,0 +1,576 @@ +import unittest +from collections.abc import Mapping + +import torch +from parameterized import param, parameterized +from torch.testing import assert_close + +from gigl.src.common.types.graph_data import EdgeType, NodeType, Relation +from gigl.utils.data_splitters import ( + HashedNodeAnchorLinkSplitter, + _check_edge_index, + _check_val_test_percentage, + _fast_hash, + select_ssl_positive_label_edges, +) + +# For TestDataSplitters +_NODE_A = NodeType("A") +_NODE_B = NodeType("B") +_NODE_C = NodeType("C") +_TO = Relation("to") + +# For SelectSSLPositiveLabelEdgesTest +_NUM_EDGES = 1_000_000 +_TEST_EDGE_INDEX = torch.arange(0, _NUM_EDGES * 2).reshape((2, _NUM_EDGES)) +_INVALID_TEST_EDGE_INDEX = torch.arange(0, _NUM_EDGES * 10).reshape((10, _NUM_EDGES)) + + +class TestDataSplitters(unittest.TestCase): 
+ @parameterized.expand( + [ + param( + "Fast hash with int32", + input_tensor=torch.tensor([[0, 1], [2, 3]], dtype=torch.int32), + expected_output=torch.tensor( + [[0, 1753845952], [697948427, 1408362973]], dtype=torch.int32 + ), + ), + param( + "Fast hash with int64", + input_tensor=torch.tensor([[0, 1], [2, 3]], dtype=torch.int64), + expected_output=torch.tensor( + [ + [0, 6350654354804651301], + [2606959014078780554, 2185194620014831856], + ] + ), + ), + ] + ) + def test_fast_hash( + self, _, input_tensor: torch.Tensor, expected_output: torch.Tensor + ): + actual = _fast_hash(input_tensor) + assert_close(actual=actual, expected=expected_output) + + @parameterized.expand( + [ + param( + "Using src nodes", + edges=torch.stack( + [ + torch.arange(10, dtype=torch.int64), + torch.zeros(10, dtype=torch.int64), + ] + ), + sampling_direction="out", + hash_function=lambda x: x, + val_num=0.1, + test_num=0.1, + expected_train=torch.tensor( + [0, 1, 2, 3, 4, 5, 6, 7], dtype=torch.int64 + ), + expected_val=torch.tensor([8], dtype=torch.int64), + expected_test=torch.tensor([9], dtype=torch.int64), + ), + param( + "Using dst nodes", + edges=torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ), + sampling_direction="in", + hash_function=lambda x: x, + val_num=0.1, + test_num=0.1, + expected_train=torch.tensor( + [0, 1, 2, 3, 4, 5, 6, 7], dtype=torch.int64 + ), + expected_val=torch.tensor([8], dtype=torch.int64), + expected_test=torch.tensor([9], dtype=torch.int64), + ), + param( + "With dups", + edges=torch.stack( + [ + torch.cat( + [ + torch.arange(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ), + torch.zeros(20, dtype=torch.int64), + ] + ), + sampling_direction="out", + hash_function=lambda x: x, + val_num=0.1, + test_num=0.1, + expected_train=torch.tensor( + [0, 1, 2, 3, 4, 5, 6, 7], dtype=torch.int64 + ), + expected_val=torch.tensor([8], dtype=torch.int64), + expected_test=torch.tensor([9], dtype=torch.int64), + ), + param( + "Real hash fn", + edges=torch.stack( + [ + torch.zeros(20, dtype=torch.int64), + torch.arange(20, dtype=torch.int64), + ] + ), + sampling_direction="in", + hash_function=_fast_hash, + val_num=0.1, + test_num=0.1, + expected_train=torch.tensor( + [0, 18, 15, 10, 7, 19, 17, 3, 4, 2, 16, 14, 6, 11, 5, 13], + dtype=torch.int64, + ), + expected_val=torch.tensor([8, 1], dtype=torch.int64), + expected_test=torch.tensor([9, 12], dtype=torch.int64), + ), + param( + "With explicit val num", + edges=torch.stack( + [ + torch.arange(10, dtype=torch.int64), + torch.zeros(10, dtype=torch.int64), + ] + ), + sampling_direction="out", + hash_function=lambda x: x, + val_num=2, + test_num=0.1, + expected_train=torch.tensor([0, 1, 2, 3, 4, 5, 6], dtype=torch.int64), + expected_val=torch.tensor([7, 8], dtype=torch.int64), + expected_test=torch.tensor([9], dtype=torch.int64), + ), + param( + "With explicit test num", + edges=torch.stack( + [ + torch.arange(10, dtype=torch.int64), + torch.zeros(10, dtype=torch.int64), + ] + ), + sampling_direction="out", + hash_function=lambda x: x, + val_num=0.1, + test_num=3, + expected_train=torch.tensor([0, 1, 2, 3, 4, 5], dtype=torch.int64), + expected_val=torch.tensor([6], dtype=torch.int64), + expected_test=torch.tensor([7, 8, 9], dtype=torch.int64), + ), + param( + "With explicit val and test num", + edges=torch.stack( + [ + torch.arange(10, dtype=torch.int64), + torch.zeros(10, dtype=torch.int64), + ] + ), + sampling_direction="out", + hash_function=lambda x: x, + val_num=2, + 
test_num=3, + expected_train=torch.tensor([0, 1, 2, 3, 4], dtype=torch.int64), + expected_val=torch.tensor([5, 6], dtype=torch.int64), + expected_test=torch.tensor([7, 8, 9], dtype=torch.int64), + ), + param( + "Start from non-zero", + edges=torch.stack( + [ + torch.arange(2, 22, 2, dtype=torch.int64), + torch.zeros(10, dtype=torch.int64), + ] + ), + sampling_direction="out", + hash_function=lambda x: x, + val_num=0.1, + test_num=0.1, + expected_train=torch.tensor( + [2, 4, 6, 8, 10, 12, 14, 16], dtype=torch.int64 + ), + expected_val=torch.tensor([18], dtype=torch.int64), + expected_test=torch.tensor([20], dtype=torch.int64), + ), + ] + ) + def test_node_based_link_splitter( + self, + _, + edges, + sampling_direction, + hash_function, + val_num, + test_num, + expected_train, + expected_val, + expected_test, + ): + splitter = HashedNodeAnchorLinkSplitter( + sampling_direction=sampling_direction, + hash_function=hash_function, + num_val=val_num, + num_test=test_num, + ) + train, val, test = splitter(edges) + + assert_close(train, expected_train, rtol=0, atol=0) + assert_close(val, expected_val, rtol=0, atol=0) + assert_close(test, expected_test, rtol=0, atol=0) + + @parameterized.expand( + [ + param( + "One supervision edge type", + edges={ + EdgeType(_NODE_A, _TO, _NODE_B): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ) + }, + edge_types_to_split=[EdgeType(_NODE_A, _TO, _NODE_B)], + hash_function=lambda x: x, + val_num=1, + test_num=1, + expected={ + _NODE_B: ( + torch.arange(8, dtype=torch.int64), + torch.tensor([8], dtype=torch.int64), + torch.tensor([9], dtype=torch.int64), + ) + }, + ), + param( + "One supervision edge type, multiple edge types in graph", + edges={ + EdgeType(_NODE_A, _TO, _NODE_B): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ), + EdgeType(_NODE_A, _TO, _NODE_C): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, 20, dtype=torch.int64), + ] + ), + }, + edge_types_to_split=[ + EdgeType(_NODE_A, _TO, _NODE_B), + ], + hash_function=lambda x: x, + val_num=1, + test_num=1, + expected={ + _NODE_B: ( + torch.arange(8, dtype=torch.int64), + torch.tensor([8], dtype=torch.int64), + torch.tensor([9], dtype=torch.int64), + ), + }, + ), + param( + "Multiple supervision edge types, mutliple target node types", + edges={ + EdgeType(_NODE_A, _TO, _NODE_B): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ), + EdgeType(_NODE_A, _TO, _NODE_C): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, 20, dtype=torch.int64), + ] + ), + }, + edge_types_to_split=[ + EdgeType(_NODE_A, _TO, _NODE_B), + EdgeType(_NODE_A, _TO, _NODE_C), + ], + hash_function=lambda x: x, + val_num=1, + test_num=1, + expected={ + _NODE_B: ( + torch.arange(8, dtype=torch.int64), + torch.tensor([8], dtype=torch.int64), + torch.tensor([9], dtype=torch.int64), + ), + _NODE_C: ( + torch.arange(10, 18, dtype=torch.int64), + torch.tensor([18], dtype=torch.int64), + torch.tensor([19], dtype=torch.int64), + ), + }, + ), + param( + "Multiple supervision edge types, one target node type", + edges={ + EdgeType(_NODE_B, _TO, _NODE_A): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ), + EdgeType(_NODE_C, _TO, _NODE_A): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, 20, dtype=torch.int64), + ] + ), + }, + edge_types_to_split=[ + EdgeType(_NODE_B, _TO, 
_NODE_A), + EdgeType(_NODE_C, _TO, _NODE_A), + ], + hash_function=lambda x: x, + val_num=0.1, + test_num=0.1, + expected={ + _NODE_A: ( + torch.arange(16, dtype=torch.int64), + torch.tensor([16, 17], dtype=torch.int64), + torch.tensor([18, 19], dtype=torch.int64), + ), + }, + ), + param( + "Multiple supervision edge types, one target node type, dup nodes", + edges={ + EdgeType(_NODE_B, _TO, _NODE_A): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ), + EdgeType(_NODE_C, _TO, _NODE_A): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ), + }, + edge_types_to_split=[ + EdgeType(_NODE_B, _TO, _NODE_A), + EdgeType(_NODE_C, _TO, _NODE_A), + ], + hash_function=lambda x: x, + val_num=0.1, + test_num=0.1, + expected={ + _NODE_A: ( + torch.arange(8, dtype=torch.int64), + torch.tensor([8], dtype=torch.int64), + torch.tensor([9], dtype=torch.int64), + ), + }, + ), + param( + "Multiple supervision edge types, one target node type, different input shapes", + edges={ + EdgeType(_NODE_B, _TO, _NODE_A): torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ), + EdgeType(_NODE_C, _TO, _NODE_A): torch.stack( + [ + torch.zeros(2, dtype=torch.int64), + torch.arange(30, 32, dtype=torch.int64), + ] + ), + }, + edge_types_to_split=[ + EdgeType(_NODE_B, _TO, _NODE_A), + EdgeType(_NODE_C, _TO, _NODE_A), + ], + hash_function=lambda x: x, + val_num=0.1, + test_num=0.1, + expected={ + _NODE_A: ( + torch.arange(10, dtype=torch.int64), + torch.tensor([30], dtype=torch.int64), + torch.tensor([31], dtype=torch.int64), + ), + }, + ), + ] + ) + def test_node_based_link_splitter_heterogenous( + self, + _, + edges, + edge_types_to_split, + hash_function, + val_num, + test_num, + expected, + ): + splitter = HashedNodeAnchorLinkSplitter( + sampling_direction="in", + hash_function=hash_function, + num_val=val_num, + num_test=test_num, + edge_types=edge_types_to_split, + ) + split = splitter(edges) + + assert isinstance(split, Mapping) + self.assertEqual(split.keys(), expected.keys()) + for node_type, ( + expected_train, + expected_val, + expected_test, + ) in expected.items(): + train, val, test = split[node_type] + assert_close(train, expected_train, rtol=0, atol=0) + assert_close(val, expected_val, rtol=0, atol=0) + assert_close(test, expected_test, rtol=0, atol=0) + + @parameterized.expand( + [ + param( + "No edges to split - empty", + {EdgeType(_NODE_A, _TO, _NODE_B): torch.zeros(10, 2)}, + edge_types_to_split=[], + ), + param( + "No edges to split - None", + {EdgeType(_NODE_A, _TO, _NODE_B): torch.zeros(10, 2)}, + edge_types_to_split=None, + ), + param( + "Edges not in map", + {EdgeType(_NODE_A, _TO, _NODE_B): torch.zeros(10, 2)}, + edge_types_to_split=[EdgeType(_NODE_C, _TO, _NODE_A)], + ), + ] + ) + def test_node_based_link_splitter_heterogenous_invalid( + self, + _, + edges, + edge_types_to_split, + ): + with self.assertRaises(ValueError): + HashedNodeAnchorLinkSplitter( + sampling_direction="in", edge_types=edge_types_to_split + )( + edge_index=edges, + ) + + def test_node_based_link_splitter_no_train_nodes(self): + edges = torch.stack( + [ + torch.zeros(10, dtype=torch.int64), + torch.arange(10, dtype=torch.int64), + ] + ) + with self.assertRaises(ValueError): + HashedNodeAnchorLinkSplitter( + sampling_direction="in", num_val=5, num_test=5 + )(edges) + + @parameterized.expand( + [ + param( + "Too high train percentage", train_percentage=2.0, val_percentage=0.9 + ), + 
param( + "Too low train percentage", train_percentage=-0.2, val_percentage=0.9 + ), + param("Too high val percentage", train_percentage=0.8, val_percentage=2.3), + param("Negative val percentage", train_percentage=0.8, val_percentage=-1.0), + ] + ) + def test_check_val_test_percentage(self, _, train_percentage, val_percentage): + with self.assertRaises(ValueError): + _check_val_test_percentage(train_percentage, val_percentage) + + @parameterized.expand( + [ + param("First dimension is not 2", edges=torch.zeros(3, 3)), + param("Not two dimmensions", edges=torch.zeros(2)), + param("Sparse tensor", edges=torch.zeros(2, 2).to_sparse()), + ] + ) + def test_check_edge_index(self, _, edges): + with self.assertRaises(ValueError): + _check_edge_index(edges) + + +class SelectSSLPositiveLabelEdgesTest(unittest.TestCase): + @parameterized.expand( + [ + param( + "Test positive label selection", + positive_label_percentage=0.1, + expected_num_labels=100_000, + ), + param( + "Test zero positive label selection", + positive_label_percentage=0, + expected_num_labels=0, + ), + param( + "Test all positive label selection", + positive_label_percentage=1, + expected_num_labels=1_000_000, + ), + ] + ) + def test_valid_label_selection( + self, _, positive_label_percentage: float, expected_num_labels: int + ): + labels = select_ssl_positive_label_edges( + edge_index=_TEST_EDGE_INDEX, + positive_label_percentage=positive_label_percentage, + ) + self.assertEqual(labels.size(1), expected_num_labels) + + @parameterized.expand( + [ + param( + "Test invalid edge index", + edge_index=_INVALID_TEST_EDGE_INDEX, + positive_label_percentage=0.1, + ), + param( + "Test negative positive label percentage", + edge_index=_TEST_EDGE_INDEX, + positive_label_percentage=-0.1, + ), + param( + "Test positive label percentage greater than 1", + edge_index=_TEST_EDGE_INDEX, + positive_label_percentage=1.1, + ), + ] + ) + def test_invalid_label_selection( + self, _, edge_index: torch.Tensor, positive_label_percentage: float + ): + with self.assertRaises(ValueError): + select_ssl_positive_label_edges( + edge_index=edge_index, + positive_label_percentage=positive_label_percentage, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/tests/unit/utils_tests/gcs_test.py b/python/tests/unit/utils/gcs_test.py similarity index 100% rename from python/tests/unit/utils_tests/gcs_test.py rename to python/tests/unit/utils/gcs_test.py diff --git a/python/tests/unit/utils_tests/local_fs_test.py b/python/tests/unit/utils/local_fs_test.py similarity index 100% rename from python/tests/unit/utils_tests/local_fs_test.py rename to python/tests/unit/utils/local_fs_test.py diff --git a/python/tests/unit/utils_tests/os_utils_test.py b/python/tests/unit/utils/os_utils_test.py similarity index 100% rename from python/tests/unit/utils_tests/os_utils_test.py rename to python/tests/unit/utils/os_utils_test.py diff --git a/python/tests/unit/utils/share_memory_test.py b/python/tests/unit/utils/share_memory_test.py new file mode 100644 index 0000000..f542856 --- /dev/null +++ b/python/tests/unit/utils/share_memory_test.py @@ -0,0 +1,67 @@ +import unittest +from collections import abc +from typing import Dict, Optional, Union + +import torch +from graphlearn_torch.partition import RangePartitionBook +from parameterized import param, parameterized + +from gigl.src.common.types.graph_data import NodeType +from gigl.utils.share_memory import share_memory + + +class ShareMemoryTest(unittest.TestCase): + @parameterized.expand( + [ + param( + "Test 
share_memory when provided entity is None", + entity=None, + ), + param( + "Test share_memory when provided entity is homogeneous", + entity=torch.ones(10), + ), + param( + "Test share_memory when provided entity is heterogeneous", + entity={ + NodeType("user"): torch.ones(10), + NodeType("item"): torch.ones(20) * 2, + }, + ), + param( + "Test share_memory with range partition book", + entity=RangePartitionBook( + partition_ranges=[(0, 3), (3, 5)], partition_idx=0 + ), + ), + ] + ) + def test_share_memory( + self, + _, + entity: Optional[ + Union[torch.Tensor, RangePartitionBook, Dict[NodeType, torch.Tensor]] + ], + ): + share_memory(entity=entity) + if isinstance(entity, torch.Tensor): + self.assertTrue(entity.is_shared()) + elif isinstance(entity, RangePartitionBook): + self.assertTrue(entity.partition_bounds.is_shared()) + elif isinstance(entity, abc.Mapping): + for entity_tensor in entity.values(): + self.assertTrue(entity_tensor.is_shared()) + + def test_share_empty_memory(self): + # If tensors are empty, they should not be moved to shared_memory, as this may lead to transient failures, which may cause processes to hang. + + # 1D Empty Tensor + empty_1d_tensor = torch.empty(0) + share_memory(empty_1d_tensor) + + self.assertFalse(empty_1d_tensor.is_shared()) + + # 2D Empty Tensor + empty_2d_tensor = torch.empty((5, 0)) + share_memory(empty_2d_tensor) + self.assertFalse(empty_2d_tensor.is_shared()) diff --git a/requirements/dev_linux_cpu_requirements_unified.txt b/requirements/dev_linux_cpu_requirements_unified.txt index 77e16bb..b2e204a 100644 --- a/requirements/dev_linux_cpu_requirements_unified.txt +++ b/requirements/dev_linux_cpu_requirements_unified.txt @@ -2934,7 +2934,7 @@ tensorflow==2.15.1 \ # tensorflow-serving-api # tensorflow-transform # tfx-bsl -tensorflow-data-validation==1.14.0 ; platform_machine != "arm64" \ +tensorflow-data-validation==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:00b29e486c04e18ca255c764f8d788b398daf43c18a9372220c0e6d70f6d1131 \ --hash=sha256:0aa894ee574855beb0bbe4a06032515d6a261d6326958edd67b562c210601acf \ --hash=sha256:148604e3d7263343a4fc9cf591743b8f022b9474f38b0cb46df943072977f2fb \ @@ -2966,26 +2966,26 @@ tensorflow-io-gcs-filesystem==0.37.1 \ --hash=sha256:fe8dcc6d222258a080ac3dfcaaaa347325ce36a7a046277f6b3e19abc1efb3c5 \ --hash=sha256:ffebb6666a7bfc28005f4fbbb111a455b5e7d6cd3b12752b7050863ecb27d5cc # via tensorflow -tensorflow-metadata==1.14.0 ; platform_machine != "arm64" \ +tensorflow-metadata==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:5ff79bf96f98c800fc08270b852663afe7e74d7e1f92b50ba1487bfc63894cdb # via # gigl (python/pyproject.toml) # tensorflow-data-validation # tensorflow-transform # tfx-bsl -tensorflow-serving-api==2.15.1 ; platform_machine != "arm64" \ +tensorflow-serving-api==2.15.1 ; platform_system != "Darwin" \ --hash=sha256:7db02614e57f948f94da3b4cba3e90d19b6acde0d7fb3c0972312da1f0580b65 # via # gigl (python/pyproject.toml) # tfx-bsl -tensorflow-transform==1.14.0 ; platform_machine != "arm64" \ +tensorflow-transform==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:f21bb7dfbced576a5eec77c4ddaa59e74855144b57045e43ac46d78f51524ad1 # via gigl (python/pyproject.toml) termcolor==3.0.1 \ --hash=sha256:a6abd5c6e1284cea2934443ba806e70e5ec8fd2449021be55c280f8a3731b611 \ --hash=sha256:da1ed4ec8a5dc5b2e17476d859febdb3cccb612be1c36e64511a6f2485c10c69 # via tensorflow -tfx-bsl==1.14.0 ; platform_machine != "arm64" \ +tfx-bsl==1.14.0 ; platform_system != "Darwin" \ 
--hash=sha256:2cf105a8d2190f7a74b61cc52d59a80b8e2d238f07623cb41b55558ba8258b24 \ --hash=sha256:4c016162eeb9247ffb45b88a98b7f15c1d88d0719a200cea89a91fe5315e2afa \ --hash=sha256:585b194482b6e0a4e2643dafc8ff9621c7a85537d5ca2930485ef3806af4fbe5 \ diff --git a/requirements/dev_linux_cuda_requirements_unified.txt b/requirements/dev_linux_cuda_requirements_unified.txt index f083ecb..bac20a5 100644 --- a/requirements/dev_linux_cuda_requirements_unified.txt +++ b/requirements/dev_linux_cuda_requirements_unified.txt @@ -2924,7 +2924,7 @@ tensorflow==2.15.1 \ # tensorflow-serving-api # tensorflow-transform # tfx-bsl -tensorflow-data-validation==1.14.0 ; platform_machine != "arm64" \ +tensorflow-data-validation==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:00b29e486c04e18ca255c764f8d788b398daf43c18a9372220c0e6d70f6d1131 \ --hash=sha256:0aa894ee574855beb0bbe4a06032515d6a261d6326958edd67b562c210601acf \ --hash=sha256:148604e3d7263343a4fc9cf591743b8f022b9474f38b0cb46df943072977f2fb \ @@ -2956,26 +2956,26 @@ tensorflow-io-gcs-filesystem==0.37.1 \ --hash=sha256:fe8dcc6d222258a080ac3dfcaaaa347325ce36a7a046277f6b3e19abc1efb3c5 \ --hash=sha256:ffebb6666a7bfc28005f4fbbb111a455b5e7d6cd3b12752b7050863ecb27d5cc # via tensorflow -tensorflow-metadata==1.14.0 ; platform_machine != "arm64" \ +tensorflow-metadata==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:5ff79bf96f98c800fc08270b852663afe7e74d7e1f92b50ba1487bfc63894cdb # via # gigl (python/pyproject.toml) # tensorflow-data-validation # tensorflow-transform # tfx-bsl -tensorflow-serving-api==2.15.1 ; platform_machine != "arm64" \ +tensorflow-serving-api==2.15.1 ; platform_system != "Darwin" \ --hash=sha256:7db02614e57f948f94da3b4cba3e90d19b6acde0d7fb3c0972312da1f0580b65 # via # gigl (python/pyproject.toml) # tfx-bsl -tensorflow-transform==1.14.0 ; platform_machine != "arm64" \ +tensorflow-transform==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:f21bb7dfbced576a5eec77c4ddaa59e74855144b57045e43ac46d78f51524ad1 # via gigl (python/pyproject.toml) termcolor==3.0.1 \ --hash=sha256:a6abd5c6e1284cea2934443ba806e70e5ec8fd2449021be55c280f8a3731b611 \ --hash=sha256:da1ed4ec8a5dc5b2e17476d859febdb3cccb612be1c36e64511a6f2485c10c69 # via tensorflow -tfx-bsl==1.14.0 ; platform_machine != "arm64" \ +tfx-bsl==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:2cf105a8d2190f7a74b61cc52d59a80b8e2d238f07623cb41b55558ba8258b24 \ --hash=sha256:4c016162eeb9247ffb45b88a98b7f15c1d88d0719a200cea89a91fe5315e2afa \ --hash=sha256:585b194482b6e0a4e2643dafc8ff9621c7a85537d5ca2930485ef3806af4fbe5 \ diff --git a/requirements/install_py_deps.sh b/requirements/install_py_deps.sh index 492c43b..2ae778d 100644 --- a/requirements/install_py_deps.sh +++ b/requirements/install_py_deps.sh @@ -8,7 +8,7 @@ PIP_CREDENTIALS_MOUNTED=0 # When running this script in Docker environments, we for arg in "$@" do - case $arg in + case $arg in --dev) DEV=1 shift @@ -83,23 +83,31 @@ fi echo "Installing from ${req_file}" pip install -r $req_file $PIP_ARGS -# Only install GLT if not running on Mac. +# Only install GLT if not running on Mac. if ! is_running_on_mac; then # Without Ninja, we build sequentially which is very slow. echo "Installing Ninja as a build backend..." - sudo apt-get update -y - sudo apt-get install -y ninja-build + # Environments with sudo may require sudo to install ninja-build i.e. certain CI/CD environments. + # Whereas our docker images do not require sudo neither have it; thus this needs to be conditional. 
+    if command -v sudo &> /dev/null; then
+        sudo apt-get update -y
+        sudo apt-get install -y ninja-build
+    else
+        apt-get update -y
+        apt-get install -y ninja-build
+    fi

    echo "Installing GraphLearn-Torch"
    # Occasionally, there is an existing GLT folder, delete it so we can clone.
    rm -rf graphlearn-for-pytorch
-    # We upstream some bug fixes recently to GLT which have not been released yet.
+    # We upstreamed some bug fixes recently to GLT which have not been released yet.
+    # * https://github.com/alibaba/graphlearn-for-pytorch/pull/154
    # * https://github.com/alibaba/graphlearn-for-pytorch/pull/153
    # * https://github.com/alibaba/graphlearn-for-pytorch/pull/151
    # Thus, checking out a specific commit instead of a tagged version.
    git clone https://github.com/alibaba/graphlearn-for-pytorch.git \
        && cd graphlearn-for-pytorch \
-        && git checkout cb61c2734cf43d9b353c30755ccb8bdd678519c1 \
+        && git checkout 26fe3d4e050b081bc51a79dc9547f244f5d314da \
        && git submodule update --init \
        && bash install_dependencies.sh
    if has_cuda_driver;
@@ -118,8 +126,6 @@ else
 fi

-
-
 conda install -c conda-forge gperftools # tcmalloc, ref: https://google.github.io/tcmalloc/overview.html

 if [[ $DEV -eq 1 ]]
diff --git a/requirements/install_scala_deps.sh b/requirements/install_scala_deps.sh
index fcbd347..52efdd3 100644
--- a/requirements/install_scala_deps.sh
+++ b/requirements/install_scala_deps.sh
@@ -19,7 +19,7 @@ then
 else
     echo "Setting up Scala Deps for Linux Environment"
     mkdir -p tools/scala/coursier
-    curl -fL https://github.com/coursier/coursier/releases/latest/download/cs-x86_64-pc-linux.gz | gzip -d > tools/scala/coursier/cs && chmod +x tools/scala/coursier/cs && tools/scala/coursier/cs setup -y
+    curl -fL https://github.com/coursier/coursier/releases/download/v2.1.23/cs-x86_64-pc-linux.gz | gzip -d > tools/scala/coursier/cs && chmod +x tools/scala/coursier/cs && tools/scala/coursier/cs setup -y
 fi

 source ~/.profile
diff --git a/requirements/linux_cpu_requirements_unified.txt b/requirements/linux_cpu_requirements_unified.txt
index fff7673..0870bf1 100644
--- a/requirements/linux_cpu_requirements_unified.txt
+++ b/requirements/linux_cpu_requirements_unified.txt
@@ -2331,7 +2331,7 @@ tensorflow==2.15.1 \
     # tensorflow-serving-api
     # tensorflow-transform
     # tfx-bsl
-tensorflow-data-validation==1.14.0 ; platform_machine != "arm64" \
+tensorflow-data-validation==1.14.0 ; platform_system != "Darwin" \
    --hash=sha256:00b29e486c04e18ca255c764f8d788b398daf43c18a9372220c0e6d70f6d1131 \
    --hash=sha256:0aa894ee574855beb0bbe4a06032515d6a261d6326958edd67b562c210601acf \
    --hash=sha256:148604e3d7263343a4fc9cf591743b8f022b9474f38b0cb46df943072977f2fb \
@@ -2363,26 +2363,26 @@ tensorflow-io-gcs-filesystem==0.37.1 \
    --hash=sha256:fe8dcc6d222258a080ac3dfcaaaa347325ce36a7a046277f6b3e19abc1efb3c5 \
    --hash=sha256:ffebb6666a7bfc28005f4fbbb111a455b5e7d6cd3b12752b7050863ecb27d5cc
    # via tensorflow
-tensorflow-metadata==1.14.0 ; platform_machine != "arm64" \
+tensorflow-metadata==1.14.0 ; platform_system != "Darwin" \
    --hash=sha256:5ff79bf96f98c800fc08270b852663afe7e74d7e1f92b50ba1487bfc63894cdb
    # via
    #   gigl (python/pyproject.toml)
    #   tensorflow-data-validation
    #   tensorflow-transform
    #   tfx-bsl
-tensorflow-serving-api==2.15.1 ; platform_machine != "arm64" \
+tensorflow-serving-api==2.15.1 ; platform_system != "Darwin" \
    --hash=sha256:7db02614e57f948f94da3b4cba3e90d19b6acde0d7fb3c0972312da1f0580b65
    # via
    #   gigl (python/pyproject.toml)
    #   tfx-bsl
-tensorflow-transform==1.14.0 ; platform_machine != "arm64" \
+tensorflow-transform==1.14.0 ; platform_system != 
"Darwin" \ --hash=sha256:f21bb7dfbced576a5eec77c4ddaa59e74855144b57045e43ac46d78f51524ad1 # via gigl (python/pyproject.toml) termcolor==3.0.1 \ --hash=sha256:a6abd5c6e1284cea2934443ba806e70e5ec8fd2449021be55c280f8a3731b611 \ --hash=sha256:da1ed4ec8a5dc5b2e17476d859febdb3cccb612be1c36e64511a6f2485c10c69 # via tensorflow -tfx-bsl==1.14.0 ; platform_machine != "arm64" \ +tfx-bsl==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:2cf105a8d2190f7a74b61cc52d59a80b8e2d238f07623cb41b55558ba8258b24 \ --hash=sha256:4c016162eeb9247ffb45b88a98b7f15c1d88d0719a200cea89a91fe5315e2afa \ --hash=sha256:585b194482b6e0a4e2643dafc8ff9621c7a85537d5ca2930485ef3806af4fbe5 \ diff --git a/requirements/linux_cuda_requirements_unified.txt b/requirements/linux_cuda_requirements_unified.txt index 5a94832..36e95c1 100644 --- a/requirements/linux_cuda_requirements_unified.txt +++ b/requirements/linux_cuda_requirements_unified.txt @@ -2321,7 +2321,7 @@ tensorflow==2.15.1 \ # tensorflow-serving-api # tensorflow-transform # tfx-bsl -tensorflow-data-validation==1.14.0 ; platform_machine != "arm64" \ +tensorflow-data-validation==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:00b29e486c04e18ca255c764f8d788b398daf43c18a9372220c0e6d70f6d1131 \ --hash=sha256:0aa894ee574855beb0bbe4a06032515d6a261d6326958edd67b562c210601acf \ --hash=sha256:148604e3d7263343a4fc9cf591743b8f022b9474f38b0cb46df943072977f2fb \ @@ -2353,26 +2353,26 @@ tensorflow-io-gcs-filesystem==0.37.1 \ --hash=sha256:fe8dcc6d222258a080ac3dfcaaaa347325ce36a7a046277f6b3e19abc1efb3c5 \ --hash=sha256:ffebb6666a7bfc28005f4fbbb111a455b5e7d6cd3b12752b7050863ecb27d5cc # via tensorflow -tensorflow-metadata==1.14.0 ; platform_machine != "arm64" \ +tensorflow-metadata==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:5ff79bf96f98c800fc08270b852663afe7e74d7e1f92b50ba1487bfc63894cdb # via # gigl (python/pyproject.toml) # tensorflow-data-validation # tensorflow-transform # tfx-bsl -tensorflow-serving-api==2.15.1 ; platform_machine != "arm64" \ +tensorflow-serving-api==2.15.1 ; platform_system != "Darwin" \ --hash=sha256:7db02614e57f948f94da3b4cba3e90d19b6acde0d7fb3c0972312da1f0580b65 # via # gigl (python/pyproject.toml) # tfx-bsl -tensorflow-transform==1.14.0 ; platform_machine != "arm64" \ +tensorflow-transform==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:f21bb7dfbced576a5eec77c4ddaa59e74855144b57045e43ac46d78f51524ad1 # via gigl (python/pyproject.toml) termcolor==3.0.1 \ --hash=sha256:a6abd5c6e1284cea2934443ba806e70e5ec8fd2449021be55c280f8a3731b611 \ --hash=sha256:da1ed4ec8a5dc5b2e17476d859febdb3cccb612be1c36e64511a6f2485c10c69 # via tensorflow -tfx-bsl==1.14.0 ; platform_machine != "arm64" \ +tfx-bsl==1.14.0 ; platform_system != "Darwin" \ --hash=sha256:2cf105a8d2190f7a74b61cc52d59a80b8e2d238f07623cb41b55558ba8258b24 \ --hash=sha256:4c016162eeb9247ffb45b88a98b7f15c1d88d0719a200cea89a91fe5315e2afa \ --hash=sha256:585b194482b6e0a4e2643dafc8ff9621c7a85537d5ca2930485ef3806af4fbe5 \ diff --git a/scala/README.md b/scala/README.md index ce73a5b..5ffed60 100644 --- a/scala/README.md +++ b/scala/README.md @@ -2,37 +2,42 @@ ### Building and Testing the project -Run `make install_deps` first if you haven't to install scala & spark. -cd inside the ```scala``` directory. -To compile all projects in the repo and generate the jar files, run: +Run `make install_deps` first if you haven't to install scala & spark. cd inside the `scala` directory. 
To compile all
+projects in the repo and generate the jar files, run:
+
 ```
 sbt assembly
 ```

 To assemble a specific project:
+
 ```
 sbt subgraph_sampler/assembly
 ```

 To run all tests:
+
 ```
 sbt test
 ```

 Similarly to run a specific test suite:
+
 ```
 sbt subgraph_sampler/test
 ```

 To clean up all target files:
+
 ```
 make clean_build_files
 ```

 ### Running spark jobs locally

-Please check the Makefile for commands to run the spark jobs locally.
-The jobs makes use of the mocked assets from the directory
+Please check the Makefile for commands to run the spark jobs locally. The jobs make use of the mocked assets from the
+directory
+
 ```
 common/src/test/assets
 ```
@@ -40,22 +45,28 @@ common/src/test/assets
 #### Set log level

 To silence the worker logs

-1. Create log4j.properties file from template, under `/scala` dir, do `cp ../tools/scala/spark-3.1.3-bin-hadoop3.2/conf/log4j.properties.template ../tools/scala/spark-3.1.3-bin-hadoop3.2/conf/log4j.properties`
-2. Update the first line in `log4j.properties` to `log4j.rootCategory=WARN, console`
+1. Create log4j.properties file from template, under `/scala` dir, do
+   `cp ../tools/scala/spark-3.1.3-bin-hadoop3.2/conf/log4j.properties.template ../tools/scala/spark-3.1.3-bin-hadoop3.2/conf/log4j.properties`
+1. Update the first line in `log4j.properties` to `log4j.rootCategory=WARN, console`

 Note: Mocked assets are generated using the dataset asset mocking suite (in `python/gigl/src/mocking/`)

 ### How to build and deploy spark-tfrecord package used in the Spark Jobs

-Note: remember to have local deps for developing installed by running `make install_deps`. See main README.md for more details.
-We make use of the Spark [TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) Connector provided by the Linkedin repo [linkedin/spark-tfrecord](https://github.com/linkedin/spark-tfrecord). We deploy and maintain our own copies off the jar since not all sbt/scala vers are available on Maven Central, etc.
+Note: remember to have local deps for developing installed by running `make install_deps`. See main README.md for more
+details.
+
+We make use of the Spark [TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) Connector provided by the
+Linkedin repo [linkedin/spark-tfrecord](https://github.com/linkedin/spark-tfrecord). We deploy and maintain our own
+copies of the jar since not all sbt/scala versions are available on Maven Central, etc.

 To build:

-First clone the repo, then cd into directory.
+First clone the repo, then cd into the directory.

 Install maven if not already installed:
+
 ```
 Linux:
 sudo apt-get install maven
@@ -63,12 +74,15 @@ sudo apt-get install maven
 OSX:
 brew install maven
 ```
+
 Build with maven (specific scala and spark versions can be found in `build.sbt` file in our repo)
+
 ```
 mvn -Pscala-2.12 clean install -Dspark.version=3.2.0
 ```

 Copy to GCS and deploy:
+
 ```
 gsutil cp target/spark-tfrecord_2.12-0.5.0.jar gs://$YOUR_BUCKET/your/path/to/snap-spark-custom-tfrecord_2.12-{version_number}.jar
 ```
diff --git a/scala/common/src/test/assets/resource_config.yaml b/scala/common/src/test/assets/resource_config.yaml
index bd72115..5d0979b 100644
--- a/scala/common/src/test/assets/resource_config.yaml
+++ b/scala/common/src/test/assets/resource_config.yaml
@@ -8,9 +8,10 @@ shared_resource_config:
     temp_assets_bucket: "gs://gigl-cicd-temp"
     temp_regional_assets_bucket: "gs://gigl-cicd-temp"
     perm_assets_bucket: "gs://gigl-cicd-temp" # For testing, we don't persist models, configs, etc.
- they should expire w/ TTL - temp_assets_bq_dataset_name: "TEST ASSET PLACEHOLDER" - embedding_bq_dataset_name: "TEST ASSET PLACEHOLDER" - gcp_service_account_email: "untrusted-external-github-gigl@external-snap-ci-github-gigl.iam.gserviceaccount.com" + # For testing we do not persist to bq and all tests are run locally so no gcp service account is needed + temp_assets_bq_dataset_name: "FAKE DATASET NAME 1" + embedding_bq_dataset_name: "FAKE DATASET NAME 2" + gcp_service_account_email: "FAKE SA" dataflow_runner: "DirectRunner" preprocessor_config: edge_preprocessor_config: diff --git a/scala/common/src/test/assets/subgraph_sampler/heterogeneous/node_anchor_based_link_prediction/frozen_gbml_config_graphdb_dblp_local.yaml b/scala/common/src/test/assets/subgraph_sampler/heterogeneous/node_anchor_based_link_prediction/frozen_gbml_config_graphdb_dblp_local.yaml index 4267a32..02a166c 100755 --- a/scala/common/src/test/assets/subgraph_sampler/heterogeneous/node_anchor_based_link_prediction/frozen_gbml_config_graphdb_dblp_local.yaml +++ b/scala/common/src/test/assets/subgraph_sampler/heterogeneous/node_anchor_based_link_prediction/frozen_gbml_config_graphdb_dblp_local.yaml @@ -1,8 +1,8 @@ datasetConfig: dataPreprocessorConfig: dataPreprocessorArgs: - bq_edges_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_node_anchor_user_friend_user_edges - bq_nodes_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_node_anchor_user_nodes + bq_edges_table_name: "FAKE DATASET NAME 1" + bq_nodes_table_name: "FAKE DATASET NAME 2" dataPreprocessorConfigClsPath: this.is.non.existent.test.path.1 subgraphSamplerConfig: # numMaxTrainingSamplesToOutput: 10 diff --git a/scala/common/src/test/assets/subgraph_sampler/node_anchor_based_link_prediction/frozen_gbml_config.yaml b/scala/common/src/test/assets/subgraph_sampler/node_anchor_based_link_prediction/frozen_gbml_config.yaml index ccb6474..404c5b0 100755 --- a/scala/common/src/test/assets/subgraph_sampler/node_anchor_based_link_prediction/frozen_gbml_config.yaml +++ b/scala/common/src/test/assets/subgraph_sampler/node_anchor_based_link_prediction/frozen_gbml_config.yaml @@ -1,8 +1,8 @@ datasetConfig: dataPreprocessorConfig: dataPreprocessorArgs: - bq_edges_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_node_anchor_user_friend_user_edges - bq_nodes_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_node_anchor_user_nodes + bq_edges_table_name: "FAKE DATASET NAME 1" + bq_nodes_table_name: "FAKE DATASET NAME 2" dataPreprocessorConfigClsPath: this.is.non.existent.test.path.1 subgraphSamplerConfig: # numMaxTrainingSamplesToOutput: 10 diff --git a/scala/common/src/test/assets/subgraph_sampler/supervised_node_classification/frozen_gbml_config.yaml b/scala/common/src/test/assets/subgraph_sampler/supervised_node_classification/frozen_gbml_config.yaml index 47321dc..cb1587a 100755 --- a/scala/common/src/test/assets/subgraph_sampler/supervised_node_classification/frozen_gbml_config.yaml +++ b/scala/common/src/test/assets/subgraph_sampler/supervised_node_classification/frozen_gbml_config.yaml @@ -12,8 +12,8 @@ taskMetadata: datasetConfig: dataPreprocessorConfig: dataPreprocessorArgs: - bq_edges_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_supervised_node_classification_user_friend_user_edges - bq_nodes_table_name: 
external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_supervised_node_classification_user_nodes
+      bq_edges_table_name: external-snap-ci-github-gigl.public_gigl.toy_graph_homogeneous_supervised_node_classification_user_friend_user_edges
+      bq_nodes_table_name: external-snap-ci-github-gigl.public_gigl.toy_graph_homogeneous_supervised_node_classification_user_nodes
       dataPreprocessorConfigClsPath: this.is.non.existent.test.path.1
 subgraphSamplerConfig:
   numHops: 2
diff --git a/scala/split_generator/README.md b/scala/split_generator/README.md
index 854a74b..e57be3b 100644
--- a/scala/split_generator/README.md
+++ b/scala/split_generator/README.md
@@ -1,44 +1,55 @@
 ## Split Generator
----
+
+______________________________________________________________________
+
 ### Steps:
- 1. Read the assigner and split strategy classpaths from the config.
- 2. Read the SGS output from GCS using the TFRecord Spark Connector (more details on that below)
- 3. Coalesce the read dataframe. This is to increase the CPU utilization of the Spark job (no shuffle cost incurred)
- 4. Cache this dataframe as we will require this to compute all the 3 (train/test/val) splits. If we do not cache this Spark tries to read the input from GCS 3 times. We can skip caching for inference only jobs as we only want to compute the test split.
- 5. Convert the Array of Bytes Dataset to the proto Dataset.
- 6. Using dataset.map() we map every sample to the output sample of the respective split using the SplitStrategy class(which does not take any Spark dependency). We do this for every split.
- 7. The output dataset is converted back to a dataset of array of bytes which is written to GCS.
+
+1. Read the assigner and split strategy classpaths from the config.
+2. Read the SGS output from GCS using the TFRecord Spark Connector (more details on that below).
+3. Coalesce the read dataframe. This is to increase the CPU utilization of the Spark job (no shuffle cost incurred).
+4. Cache this dataframe as we will require it to compute all 3 (train/test/val) splits. If we do not cache it, Spark
+   tries to read the input from GCS 3 times. We can skip caching for inference-only jobs as we only want to compute
+   the test split.
+5. Convert the Array of Bytes Dataset to the proto Dataset.
+6. Using dataset.map() we map every sample to the output sample of the respective split using the SplitStrategy class
+   (which does not take any Spark dependency). We do this for every split.
+7. The output dataset is converted back to a dataset of array of bytes which is written to GCS.

 ### SplitGen Arguments:

-* **train/test/val ratio**
-  Ratios of the respective split. We use the ```scala.util.hashing.MurmurHash3``` library to hash the edges/nodes to the splits. Murmur hash is independent of the machine it runs on and hence produces the same split irrespective of the underlying machine.
-* **should_split_edges_symmetrically**
+- **train/test/val ratio**
-  flag to indicate whether bidirectional edges should be assigned to the same split. That is if set to true a->b and b->a will belong to the same split. Typically set false for directed graphs.
+  Ratios of the respective split. We use the `scala.util.hashing.MurmurHash3` library to hash the edges/nodes to the
+  splits. Murmur hash is independent of the machine it runs on and hence produces the same split irrespective of the
+  underlying machine.
-* **is_disjoint_mode**
-  flag to indicate if the training is done in disjoint mode or not. In non-disjoint mode, both supervision and message passing edges are treated the same for the train split. In disjoint mode, they are treated separately.
+- **should_split_edges_symmetrically**
-* **assignerClsPth** - class path to the assginer that is used for assigning the nodes/edges to a respective split. New assiners must inplement the abstract class ```Assigner```
+  flag to indicate whether bidirectional edges should be assigned to the same split. That is, if set to true, a->b and
+  b->a will belong to the same split. Typically set to false for directed graphs.
-* **splitStrategyClsPth** - class path to the Split Strategy being used for the job. New implmentations must inplement the abstract class ```SplitStrategy```
-
+- **is_disjoint_mode** flag to indicate if the training is done in disjoint mode or not. In non-disjoint mode, both
+  supervision and message passing edges are treated the same for the train split. In disjoint mode, they are treated
+  separately.
+- **assignerClsPth** class path to the assigner that is used for assigning the nodes/edges to a respective split. New
+  assigners must implement the abstract class `Assigner`
+
+- **splitStrategyClsPth** class path to the Split Strategy being used for the job. New implementations must implement
+  the abstract class `SplitStrategy`

 ### Implementation:

-- Leverages Dataset.map() and uses custom Scala code to convert an input sample to 3 output samples for each train/test/val split.
+- Leverages Dataset.map() and uses custom Scala code to convert an input sample to 3 output samples for each
+  train/test/val split.
 - Relies on caching and partition coalesce for performance optimization.
 - Can be run on mac, gCloud VMs and Dataproc VMs. (cmds are available in Makefile)
-
 ## Spark Optimizations / Important points to consider

-* The current spark plan only contains one Deserialization and Serialization step. It is important that we keep this behaviour as it directly affects the performance of the job.
-* We use a coalesce factor to decrease the input partitions and in turn increase the CPU utilization. Currently this is hardcoded in the code to 12 (for main samples) and 4 (for rooted node neighborhood samples) as this was fastest. If the number of input partiotions is changed by SGS job, we can reconsider these numbers.
-
+- The current spark plan only contains one Deserialization and Serialization step. It is important that we keep this
+  behaviour as it directly affects the performance of the job.
+- We use a coalesce factor to decrease the input partitions and in turn increase the CPU utilization. Currently this is
+  hardcoded in the code to 12 (for main samples) and 4 (for rooted node neighborhood samples) as this was fastest. If
+  the number of input partitions is changed by the SGS job, we can reconsider these numbers.
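The MurmurHash3-based assignment described in the split generator README above is the same machine-independent, hash-the-id-then-threshold idea that the `HashedNodeAnchorLinkSplitter` unit tests earlier in this diff exercise. A minimal Python sketch of that idea (the function name is illustrative, not GiGL's implementation, and the identity hash stands in for a real hash function so the output is easy to verify by hand):

```python
import torch

def split_nodes_by_hash(edge_index, val_frac=0.1, test_frac=0.1, hash_fn=lambda x: x):
    # Order the unique anchor (src) nodes by their hash. Because the hash of a
    # node id does not depend on the machine it runs on, every worker derives
    # the exact same train/val/test assignment.
    nodes = torch.unique(edge_index[0])
    nodes = nodes[torch.argsort(hash_fn(nodes))]
    n = nodes.numel()
    n_val, n_test = int(n * val_frac), int(n * test_frac)
    n_train = n - n_val - n_test
    return nodes[:n_train], nodes[n_train : n_train + n_val], nodes[n_train + n_val :]

# With 10 anchor nodes and the identity hash this reproduces the expectations of
# the "Using src nodes" test case earlier in this diff: train=[0..7], val=[8], test=[9].
edges = torch.stack([torch.arange(10), torch.zeros(10, dtype=torch.int64)])
train, val, test = split_nodes_by_hash(edges)
```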
diff --git a/scala/subgraph_sampler/README.md b/scala/subgraph_sampler/README.md
index 338a1a2..10365f3 100644
--- a/scala/subgraph_sampler/README.md
+++ b/scala/subgraph_sampler/README.md
@@ -1,70 +1,99 @@
 ## Notes for developers
+
 1. [SGS as of now](#sgs)
-2. [Scalability](#scalability)
-3. [Resources](#resources)
+1. [Scalability](#scalability)
+1. [Resources](#resources)

 ## SGS
----
+
+______________________________________________________________________
+
 ### Steps:
- 1. Load node/edge TFRecord resources into Spark DataFrame
- 2. Sample 1hop neighbors (only node ids)
- 3. Sample 2hop neighbors (only node ids)
- 4. Hydrate kth hop neighbors with both node and edge features/type
- 5. Merge 1hop and 2hop hydrated neighbors
- 6. Merge hydrated root node to step 5, and create subgraphDF
- 7. Append isolated nodes (if any) to subgraphDF [RootedNodeNeighnorhood]
- 8. Add task-relevent samples to subgraphDF (such as positive node neighborshoods or node labels) to create trainingSubgraphDF
- 9. (If specified) append isolated nodes to trainingSubgraphDF [SupervisedNodeClassificationSample,NodeAnchorBasedLinkPredictionSample ]
- 10. Modify subgraphDF and trainingSubgraphDF schema to compatible structure as defined in `training_samples_schema.proto`.
- 11. Convert DataFrames from step 10 to DataSet and map DataSet rows to ByteArray
- 12. Write serialized DataSet to TFRecord
+
+1. Load node/edge TFRecord resources into Spark DataFrame
+2. Sample 1hop neighbors (only node ids)
+3. Sample 2hop neighbors (only node ids)
+4. Hydrate kth hop neighbors with both node and edge features/type
+5. Merge 1hop and 2hop hydrated neighbors
+6. Merge hydrated root node to step 5, and create subgraphDF
+7. Append isolated nodes (if any) to subgraphDF [RootedNodeNeighborhood]
+8. Add task-relevant samples to subgraphDF (such as positive node neighborhoods or node labels) to create
+   trainingSubgraphDF
+9. (If specified) append isolated nodes to trainingSubgraphDF
+   [SupervisedNodeClassificationSample, NodeAnchorBasedLinkPredictionSample]
+10. Modify subgraphDF and trainingSubgraphDF schema to a compatible structure as defined in
+    `training_samples_schema.proto`.
+11. Convert DataFrames from step 10 to DataSet and map DataSet rows to ByteArray
+12. Write serialized DataSet to TFRecord

 ### Properties:

-* **Isolated nodes** - are always included in RootedNodeNeighnorhood samples. They can be included in training samples if the assigned flag for isolated nodes is set to true.
-* **Self loops**
+- **Isolated nodes**
-  are removed in preprocessor. They can be added in trainer if users wish to, but SGS is not concerned with self loops.
+  are always included in RootedNodeNeighborhood samples. They can be included in training samples if the assigned flag
+  for isolated nodes is set to true.
-* **Num hops**
+- **Self loops**
-  is hard-coded to k=2. But the code is extendable to k>2, only Spark optimizations are bottle-neck since as the number of hops grow neighborhood size grows exponentially.
+  are removed in preprocessor. They can be added in trainer if users wish to, but SGS is not concerned with self loops.
-* **Graph type**
+- **Num hops**
-  only supports homogeneous graphs.
+  is hard-coded to k=2. But the code is extendable to k>2; only Spark optimizations are the bottleneck, since as the
+  number of hops grows the neighborhood size grows exponentially.
-  supports both undirected and directed graphs.
+- **Graph type**
-* **Neighborhood sampling**
+  only supports homogeneous graphs.
-  uniform sampling.
+  supports both undirected and directed graphs.
-  - note that there are two implementations for uniform sampling:
-    1. non-deterministic (using built-in Spark functions), which is the default mode of sampling in SGS
-    2. deterministic (using hash based permutation). To enable it, set
-    ```
-    subgraphSamplerConfigs:
-     experimetalFlags:
-        permutation_strategy: deterministic
-    ```
-    for more info on why there are two implementations, check [this](https://docs.google.com/document/d/1TeJYu9bVFu463Pjfsv7UFtBxu9KZBH8eAH_SyV8vkAM/edit) doc
+- **Neighborhood sampling**
-* **numTrainingSamples**
+  uniform sampling.
-  If users wish to downsample number of nodes used for training (not in inferencer), they can adjut this number as below
+  - note that there are two implementations for uniform sampling:
+    1. non-deterministic (using built-in Spark functions), which is the default mode of sampling in SGS
+    1. deterministic (using hash based permutation). To enable it, set
     ```
     subgraphSamplerConfigs:
-    numTrainingSamples : 10000000
+      experimentalFlags:
+        permutation_strategy: deterministic
     ```
-  NOTE that for NodeAnchorBasedLinkPrediction samples the value set for numTrainingSamples will be further reduced in the next component i.e. Split Generator by a factor of `train_split`. Because if in a train split for an anchor node if at least a positve edge does not exit, that anchor node is dropped from train split.
+    for more info on why there are two implementations, check
+    [this](https://docs.google.com/document/d/1TeJYu9bVFu463Pjfsv7UFtBxu9KZBH8eAH_SyV8vkAM/edit) doc
+
+  - experimentalFlags can also be set to sample neighbors with or without replacement. To enable sampling with
+    replacement, set
+
+    ```
+    subgraphSamplerConfigs:
+      experimentalFlags:
+        sample_with_replacement: true
+    ```
+
+- **numTrainingSamples**
+
+  If users wish to downsample the number of nodes used for training (not in inferencer), they can adjust this number as
+  below
+
+  ```
+  subgraphSamplerConfigs:
+    numTrainingSamples : 10000000
+  ```
+
+  NOTE that for NodeAnchorBasedLinkPrediction samples the value set for numTrainingSamples will be further reduced in
+  the next component, i.e. the Split Generator, by a factor of `train_split`. Because if, in the train split, an anchor
+  node does not have at least one positive edge, that anchor node is dropped from the train split.
+
+- **Valid Samples** (for NodeAnchorBasedLinkPrediction Task)

-* **Valid Samples** (for NodeAnchorBasedLinkPrediction Task)
-  1. In random_negative_rooted_neighborhood_samples: every node which appears in the graph should get an embedding. Hence, they must appear in the rooted neighborhoods for inferencer to use, regardless of what their (in-)neighborhood looks like.
+  1. In random_negative_rooted_neighborhood_samples: every node which appears in the graph should get an embedding.
+     Hence, they must appear in the rooted neighborhoods for inferencer to use, regardless of what their
+     (in-)neighborhood looks like.

-  2. In node_anchor_based_link_prediction_samples: every node which has any outgoing edge could be a valid training sample, since in practice we will want to have our trained model robustly perform well at ranking the positive edge above negative edges, regardless of what their (in-)neighborhood looks like.
+  1. In node_anchor_based_link_prediction_samples: every node which has any outgoing edge could be a valid training
+     sample, since in practice we will want to have our trained model robustly perform well at ranking the positive edge
+     above negative edges, regardless of what their (in-)neighborhood looks like.

 ### Implementation:
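For intuition about the two uniform-sampling behaviours discussed in the Properties section above (and the new `sample_with_replacement` flag), here is a small, illustrative Python sketch of what sampling with versus without replacement means for a single node's neighbor list; the real sampling runs inside Spark SQL, not in Python:

```python
import random

def sample_neighbors(neighbors, k, with_replacement=False):
    """Uniformly sample up to k neighbors of a single node."""
    if not neighbors:
        return []
    if with_replacement:
        # Independent draws: a neighbor may be picked multiple times, and a
        # low-degree node still yields exactly k samples.
        return [random.choice(neighbors) for _ in range(k)]
    # Without replacement, we can return at most len(neighbors) samples.
    return random.sample(neighbors, min(k, len(neighbors)))

# A node with 3 neighbors, asked for 5 samples:
print(sample_neighbors([10, 11, 12], k=5, with_replacement=True))   # e.g. [11, 10, 11, 12, 11]
print(sample_neighbors([10, 11, 12], k=5, with_replacement=False))  # some ordering of [10, 11, 12]
```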
+
+- **Valid Samples** (for NodeAnchorBasedLinkPrediction Task)
-* **Valid Samples** (for NodeAnchorBasedLinkPrediction Task)
-  1. In random_negative_rooted_neighborhood_samples: every node which appears in the graph should get an embedding. Hence, they must appear in the rooted neighborhoods for inferencer to use, regardless of what their (in-)neighborhood looks like.
+
+  1. In random_negative_rooted_neighborhood_samples: every node which appears in the graph should get an embedding.
+     Hence, they must appear in the rooted neighborhoods for inferencer to use, regardless of what their
+     (in-)neighborhood looks like.
-  2. In node_anchor_based_link_prediction_samples: every node which has any outgoing edge could be a valid training sample, since in practice we will want to have our trained model robustly perform well at ranking the positive edge above negative edges, regardless of what their (in-)neighborhood looks like.
+
+  1. In node_anchor_based_link_prediction_samples: every node which has any outgoing edge could be a valid training
+     sample, since in practice we will want to have our trained model robustly perform well at ranking the positive
+     edge above negative edges, regardless of what their (in-)neighborhood looks like.

 ### Implementation:

@@ -76,48 +105,70 @@
 - Can be run on mac, gCloud VMs and Dataproc VMs. (cmds are available in Makefile)
-
 ## Scalability
----
+
+______________________________________________________________________

 ### Performance
-Time: It is not expected to have SGS task running more than 3hrs. If that's happening the cluster size is too small, there are spills during some stages or caching steps implemented in code are not taking into effect.
+
+Time: The SGS task is not expected to run for more than 3 hrs. If that's happening, the cluster size is too small,
+there are spills during some stages, or caching steps implemented in code are not taking effect.

 Cost: See Google Cloud [pricing calculator](https://cloud.google.com/products/calculator/#id=)

 ### Load
+
 As any of below factor increases we should think of strategies to scale the SGS job:
+
 1. Graph size (number of nodes and edges)
-2. Number of neighborhood samples and Number of Positive Samples (if any)
-3. Node feature Dim
-4. Edge feature Dim
-5. Number of hops
+1. Number of neighborhood samples and Number of Positive Samples (if any)
+1. Node feature Dim
+1. Edge feature Dim
+1. Number of hops

 ### Spark Optimization/Scaling strategies

-* The easiest way is to scale the cluster horizontally. As of now the number of data partitions (i.e. spark sql partitions), is proportional to cluster size, such that cpu usage is at max (>90%) to avoid waste of resources. Scaling out number of workers or changing the machine types should be done carefully. If we pick cluster that is too big for the task it will be to the job's disadvantage. (unnecessary large number of data partitions leads to slow reads, large network communications and long shuffle time)
-* Use Local SSDs (NVMe interface) attached to each worker to store cached data. Do not cache data into working memory, since it intensifies spills. For final stages of SGS job (pointed out in the code), spills are inevitable but the spills are into SSDs and hence won't hurt performance badly).
-* Each SGS task firstly creates a SubgraphDF (which includes all hydrated neighborhoods for all root nodes). This SubgraphDF must be cached and cache is NOT triggered unless the RootedNodeNeighborhood samples are written. Then downstream tasks do not repeat neighborhood sampling/hydration and in turn use cached data which saves at least one hr as of now for MAU data.
-* Avoid using UDFs, instead maximize leveraging spark.sql [functions](https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html).
-* There are stages that require repartitioning as optimization strategy. That is because the size of data in each partition grows (we see spills) and by repartitioning we reduce the size of data in each partition. You can find stages that need repartition from Spark UI.
-* Do NOT change YARN default parameters for Dataproc Cluster.
-* If cost become a bottleneck, (and despite trying above strategies we still need to scale) Autoscaling Dataproc with customized autoscaling policy should be a potential solution.
+- The easiest way is to scale the cluster horizontally. As of now the number of data partitions (i.e. spark sql
+  partitions) is proportional to cluster size, such that cpu usage is at max (>90%) to avoid waste of resources.
+  Scaling out the number of workers or changing the machine types should be done carefully. If we pick a cluster that
+  is too big for the task it will be to the job's disadvantage (an unnecessarily large number of data partitions leads
+  to slow reads, heavy network communication and long shuffle times).
+- Use Local SSDs (NVMe interface) attached to each worker to store cached data. Do not cache data into working memory,
+  since it intensifies spills. For final stages of the SGS job (pointed out in the code), spills are inevitable, but
+  the spills are onto SSDs and hence won't hurt performance badly.
+- Each SGS task first creates a SubgraphDF (which includes all hydrated neighborhoods for all root nodes). This
+  SubgraphDF must be cached; the cache is NOT triggered until the RootedNodeNeighborhood samples are written. Then
+  downstream tasks do not repeat neighborhood sampling/hydration and in turn use cached data, which saves at least one
+  hr as of now for MAU data.
+- Avoid using UDFs; instead maximize leveraging spark.sql
+  [functions](https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html) (see the short
+  sketch at the end of this doc).
+- There are stages that require repartitioning as an optimization strategy. That is because the size of data in each
+  partition grows (we see spills) and by repartitioning we reduce the size of data in each partition. You can find
+  stages that need repartitioning from the Spark UI.
+- Do NOT change YARN default parameters for Dataproc Cluster.
+- If cost becomes a bottleneck (and despite trying the above strategies we still need to scale), autoscaling Dataproc
+  with a customized autoscaling policy should be a potential solution.

 ## Resources
----
+
+______________________________________________________________________
+
 Naming Conventions

-* [Scala](https://docs.scala-lang.org/style/naming-conventions.html)
+- [Scala](https://docs.scala-lang.org/style/naming-conventions.html)

-* [Spark](https://github.com/databricks/scala-style-guide)
+- [Spark](https://github.com/databricks/scala-style-guide)

 Spark
-* [SQL](https://spark.apache.org/docs/latest/sql-ref-functions.html)
-* [DataFrame](https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html)
+
+- [SQL](https://spark.apache.org/docs/latest/sql-ref-functions.html)
+- [DataFrame](https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html)

 Protobuf and Spark
-* [ScalaPB](https://scalapb.github.io/docs/sparksql/)
-Optimization
-* The version of Spark matters.
-* https://medium.com/@vrba.dave
+
+- [ScalaPB](https://scalapb.github.io/docs/sparksql/)
+
+Optimization
+
+- The version of Spark matters.
+- https://medium.com/@vrba.dave
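+
+As a minimal illustration of the "avoid UDFs" guidance above (the column name `_neighbor_arr` is made up for this
+sketch and does not come from the codebase), prefer a built-in spark.sql function over a UDF whenever one exists:
+
+```
+import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.functions.udf
+import org.apache.spark.sql.{functions => F}
+
+// Slower: a UDF is opaque to the Catalyst optimizer, blocking pushdown and codegen.
+val takeFirstK = udf((arr: Seq[Int], k: Int) => if (arr == null) Seq.empty[Int] else arr.take(k))
+def withUdf(df: DataFrame, k: Int): DataFrame =
+  df.withColumn("_sampled", takeFirstK(F.col("_neighbor_arr"), F.lit(k)))
+
+// Faster: the built-in `slice` stays inside Catalyst and can be optimized end to end.
+def withBuiltin(df: DataFrame, k: Int): DataFrame =
+  df.withColumn("_sampled", F.slice(F.col("_neighbor_arr"), 1, k))
+```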
diff --git a/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/NodeAnchorBasedLinkPredictionTask.scala b/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/NodeAnchorBasedLinkPredictionTask.scala
index 979cc6d..505ea34 100644
--- a/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/NodeAnchorBasedLinkPredictionTask.scala
+++ b/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/NodeAnchorBasedLinkPredictionTask.scala
@@ -37,6 +37,9 @@ class NodeAnchorBasedLinkPredictionTask(
     val experimentalFlags = gbmlConfigWrapper.subgraphSamplerConfigPb.experimentalFlags
     val permutationStrategy =
       experimentalFlags.getOrElse("permutation_strategy", PermutationStrategy.NonDeterministic)
+    val sampleWithReplacement: Boolean =
+      experimentalFlags.getOrElse("sample_with_replacement", "false").toBoolean
+    println(f"Sample with replacement: ${sampleWithReplacement}")
     val numNeighborsToSample = gbmlConfigWrapper.subgraphSamplerConfigPb.numNeighborsToSample
     val includeIsolatedNodesInTrainingSamples =
       false // must be added to subgraphSamplerConfig in gbml config [As of now, SGS v2 with Spark, isolated nodes are NOT included in training samples ]
@@ -62,6 +65,7 @@ class NodeAnchorBasedLinkPredictionTask(
       hydratedEdgeVIEW = hydratedEdgeVIEW,
       unhydratedEdgeVIEW = unhydratedEdgeVIEW,
       permutationStrategy = permutationStrategy,
+      sampleWithReplacement = sampleWithReplacement,
     ) // does not include isolated nodes and we must cache this DF
     // @spark: caching the DF associated with subgraphVIEW is CRITICAL, substantially reduces job time. Cache is
     // triggered once the action (i.e. *write* RootedNodeNeighborhoodSample to GCS) is finished.
diff --git a/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/SGSPureSparkV1Task.scala b/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/SGSPureSparkV1Task.scala
index 03a2c07..92c51cc 100644
--- a/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/SGSPureSparkV1Task.scala
+++ b/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/SGSPureSparkV1Task.scala
@@ -12,6 +12,8 @@ import libs.task.SamplingStrategy.shuffleBasedUniformPermutation
 import libs.task.SubgraphSamplerTask
 import org.apache.spark.sql.Column
 import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.expressions.UserDefinedFunction
+import org.apache.spark.sql.functions.udf
 import org.apache.spark.sql.types.IntegerType
 import org.apache.spark.sql.{functions => F}
 import org.apache.spark.storage.StorageLevel
@@ -19,6 +21,7 @@ import snapchat.research.gbml.preprocessed_metadata.PreprocessedMetadata

 import java.util.UUID.randomUUID
 import scala.collection.mutable.ListBuffer
+import scala.util.Random

 abstract class SGSPureSparkV1Task(
   gbmlConfigWrapper: GbmlConfigPbWrapper)
@@ -36,6 +39,16 @@ abstract class SGSPureSparkV1Task(

   val uniqueTempViewSuffix: String = generateUniqueSuffix

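+  // Draws `numSamples` elements uniformly at random from `array`, sampling with replacement
+  // (so the output may contain duplicates); a null or empty input yields an empty Seq.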
+  val sampleWithReplacementUDF: UserDefinedFunction = udf((array: Seq[Int], numSamples: Int) => {
+    if (array == null || array.isEmpty) {
+      Seq.empty[Int]
+    } else {
+      val random = new Random()
+      (1 to numSamples).map(_ => array(random.nextInt(array.length)))
+    }
+  })
+  spark.udf.register("sampleWithReplacementUDF", sampleWithReplacementUDF)
+
   def loadNodeDataframeIntoSparkSql(condensedNodeType: Int): String = {

     /** For a given condensed_node_type, loads the node feature/type dataframe with columns:
@@ -63,8 +76,6 @@
     // node_id is long. This casting makes datatype compatible with protobufs
     // We can remove this casting step, by changing type in graph_schema.proto
     // from uint32 to uint64 in the future.
-    // As of now making such fundamental change to protobuf message may mess up people's usage of mocked data.
-    // Discussed with @nshah.
     val preprocessedNodeVIEW: String = "preprocessedNodeDF" + uniqueTempViewSuffix
     preprocessedNodeDF.createOrReplaceTempView(
@@ -303,6 +314,7 @@ abstract class SGSPureSparkV1Task(
       numNeighborsToSample: Int,
       unhydratedEdgeVIEW: String,
       permutationStrategy: String,
+      sampleWithReplacement: Boolean = false,
     ): String = {

     /** 1. for each dst node, take all in-edges as onehop array
       * 2. randomly shuffle onehop array
@@ -340,13 +352,27 @@
     val permutedOnehopArrayVIEW = "permutedOnehopArrayDF" + uniqueTempViewSuffix
     permutedOnehopArrayDF.createOrReplaceTempView(permutedOnehopArrayVIEW)
-    val sampledOnehopDF: DataFrame = spark.sql(f"""
-    SELECT
-      _dst_node AS _0_hop,
-      slice(_shuffled_1_hop_arr, 1, ${numNeighborsToSample}) AS _sampled_1_hop_arr
-    FROM
-      ${permutedOnehopArrayVIEW}
-    """)
+    val sampledOnehopDF: DataFrame = if (sampleWithReplacement) {
+      spark.sql(
+        f"""
+    SELECT
+      _dst_node AS _0_hop,
+      sampleWithReplacementUDF(_shuffled_1_hop_arr, ${numNeighborsToSample}) AS _sampled_1_hop_arr
+    FROM
+      ${permutedOnehopArrayVIEW}
+    """,
+      )
+    } else {
+      spark.sql(
+        f"""
+    SELECT
+      _dst_node AS _0_hop,
+      slice(_shuffled_1_hop_arr, 1, ${numNeighborsToSample}) AS _sampled_1_hop_arr
+    FROM
+      ${permutedOnehopArrayVIEW}
+    """,
+      )
+    }
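+    // Note: `slice` keeps the first numNeighborsToSample entries of the already-shuffled array (sampling
+    // without replacement), while the UDF draws numNeighborsToSample independent picks, so duplicates can appear.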
     // @spark: critical NOTE: cache is necessary here, to break parallelism and enforce random shuffle/sampling happens once, to circumvent NON-determinism in F.shuffle. Otherwise, for all calls to sampledOnehopDF downstream, this stage will run in parallel and mess up onehop samples!
     // @spark: maybe in future we wanna try caching the exploded verison of below DF.
@@ -366,6 +392,7 @@
       unhydratedEdgeVIEW: String,
       sampledOnehopVIEW: String,
       permutationStrategy: String,
+      sampleWithReplacement: Boolean = false,
     ): String = {

     /** 1. uses onehop nodes in sampledOnehopVIEW as reference to obtain twohop neighbors for each
@@ -432,14 +459,29 @@
     val permutedTwohopArrayVIEW = "permutedTwohopArrayDF" + uniqueTempViewSuffix
     permutedTwohopArrayDF.createOrReplaceTempView(permutedTwohopArrayVIEW)
     // @spark: Since twohop df is called only once, no need to take care of NON determinism in the shuffle/sampling (for k hop any k-1 df must be cached)
-    val sampledTwohopDF: DataFrame = spark.sql(f"""
+    val sampledTwohopDF: DataFrame = if (sampleWithReplacement) {
+      spark.sql(
+        f"""
     SELECT
       _0_hop,
       _1_hop,
-      slice(_shuffled_2_hop_arr ,1 , ${numNeighborsToSample}) AS _sampled_2_hop_arr
+      sampleWithReplacementUDF(_shuffled_2_hop_arr, ${numNeighborsToSample}) AS _sampled_2_hop_arr
     FROM
       ${permutedTwohopArrayVIEW}
-    """)
+    """,
+      )
+    } else {
+      spark.sql(
+        f"""
+    SELECT
+      _0_hop,
+      _1_hop,
+      slice(_shuffled_2_hop_arr, 1, ${numNeighborsToSample}) AS _sampled_2_hop_arr
+    FROM
+      ${permutedTwohopArrayVIEW}
+    """,
+      )
+    }
     val sampledTwohopVIEW = "sampledTwohopDF" + uniqueTempViewSuffix
     sampledTwohopDF.createOrReplaceTempView(sampledTwohopVIEW)
     //
@@ -632,6 +674,7 @@ abstract class SGSPureSparkV1Task(
       unhydratedEdgeVIEW: String,
       numNeighborsToSample: Int,
       permutationStrategy: String,
+      sampleWithReplacement: Boolean = false,
     ): String = {

     /** Adds root node features to hydrated neighborhood and creates final subgraphDF for each root
@@ -651,12 +694,14 @@
       numNeighborsToSample = numNeighborsToSample,
       unhydratedEdgeVIEW = unhydratedEdgeVIEW,
       permutationStrategy = permutationStrategy,
+      sampleWithReplacement = sampleWithReplacement,
     )
     val sampledTwohopVIEW = sampleTwohopSrcNodesUniformly(
       numNeighborsToSample = numNeighborsToSample,
       unhydratedEdgeVIEW = unhydratedEdgeVIEW,
       sampledOnehopVIEW = sampledOnehopVIEW,
       permutationStrategy = permutationStrategy,
+      sampleWithReplacement = sampleWithReplacement,
     )
     // hydrate onehop neighbors
     val hydratedOnehopNeighborsVIEW = createKthHydratedNeighborhood(
diff --git a/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/UserDefinedLabelsNodeAnchorBasedLinkPredictionTask.scala b/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/UserDefinedLabelsNodeAnchorBasedLinkPredictionTask.scala
index d3017b6..12298b1 100644
--- a/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/UserDefinedLabelsNodeAnchorBasedLinkPredictionTask.scala
+++ b/scala/subgraph_sampler/src/main/scala/libs/task/pureSpark/UserDefinedLabelsNodeAnchorBasedLinkPredictionTask.scala
@@ -63,6 +63,9 @@ class UserDefinedLabelsNodeAnchorBasedLinkPredictionTask(
     val experimentalFlags = gbmlConfigWrapper.subgraphSamplerConfigPb.experimentalFlags
     val permutationStrategy =
       experimentalFlags.getOrElse("permutation_strategy", PermutationStrategy.NonDeterministic)
+    val sampleWithReplacement: Boolean =
+      experimentalFlags.getOrElse("sample_with_replacement", "false").toBoolean
+    println(f"Sample with replacement: ${sampleWithReplacement}")
     val numNeighborsToSample = gbmlConfigWrapper.subgraphSamplerConfigPb.numNeighborsToSample
     val includeIsolatedNodesInTrainingSamples =
       false // must be added to subgraphSamplerConfig in gbml config [As of now, SGS v2 with Spark, isolated nodes are NOT included in training samples ]
@@ -117,6 +120,7 @@ class UserDefinedLabelsNodeAnchorBasedLinkPredictionTask(
       hydratedEdgeVIEW = hydratedEdgeVIEW,
       unhydratedEdgeVIEW = unhydratedEdgeVIEW,
       permutationStrategy = permutationStrategy,
+      sampleWithReplacement = sampleWithReplacement,
     ) // does not include isolated nodes and we must cache this DF
     // @spark: caching the DF associated with subgraphVIEW is CRITICAL, substantially reduces job time. Cache is
     // triggered once the action (i.e. *write* RootedNodeNeighborhoodSample to GCS) is finished.
diff --git a/scala/subgraph_sampler/src/test/scala/SGSPureSparkV1TaskTest.scala b/scala/subgraph_sampler/src/test/scala/SGSPureSparkV1TaskTest.scala
index 1cc7f2b..3b339ec 100644
--- a/scala/subgraph_sampler/src/test/scala/SGSPureSparkV1TaskTest.scala
+++ b/scala/subgraph_sampler/src/test/scala/SGSPureSparkV1TaskTest.scala
@@ -210,7 +210,32 @@ class SGSPureSparkV1TaskTest extends AnyFunSuite with BeforeAndAfterAll with Sha
       .getSeq[Integer](0)
     nodeList should contain allElementsOf randomSamplesList
     assert(randomSamplesList.length == numNeighborsToSample)
+  }

+  test("Onehop samples with replacement are valid.") {
+    val uniqueTestViewSuffix = "_" + randomUUID.toString.replace("-", "_")
+    val unhydratedEdgeDF = mockUnhydratedEdgeForCurrentTest
+    val unhydratedEdgeVIEW = "unhydratedEdgeDF" + uniqueTestViewSuffix
+    unhydratedEdgeDF.createOrReplaceTempView(unhydratedEdgeVIEW)
+    var numNeighborsToSample = 10
+    val sampledOnehopVIEW =
+      sgsTask.sampleOnehopSrcNodesUniformly(
+        numNeighborsToSample = numNeighborsToSample,
+        unhydratedEdgeVIEW = unhydratedEdgeVIEW,
+        permutationStrategy = "non-deterministic",
+        sampleWithReplacement = true,
+      )
+    val sampledOnehopDF = sparkTest.table(sampledOnehopVIEW)
+    // must choose a nodeId st number of its in-edges is > numNeighborsToSample [to test randomness]
+    var nodeId = 0
+    val nodeList = Seq(1, 2, 3, 4, 5, 6, 7, 8)
+    val randomSamplesList = sampledOnehopDF
+      .filter(F.col("_0_hop") === nodeId)
+      .select("_sampled_1_hop_arr")
+      .first
+      .getSeq[Integer](0)
+    nodeList should contain allElementsOf randomSamplesList
+    assert(randomSamplesList.length == numNeighborsToSample)
   }

   test("Twohop samples are valid.") {
@@ -245,6 +270,39 @@ class SGSPureSparkV1TaskTest extends AnyFunSuite with BeforeAndAfterAll with Sha
     assert(randomTwohopSamplesList.length == numNeighborsToSample)
   }

+  test("Twohop samples with replacement are valid.") {
+    val uniqueTestViewSuffix = "_" + randomUUID.toString.replace("-", "_")
+    val unhydratedEdgeDF = mockUnhydratedEdgeForCurrentTest
+    val unhydratedEdgeVIEW = "unhydratedEdgeDF" + uniqueTestViewSuffix
+    unhydratedEdgeDF.createOrReplaceTempView(unhydratedEdgeVIEW)
+    val onehopData = Seq((1, Seq(3, 0, 2)), (3, Seq(0, 1, 1)), (2, Seq(0, 1, 0)), (0, Seq(1, 3, 2)))
+    val sampledOnehopDF = onehopData.toDF("_0_hop", "_sampled_1_hop_arr")
+    val sampledOnehopVIEW = "sampledOnehopDF" + uniqueTestViewSuffix
+    sampledOnehopDF.createOrReplaceTempView(sampledOnehopVIEW)
+    var numNeighborsToSample = 10
+
+    val sampledTwohopVIEW = sgsTask.sampleTwohopSrcNodesUniformly(
+      numNeighborsToSample = numNeighborsToSample,
+      unhydratedEdgeVIEW = unhydratedEdgeVIEW,
+      sampledOnehopVIEW = sampledOnehopVIEW,
+      permutationStrategy = "non-deterministic",
+      sampleWithReplacement = true,
+    )
+    val sampledTwohopDF = sparkTest.table(sampledTwohopVIEW)
+    // must choose a nodeId st number of its in-edges is <= numNeighborsToSample [no randomness]
+    var zerohopId = 1
+    // must choose a nodeId st number of its in-edges is > numNeighborsToSample [to test randomness]
+    var onehopId = 0
+    var twohopList = Seq(1, 2, 3, 4, 5, 6, 7, 8)
+    val randomTwohopSamplesList = sampledTwohopDF
+      .filter(F.col("_0_hop") === zerohopId && F.col("_1_hop") === onehopId)
+      .select("_sampled_2_hop_arr")
+      .first
+      .getSeq[Integer](0)
+    twohopList should contain allElementsOf randomTwohopSamplesList
+    assert(randomTwohopSamplesList.length == numNeighborsToSample)
+  }
+
   test("Hydrated kth hop nodes have right col and node ids") {
     val uniqueTestViewSuffix = "_" + randomUUID.toString.replace("-", "_")
     val edgeData = Seq(
@@ -521,4 +579,30 @@ class SGSPureSparkV1TaskTest extends AnyFunSuite with BeforeAndAfterAll with Sha

     expectedIsolatedNodeList should contain allElementsOf curIsolatedNodesList
   }

+  test("sampleWithReplacementUDF returns correct number of samples") {
+    // Use the already registered UDF from SGSPureSparkV1Task
+    val sampleWithReplacementUDF = sgsTask.sampleWithReplacementUDF
+
+    // Create a DataFrame with sample data
+    val data = Seq(
+      (Seq(1, 2, 3, 4, 5), 10),
+      (Seq(6, 7, 8, 9, 10), 2),
+      (Seq.empty[Int], 3),
+      (null, 3),
+    ).toDF("array", "numSamples")
+
+    // Apply the UDF
+    val resultDF =
+      data.withColumn("samples", sampleWithReplacementUDF(F.col("array"), F.col("numSamples")))
+
+    // Collect the results
+    val results = resultDF.collect()
+
+    // Write assertions
+    assert(results(0).getAs[Seq[Int]]("samples").length == 10)
+    assert(results(1).getAs[Seq[Int]]("samples").length == 2)
+    assert(results(2).getAs[Seq[Int]]("samples").isEmpty)
+    assert(results(3).getAs[Seq[Int]]("samples").isEmpty)
+  }
+
 }
diff --git a/scala_spark35/README.md b/scala_spark35/README.md
index 017ccf6..776dbdf 100644
--- a/scala_spark35/README.md
+++ b/scala_spark35/README.md
@@ -2,37 +2,42 @@

 ### Building and Testing the project

-Run `make install_deps` first if you haven't to install scala & spark.
-cd inside the ```scala_spark35``` directory.
-To compile all projects in the repo and generate the jar files, run:
+Run `make install_deps` first, if you haven't already, to install scala & spark. cd inside the `scala_spark35` directory. To
+compile all projects in the repo and generate the jar files, run:
+
 ```
 sbt assembly
 ```

 To assemble a specific project:
+
 ```
 sbt subgraph_sampler/assembly
 ```

 To run all tests:
+
 ```
 sbt test
 ```

 Similarly to run a specific test suite:
+
 ```
 sbt subgraph_sampler/test
 ```

 To clean up all target files:
+
 ```
 make clean_build_files
 ```

 ### Running spark jobs locally

-Please check the Makefile for commands to run the spark jobs locally.
-The jobs makes use of the mocked assets from the directory
+Please check the Makefile for commands to run the spark jobs locally. The jobs make use of the mocked assets from the
+directory
+
 ```
 common/src/test/assets
 ```
@@ -40,22 +45,28 @@ common/src/test/assets

 #### Set log level

 To silence the worker logs
-1. Create log4j.properties file from template, under `/scala` dir, do `cp ../tools/scala/spark-3.1.3-bin-hadoop3.2/conf/log4j.properties.template ../tools/scala/spark-3.1.3-bin-hadoop3.2/conf/log4j.properties`
-2. Update the first line in `log4j.properties` to `log4j.rootCategory=WARN, console`
+
+1. Create log4j.properties file from template, under `/scala` dir, do
+   `cp ../tools/scala/spark-3.1.3-bin-hadoop3.2/conf/log4j.properties.template ../tools/scala/spark-3.1.3-bin-hadoop3.2/conf/log4j.properties`
+1. Update the first line in `log4j.properties` to `log4j.rootCategory=WARN, console`

 Note: Mocked assets are generated using the dataset asset mocking suite (in `python/gigl/src/mocking/`)

 ### How to build and deploy spark-tfrecord package used in the Spark Jobs

-Note: remember to have local deps for developing installed by running `make install_deps`. See main README.md for more details.
-We make use of the Spark [TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) Connector provided by the Linkedin repo [linkedin/spark-tfrecord](https://github.com/linkedin/spark-tfrecord). We deploy and maintain our own copies off the jar since not all sbt/scala vers are available on Maven Central, etc.
+Note: remember to have local deps for developing installed by running `make install_deps`. See main README.md for more
+details.
+
+We make use of the Spark [TFRecord](https://www.tensorflow.org/tutorials/load_data/tfrecord) Connector provided by the
+Linkedin repo [linkedin/spark-tfrecord](https://github.com/linkedin/spark-tfrecord). We deploy and maintain our own
+copies of the jar since not all sbt/scala versions are available on Maven Central, etc.

 To build:

-First clone the repo, then cd into directory.
+First clone the repo, then cd into the directory.

 Install maven if not already installed:
+
 ```
 Linux:
 sudo apt-get install maven
@@ -63,26 +74,27 @@ sudo apt-get install maven

 OSX:
 brew install maven
 ```
+
 Build with maven (specific scala and spark versions can be found in `build.sbt` file in our repo)
+
 ```
 mvn -Pscala-2.12 clean install -Dspark.version=3.2.0
 ```

 Copy to GCS and deploy:
+
 ```
 gsutil cp target/spark-tfrecord_2.12-0.5.0.jar gs://$YOUR_BUCKET/your/path/to/snap-spark-custom-tfrecord_2.12-{version_number}.jar
 ```

 Note: Snap currently hosts these in the `public-gigl` GCS bucket.

-
 ## FAQ / Common Issues

-1. `NoSuchMethodError` or `NoClassDefFoundError` or `java.lang.IllegalStateException: You are currently running with version x of some-google-lib`
+1. `NoSuchMethodError` or `NoClassDefFoundError` or
+   `java.lang.IllegalStateException: You are currently running with version x of some-google-lib`

-We need make sure that we have compatible versions of Cloud Client Libraries.
-To do this we use the versions specified in the Google Cloud Libraries Bill of Materials (BOM).
-The libraries in the BOM don't have dependency conflicts that would manifest as
-To do this, follow instructions in `project/match_bom.py`.
-You will have to run the script with the relevant instructions provided inside the file to ensure google packages
-do not conflict.
+We need to make sure that we have compatible versions of Cloud Client Libraries. To do this we use the versions
+specified in the Google Cloud Libraries Bill of Materials (BOM); the libraries in the BOM don't have dependency
+conflicts that would manifest as errors like the above. Follow the instructions in `project/match_bom.py`: you will
+have to run the script with the relevant instructions provided inside the file to ensure google packages do not
+conflict.
diff --git a/scala_spark35/build.sbt b/scala_spark35/build.sbt
index b9434f9..9a02830 100644
--- a/scala_spark35/build.sbt
+++ b/scala_spark35/build.sbt
@@ -54,6 +54,7 @@ lazy val dependencies =
     // Testing
     val scalatest = "org.scalatest" %% "scalatest" % "3.2.11" % Test
     // Not included in fat jar during compile time due to dependency issues; injected through spark-submit at runtime
+    // TODO: (svij-sc) Find a common place to pull this jar uri from
     // The jar file is built using Snap's fork of the Linkedin TfRecord Spark Connector.
     val tfRecordConnector =
       "com.linkedin.sparktfrecord" % "spark-tfrecord_2.12" % "0.6.1" % Test from SPARK_35_TFRECORD_JAR_GCS_PATH
diff --git a/scala_spark35/common/src/main/scala/graphdb/nebula/NebulaQueryResponseTranslator.scala b/scala_spark35/common/src/main/scala/graphdb/nebula/NebulaQueryResponseTranslator.scala
index 7d2e49d..6173126 100644
--- a/scala_spark35/common/src/main/scala/graphdb/nebula/NebulaQueryResponseTranslator.scala
+++ b/scala_spark35/common/src/main/scala/graphdb/nebula/NebulaQueryResponseTranslator.scala
@@ -68,7 +68,7 @@ class NebulaQueryResponseTranslator(
           LIMIT [${numNodesToSample}]
         """
       }
-      // TODO: (svij-sc) This is not TRUE RandomWeighted sampling.
+      // TODO: (svij) This is not TRUE RandomWeighted sampling.
       // Consider this a placeholder that tries to do a "similar computation"
       // Work needs to be done to implement true RandomWeighted sampling.
       case SamplingOp.SamplingMethod.RandomWeighted(value) => {
diff --git a/scala_spark35/common/src/test/assets/resource_config.yaml b/scala_spark35/common/src/test/assets/resource_config.yaml
index bd72115..1a795ef 100644
--- a/scala_spark35/common/src/test/assets/resource_config.yaml
+++ b/scala_spark35/common/src/test/assets/resource_config.yaml
@@ -8,9 +8,9 @@ shared_resource_config:
       temp_assets_bucket: "gs://gigl-cicd-temp"
       temp_regional_assets_bucket: "gs://gigl-cicd-temp"
       perm_assets_bucket: "gs://gigl-cicd-temp" # For testing, we don't persist models, configs, etc. - they should expire w/ TTL
-      temp_assets_bq_dataset_name: "TEST ASSET PLACEHOLDER"
-      embedding_bq_dataset_name: "TEST ASSET PLACEHOLDER"
-      gcp_service_account_email: "untrusted-external-github-gigl@external-snap-ci-github-gigl.iam.gserviceaccount.com"
+      temp_assets_bq_dataset_name: "FAKE DATASET NAME 1"
+      embedding_bq_dataset_name: "FAKE DATASET NAME 2"
+      gcp_service_account_email: "FAKE SA"
       dataflow_runner: "DirectRunner"
 preprocessor_config:
   edge_preprocessor_config:
diff --git a/scala_spark35/common/src/test/assets/subgraph_sampler/heterogeneous/node_anchor_based_link_prediction/frozen_gbml_config_graphdb_dblp_local.yaml b/scala_spark35/common/src/test/assets/subgraph_sampler/heterogeneous/node_anchor_based_link_prediction/frozen_gbml_config_graphdb_dblp_local.yaml
index 4655a50..d34adb5 100755
--- a/scala_spark35/common/src/test/assets/subgraph_sampler/heterogeneous/node_anchor_based_link_prediction/frozen_gbml_config_graphdb_dblp_local.yaml
+++ b/scala_spark35/common/src/test/assets/subgraph_sampler/heterogeneous/node_anchor_based_link_prediction/frozen_gbml_config_graphdb_dblp_local.yaml
@@ -4,8 +4,8 @@ datasetConfig:
   dataPreprocessorConfig:
     dataPreprocessorArgs:
-      bq_edges_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_node_anchor_user_friend_user_edges
-      bq_nodes_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_node_anchor_user_nodes
+      bq_edges_table_name: "FAKE DATASET NAME 1"
+      bq_nodes_table_name: "FAKE DATASET NAME 2"
     dataPreprocessorConfigClsPath: this.is.non.existent.test.path.1
 subgraphSamplerConfig:
   subgraphSamplingStrategy:
diff --git a/scala_spark35/common/src/test/assets/subgraph_sampler/node_anchor_based_link_prediction/frozen_gbml_config.yaml b/scala_spark35/common/src/test/assets/subgraph_sampler/node_anchor_based_link_prediction/frozen_gbml_config.yaml
index 6dd442f..6e9eddb 100644
--- a/scala_spark35/common/src/test/assets/subgraph_sampler/node_anchor_based_link_prediction/frozen_gbml_config.yaml
+++ b/scala_spark35/common/src/test/assets/subgraph_sampler/node_anchor_based_link_prediction/frozen_gbml_config.yaml
@@ -1,8 +1,8 @@
 datasetConfig:
   dataPreprocessorConfig:
     dataPreprocessorArgs:
-      bq_edges_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_node_anchor_user_friend_user_edges
-      bq_nodes_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_node_anchor_user_nodes
+      bq_edges_table_name: "FAKE DATASET NAME 1"
+      bq_nodes_table_name: "FAKE DATASET NAME 2"
     dataPreprocessorConfigClsPath: this.is.non.existent.test.path.1
 subgraphSamplerConfig:
   # numMaxTrainingSamplesToOutput: 10
diff --git a/scala_spark35/common/src/test/assets/subgraph_sampler/supervised_node_classification/frozen_gbml_config.yaml b/scala_spark35/common/src/test/assets/subgraph_sampler/supervised_node_classification/frozen_gbml_config.yaml
index 47321dc..cb1587a 100644
--- a/scala_spark35/common/src/test/assets/subgraph_sampler/supervised_node_classification/frozen_gbml_config.yaml
+++ b/scala_spark35/common/src/test/assets/subgraph_sampler/supervised_node_classification/frozen_gbml_config.yaml
@@ -12,8 +12,8 @@ taskMetadata:
 datasetConfig:
   dataPreprocessorConfig:
     dataPreprocessorArgs:
-      bq_edges_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_supervised_node_classification_user_friend_user_edges
-      bq_nodes_table_name: external-snap-ci-github-gigl.gbml_mocked_assets.toy_graph_homogeneous_supervised_node_classification_user_nodes
+      bq_edges_table_name: external-snap-ci-github-gigl.public_gigl.toy_graph_homogeneous_supervised_node_classification_user_friend_user_edges
+      bq_nodes_table_name: external-snap-ci-github-gigl.public_gigl.toy_graph_homogeneous_supervised_node_classification_user_nodes
     dataPreprocessorConfigClsPath: this.is.non.existent.test.path.1
 subgraphSamplerConfig:
   numHops: 2
diff --git a/scala_spark35/common/src/test/scala/types/pb_wrappers/GraphPbWrappersTest.scala b/scala_spark35/common/src/test/scala/types/pb_wrappers/GraphPbWrappersTest.scala
index 3cf51aa..0234a82 100644
--- a/scala_spark35/common/src/test/scala/types/pb_wrappers/GraphPbWrappersTest.scala
+++ b/scala_spark35/common/src/test/scala/types/pb_wrappers/GraphPbWrappersTest.scala
@@ -2,7 +2,7 @@ import common.types.pb_wrappers.GraphPbWrappers
 import org.scalatest.funsuite.AnyFunSuite
 import snapchat.research.gbml.graph_schema.Graph

-object GraphPbWrappersTest extends AnyFunSuite with PbWrappersTestGraphSetup {
+class GraphPbWrappersTest extends AnyFunSuite with PbWrappersTestGraphSetup {
   test(
     "mergeGraphs - test Graph protos merged as expected with mergeGraphs",
   ) {
diff --git a/scala_spark35/common/src/test/scala/types/pb_wrappers/NodeAnchorBasedLinkpredictionSamplePbWrapperTest.scala b/scala_spark35/common/src/test/scala/types/pb_wrappers/NodeAnchorBasedLinkpredictionSamplePbWrapperTest.scala
index 72a503f..f059408 100644
--- a/scala_spark35/common/src/test/scala/types/pb_wrappers/NodeAnchorBasedLinkpredictionSamplePbWrapperTest.scala
+++ b/scala_spark35/common/src/test/scala/types/pb_wrappers/NodeAnchorBasedLinkpredictionSamplePbWrapperTest.scala
@@ -4,7 +4,7 @@ import snapchat.research.gbml.graph_schema.Graph
 import snapchat.research.gbml.training_samples_schema.NodeAnchorBasedLinkPredictionSample
 import snapchat.research.gbml.training_samples_schema.RootedNodeNeighborhood

-object NodeAnchorBasedLinkpredictionSamplePbWrapperTest
+class NodeAnchorBasedLinkpredictionSamplePbWrapperTest
     extends AnyFunSuite
     with PbWrappersTestGraphSetup {
   test(
diff --git a/scala_spark35/common/src/test/scala/types/pb_wrappers/RootedNodeNeighborhoodPbWrapperTest.scala b/scala_spark35/common/src/test/scala/types/pb_wrappers/RootedNodeNeighborhoodPbWrapperTest.scala
index 5d7cf2f..89d8cfc 100644
--- a/scala_spark35/common/src/test/scala/types/pb_wrappers/RootedNodeNeighborhoodPbWrapperTest.scala
+++ b/scala_spark35/common/src/test/scala/types/pb_wrappers/RootedNodeNeighborhoodPbWrapperTest.scala
@@ -3,7 +3,7 @@ import org.scalatest.funsuite.AnyFunSuite
 import snapchat.research.gbml.graph_schema.Graph
 import snapchat.research.gbml.training_samples_schema.RootedNodeNeighborhood

-object RootedNodeNeighborhoodPbWrapperTest extends AnyFunSuite with PbWrappersTestGraphSetup {
+class RootedNodeNeighborhoodPbWrapperTest extends AnyFunSuite with PbWrappersTestGraphSetup {
   test(
     "mergeRootedNodeNeighborhoods - test RootedNodeNeighborhood protos merge as expected with mergeRootedNodeNeighborhoods",
   ) {
diff --git a/scala_spark35/split_generator/README.md b/scala_spark35/split_generator/README.md
index 854a74b..e57be3b 100644
--- a/scala_spark35/split_generator/README.md
+++ b/scala_spark35/split_generator/README.md
@@ -1,44 +1,55 @@
 ## Split Generator
----
+
+______________________________________________________________________
+
 ### Steps:
-  1. Read the assigner and split strategy classpaths from the config.
-  2. Read the SGS output from GCS using the TFRecord Spark Connector (more details on that below)
-  3. Coalesce the read dataframe. This is to increase the CPU utilization of the Spark job (no shuffle cost incurred)
-  4. Cache this dataframe as we will require this to compute all the 3 (train/test/val) splits. If we do not cache this Spark tries to read the input from GCS 3 times. We can skip caching for inference only jobs as we only want to compute the test split.
-  5. Convert the Array of Bytes Dataset to the proto Dataset.
-  6. Using dataset.map() we map every sample to the output sample of the respective split using the SplitStrategy class(which does not take any Spark dependency). We do this for every split.
-  7. The output dataset is converted back to a dataset of array of bytes which is written to GCS.
+
+```
+1. Read the assigner and split strategy classpaths from the config.
+2. Read the SGS output from GCS using the TFRecord Spark Connector (more details on that below)
+3. Coalesce the read dataframe. This is to increase the CPU utilization of the Spark job (no shuffle cost incurred)
+4. Cache this dataframe as we will require this to compute all the 3 (train/test/val) splits. If we do not cache this Spark tries to read the input from GCS 3 times. We can skip caching for inference only jobs as we only want to compute the test split.
+5. Convert the Array of Bytes Dataset to the proto Dataset.
+6. Using dataset.map() we map every sample to the output sample of the respective split using the SplitStrategy class (which does not take any Spark dependency). We do this for every split.
+7. The output dataset is converted back to a dataset of array of bytes which is written to GCS.
+```

 ### SplitGen Arguments:

-* **train/test/val ratio**
-  Ratios of the respective split. We use the ```scala.util.hashing.MurmurHash3``` library to hash the edges/nodes to the splits. Murmur hash is independent of the machine it runs on and hence produces the same split irrespective of the underlying machine.
-* **should_split_edges_symmetrically**
+- **train/test/val ratio**
-  flag to indicate whether bidirectional edges should be assigned to the same split. That is if set to true a->b and b->a will belong to the same split. Typically set false for directed graphs.
+
+  Ratios of the respective split. We use the `scala.util.hashing.MurmurHash3` library to hash the edges/nodes to the
+  splits. Murmur hash is independent of the machine it runs on and hence produces the same split irrespective of the
+  underlying machine (see the sketch after this list).
-* **is_disjoint_mode**
-  flag to indicate if the training is done in disjoint mode or not. In non-disjoint mode, both supervision and message passing edges are treated the same for the train split. In disjoint mode, they are treated separately.
+- **should_split_edges_symmetrically**
-* **assignerClsPth**
-  class path to the assginer that is used for assigning the nodes/edges to a respective split. New assiners must inplement the abstract class ```Assigner```
+
+  flag to indicate whether bidirectional edges should be assigned to the same split. That is, if set to true, a->b and
+  b->a will belong to the same split. Typically set to false for directed graphs.
-* **splitStrategyClsPth**
-  class path to the Split Strategy being used for the job. New implmentations must inplement the abstract class ```SplitStrategy```
-
+- **is_disjoint_mode** flag to indicate if the training is done in disjoint mode or not. In non-disjoint mode, both
+  supervision and message passing edges are treated the same for the train split. In disjoint mode, they are treated
+  separately.
+- **assignerClsPth** class path to the assigner that is used for assigning the nodes/edges to a respective split. New
+  assigners must implement the abstract class `Assigner`
+
+- **splitStrategyClsPth** class path to the Split Strategy being used for the job. New implementations must implement
+  the abstract class `SplitStrategy`
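+
+A minimal sketch of the hashing idea described under **train/test/val ratio** and **should_split_edges_symmetrically**
+above (helper names are illustrative, not the actual `Assigner` implementation):
+
+```
+import scala.util.hashing.MurmurHash3
+
+// Map an id into [0, 1) deterministically: the same id lands in the same bucket on any machine.
+def hashToUnitInterval(id: String): Double =
+  (MurmurHash3.stringHash(id) & Int.MaxValue).toDouble / Int.MaxValue
+
+// Assign a split from train/val ratios, e.g. assignSplit(id, 0.8, 0.1).
+def assignSplit(id: String, trainRatio: Double, valRatio: Double): String = {
+  val h = hashToUnitInterval(id)
+  if (h < trainRatio) "train"
+  else if (h < trainRatio + valRatio) "val"
+  else "test"
+}
+
+// With should_split_edges_symmetrically, hash a canonical key so a->b and b->a agree.
+def edgeKey(src: Long, dst: Long): String =
+  if (src <= dst) s"$src-$dst" else s"$dst-$src"
+```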

 ### Implementation:

-- Leverages Dataset.map() and uses custom Scala code to convert an input sample to 3 output samples for each train/test/val split.
+- Leverages Dataset.map() and uses custom Scala code to convert an input sample to 3 output samples for each
+  train/test/val split.
 - Relies on caching and partition coalesce for performance optimization.
 - Can be run on mac, gCloud VMs and Dataproc VMs. (cmds are available in Makefile)
-
 ## Spark Optimizations / Important points to consider

-* The current spark plan only contains one Deserialization and Serialization step. It is important that we keep this behaviour as it directly affects the performance of the job.
-* We use a coalesce factor to decrease the input partitions and in turn increase the CPU utilization. Currently this is hardcoded in the code to 12 (for main samples) and 4 (for rooted node neighborhood samples) as this was fastest. If the number of input partiotions is changed by SGS job, we can reconsider these numbers.
-
+- The current spark plan only contains one Deserialization and Serialization step. It is important that we keep this
+  behaviour as it directly affects the performance of the job.
+- We use a coalesce factor to decrease the input partitions and in turn increase the CPU utilization. Currently this is
+  hardcoded in the code to 12 (for main samples) and 4 (for rooted node neighborhood samples) as this was fastest. If
+  the number of input partitions is changed by the SGS job, we can reconsider these numbers.
diff --git a/scala_spark35/subgraph_sampler/README.md b/scala_spark35/subgraph_sampler/README.md
index 2c4d3bc..a438460 100644
--- a/scala_spark35/subgraph_sampler/README.md
+++ b/scala_spark35/subgraph_sampler/README.md
@@ -1,77 +1,96 @@
 ## Notes for developers
+
 1. [SGS as of now](#sgs)
-2. [Scalability](#scalability)
-3. [Resources](#resources)
+1. [Scalability](#scalability)
+1. [Resources](#resources)
+
+## SGS

-SGS
----
 ### Set log level

 To silence the worker logs
-1. Create log4j.properties file from template, under `/scala_spark35` dir, do `cp ../tools/scala/spark-3.5.0-bin-hadoop3/conf/log4j2.properties.template ../tools/scala/spark-3.5.0-bin-hadoop3/conf/log4j2.properties`
-2. Update the first line in `log4j.properties` to `rootLogger.level = WARN
-rootLogger.appenderRef.stdout.ref = console`
+
+1. Create log4j.properties file from template, under `/scala_spark35` dir, do
+   `cp ../tools/scala/spark-3.5.0-bin-hadoop3/conf/log4j2.properties.template ../tools/scala/spark-3.5.0-bin-hadoop3/conf/log4j2.properties`
+1. Update the first line in `log4j.properties` to `rootLogger.level = WARN rootLogger.appenderRef.stdout.ref = console`

 ### Steps:
-  1. Load node/edge TFRecord resources into Spark DataFrame
-  2. Sample 1hop neighbors (only node ids)
-  3. Sample 2hop neighbors (only node ids)
-  4. Hydrate kth hop neighbors with both node and edge features/type
-  5. Merge 1hop and 2hop hydrated neighbors
-  6. Merge hydrated root node to step 5, and create subgraphDF
-  7. Append isolated nodes (if any) to subgraphDF [RootedNodeNeighnorhood]
-  8. Add task-relevent samples to subgraphDF (such as positive node neighborshoods or node labels) to create trainingSubgraphDF
-  9. (If specified) append isolated nodes to trainingSubgraphDF [SupervisedNodeClassificationSample,NodeAnchorBasedLinkPredictionSample ]
-  10. Modify subgraphDF and trainingSubgraphDF schema to compatible structure as defined in `training_samples_schema.proto`.
-  11. Convert DataFrames from step 10 to DataSet and map DataSet rows to ByteArray
-  12. Write serialized DataSet to TFRecord
+
+```
+1. Load node/edge TFRecord resources into Spark DataFrame
+2. Sample 1hop neighbors (only node ids)
+3. Sample 2hop neighbors (only node ids)
+4. Hydrate kth hop neighbors with both node and edge features/type
+5. Merge 1hop and 2hop hydrated neighbors
+6. Merge hydrated root node to step 5, and create subgraphDF
+7. Append isolated nodes (if any) to subgraphDF [RootedNodeNeighborhood]
+8. Add task-relevant samples to subgraphDF (such as positive node neighborhoods or node labels) to create trainingSubgraphDF
+9. (If specified) append isolated nodes to trainingSubgraphDF [SupervisedNodeClassificationSample, NodeAnchorBasedLinkPredictionSample]
+10. Modify subgraphDF and trainingSubgraphDF schema to compatible structure as defined in `training_samples_schema.proto`.
+11. Convert DataFrames from step 10 to DataSet and map DataSet rows to ByteArray
+12. Write serialized DataSet to TFRecord
+```

 ### Properties:

-* **Isolated nodes**
-  are always included in RootedNodeNeighnorhood samples. They can be included in training samples if the assigned flag for isolated nodes is set to true.
-* **Self loops**
+- **Isolated nodes**
-  are removed in preprocessor. They can be added in trainer if users wish to, but SGS is not concerned with self loops.
+
+  are always included in RootedNodeNeighborhood samples. They can be included in training samples if the assigned flag
+  for isolated nodes is set to true.
-* **Num hops**
+- **Self loops**
-  is hard-coded to k=2. But the code is extendable to k>2, only Spark optimizations are bottle-neck since as the number of hops grow neighborhood size grows exponentially.
+
+  are removed in preprocessor. They can be added in trainer if users wish to, but SGS is not concerned with self loops.
-* **Graph type**
+- **Num hops**
-  only supports homogeneous graphs.
+
+  is hard-coded to k=2. But the code is extendable to k>2; only Spark optimizations are the bottleneck, since the
+  neighborhood size grows exponentially with the number of hops.
-  supports both undirected and directed graphs.
+- **Graph type**
-* **Neighborhood sampling**
+
+  only supports homogeneous graphs.
-  uniform sampling.
+
+  supports both undirected and directed graphs.
-  - note that there are two implementations for uniform sampling:
-    1. non-deterministic (using built-in Spark functions), which is the default mode of sampling in SGS
-    2. deterministic (using hash based permutation). To enable it, set
-    ```
-    subgraphSamplerConfigs:
-      experimetalFlags:
-        permutation_strategy: deterministic
-    ```
-    for more info on why there are two implementations, check [this](https://docs.google.com/document/d/1TeJYu9bVFu463Pjfsv7UFtBxu9KZBH8eAH_SyV8vkAM/edit) doc
+- **Neighborhood sampling**
-* **numTrainingSamples**
+
+  uniform sampling.
-  If users wish to downsample number of nodes used for training (not in inferencer), they can adjut this number as below
+
+  - note that there are two implementations for uniform sampling:
+    1. non-deterministic (using built-in Spark functions), which is the default mode of sampling in SGS
+    1. deterministic (using hash based permutation; see the sketch at the end of this section). To enable it, set
 ```
 subgraphSamplerConfigs:
-  numTrainingSamples : 10000000
+  experimentalFlags:
+    permutation_strategy: deterministic
 ```
-  NOTE that for NodeAnchorBasedLinkPrediction samples the value set for numTrainingSamples will be further reduced in the next component i.e. Split Generator by a factor of `train_split`. Because if in a train split for an anchor node if at least a positve edge does not exit, that anchor node is dropped from train split.
+    for more info on why there are two implementations, check
+    [this](https://docs.google.com/document/d/1TeJYu9bVFu463Pjfsv7UFtBxu9KZBH8eAH_SyV8vkAM/edit) doc
+
+- **numTrainingSamples**
+
+  If users wish to downsample the number of nodes used for training (not in inferencer), they can adjust this number as below
+
+  ```
+  subgraphSamplerConfigs:
+    numTrainingSamples : 10000000
+  ```
+
+  NOTE that for NodeAnchorBasedLinkPrediction samples the value set for numTrainingSamples will be further reduced in
+  the next component, i.e. Split Generator, by a factor of `train_split`: if an anchor node does not have at least one
+  positive edge in the train split, that anchor node is dropped from the train split.
+
+- **Valid Samples** (for NodeAnchorBasedLinkPrediction Task)
-* **Valid Samples** (for NodeAnchorBasedLinkPrediction Task)
-  1. In random_negative_rooted_neighborhood_samples: every node which appears in the graph should get an embedding. Hence, they must appear in the rooted neighborhoods for inferencer to use, regardless of what their (in-)neighborhood looks like.
+
+  1. In random_negative_rooted_neighborhood_samples: every node which appears in the graph should get an embedding.
+     Hence, they must appear in the rooted neighborhoods for inferencer to use, regardless of what their
+     (in-)neighborhood looks like.
-  2. In node_anchor_based_link_prediction_samples: every node which has any outgoing edge could be a valid training sample, since in practice we will want to have our trained model robustly perform well at ranking the positive edge above negative edges, regardless of what their (in-)neighborhood looks like.
+
+  1. In node_anchor_based_link_prediction_samples: every node which has any outgoing edge could be a valid training
+     sample, since in practice we will want to have our trained model robustly perform well at ranking the positive
+     edge above negative edges, regardless of what their (in-)neighborhood looks like.
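+
+A minimal sketch of the idea behind the deterministic, hash-based permutation mentioned under **Neighborhood sampling**
+(the function name is illustrative; this is not the actual SGS implementation):
+
+```
+import scala.util.hashing.MurmurHash3
+
+// Order neighbors by a hash of (root, neighbor) instead of a random shuffle: the permutation depends
+// only on the ids, so re-running the job reproduces exactly the same k samples per root node.
+def deterministicSample(rootId: Long, neighbors: Seq[Long], k: Int): Seq[Long] =
+  neighbors.sortBy(n => MurmurHash3.stringHash(s"$rootId:$n")).take(k)
+```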

 ### Implementation:

@@ -83,48 +102,70 @@ rootLogger.appenderRef.stdout.ref = console`
 - Can be run on mac, gCloud VMs and Dataproc VMs. (cmds are available in Makefile)
-
 ## Scalability
----
+
+______________________________________________________________________

 ### Performance
-Time: It is not expected to have SGS task running more than 3hrs. If that's happening the cluster size is too small, there are spills during some stages or caching steps implemented in code are not taking into effect.
+
+Time: The SGS task is not expected to run for more than 3 hrs. If that's happening, the cluster size is too small,
+there are spills during some stages, or caching steps implemented in code are not taking effect.

 Cost: See Google Cloud [pricing calculator](https://cloud.google.com/products/calculator/#id=)

 ### Load
+
 As any of below factor increases we should think of strategies to scale the SGS job:
+
 1. Graph size (number of nodes and edges)
-2. Number of neighborhood samples and Number of Positive Samples (if any)
-3. Node feature Dim
-4. Edge feature Dim
-5. Number of hops
+1. Number of neighborhood samples and Number of Positive Samples (if any)
+1. Node feature Dim
+1. Edge feature Dim
+1. Number of hops

 ### Spark Optimization/Scaling strategies

-* The easiest way is to scale the cluster horizontally. As of now the number of data partitions (i.e. spark sql partitions), is proportional to cluster size, such that cpu usage is at max (>90%) to avoid waste of resources. Scaling out number of workers or changing the machine types should be done carefully. If we pick cluster that is too big for the task it will be to the job's disadvantage. (unnecessary large number of data partitions leads to slow reads, large network communications and long shuffle time)
-* Use Local SSDs (NVMe interface) attached to each worker to store cached data. Do not cache data into working memory, since it intensifies spills. For final stages of SGS job (pointed out in the code), spills are inevitable but the spills are into SSDs and hence won't hurt performance badly).
-* Each SGS task firstly creates a SubgraphDF (which includes all hydrated neighborhoods for all root nodes). This SubgraphDF must be cached and cache is NOT triggered unless the RootedNodeNeighborhood samples are written. Then downstream tasks do not repeat neighborhood sampling/hydration and in turn use cached data which saves at least one hr as of now for MAU data.
-* Avoid using UDFs, instead maximize leveraging spark.sql [functions](https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html).
-* There are stages that require repartitioning as optimization strategy. That is because the size of data in each partition grows (we see spills) and by repartitioning we reduce the size of data in each partition. You can find stages that need repartition from Spark UI.
-* Do NOT change YARN default parameters for Dataproc Cluster.
-* If cost become a bottleneck, (and despite trying above strategies we still need to scale) Autoscaling Dataproc with customized autoscaling policy should be a potential solution.
+- The easiest way is to scale the cluster horizontally. As of now the number of data partitions (i.e. spark sql
+  partitions) is proportional to cluster size, such that cpu usage is at max (>90%) to avoid waste of resources.
+  Scaling out the number of workers or changing the machine types should be done carefully. If we pick a cluster that
+  is too big for the task it will be to the job's disadvantage (an unnecessarily large number of data partitions leads
+  to slow reads, heavy network communication and long shuffle times).
+- Use Local SSDs (NVMe interface) attached to each worker to store cached data. Do not cache data into working memory,
+  since it intensifies spills. For final stages of the SGS job (pointed out in the code), spills are inevitable, but
+  the spills are onto SSDs and hence won't hurt performance badly.
+- Each SGS task first creates a SubgraphDF (which includes all hydrated neighborhoods for all root nodes). This
+  SubgraphDF must be cached; the cache is NOT triggered until the RootedNodeNeighborhood samples are written. Then
+  downstream tasks do not repeat neighborhood sampling/hydration and in turn use cached data, which saves at least one
+  hr as of now for MAU data.
+- Avoid using UDFs; instead maximize leveraging spark.sql
+  [functions](https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html).
+- There are stages that require repartitioning as an optimization strategy. That is because the size of data in each
+  partition grows (we see spills) and by repartitioning we reduce the size of data in each partition. You can find
+  stages that need repartitioning from the Spark UI.
+- Do NOT change YARN default parameters for Dataproc Cluster.
+- If cost becomes a bottleneck (and despite trying the above strategies we still need to scale), autoscaling Dataproc
+  with a customized autoscaling policy should be a potential solution.

 ## Resources
----
+
+______________________________________________________________________
+
 Naming Conventions

-* [Scala](https://docs.scala-lang.org/style/naming-conventions.html)
+- [Scala](https://docs.scala-lang.org/style/naming-conventions.html)

-* [Spark](https://github.com/databricks/scala-style-guide)
+- [Spark](https://github.com/databricks/scala-style-guide)

 Spark
-* [SQL](https://spark.apache.org/docs/latest/sql-ref-functions.html)
-* [DataFrame](https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html)
+
+- [SQL](https://spark.apache.org/docs/latest/sql-ref-functions.html)
+- [DataFrame](https://spark.apache.org/docs/latest/api/scala/org/apache/spark/sql/functions$.html)

 Protobuf and Spark
-* [ScalaPB](https://scalapb.github.io/docs/sparksql/)
-Optimization
-* The version of Spark matters.
-* https://medium.com/@vrba.dave
+
+- [ScalaPB](https://scalapb.github.io/docs/sparksql/)
+
+Optimization
+
+- The version of Spark matters.
+- https://medium.com/@vrba.dave
diff --git a/scala_spark35/subgraph_sampler/src/test/scala/libs/utils/SGSTaskTest.scala b/scala_spark35/subgraph_sampler/src/test/scala/libs/utils/SGSTaskTest.scala
index dce90cb..3ad4ffa 100644
--- a/scala_spark35/subgraph_sampler/src/test/scala/libs/utils/SGSTaskTest.scala
+++ b/scala_spark35/subgraph_sampler/src/test/scala/libs/utils/SGSTaskTest.scala
@@ -18,7 +18,7 @@ import snapchat.research.gbml.graph_schema.Graph
 import snapchat.research.gbml.graph_schema.Node
 import snapchat.research.gbml.training_samples_schema.RootedNodeNeighborhood

-object SGSTaskTest extends AnyFunSuite with BeforeAndAfterAll with SharedSparkSession {
+class SGSTaskTest extends AnyFunSuite with BeforeAndAfterAll with SharedSparkSession {

   var gbmlConfigWrapper: GbmlConfigPbWrapper = _
   var graphMetadataPbWrapper: GraphMetadataPbWrapper = _
diff --git a/scripts/assert_yaml_configs_parse.py b/scripts/assert_yaml_configs_parse.py
index fd22a36..40b2d49 100644
--- a/scripts/assert_yaml_configs_parse.py
+++ b/scripts/assert_yaml_configs_parse.py
@@ -3,6 +3,7 @@
 Note that this does not check the *contents* of the fields set, e.g.
 `python_class_path: not a valid path` will not be caught.
 This script does a subset of what config_validator does, but is faster and can be used locally.
+You may also put "# yaml-check: disable" at the top of a YAML file to ignore it.

 Usage:
     python assert_yaml_configs_parse.py --directories ... [--ignore_regex ...]
@@ -15,6 +16,8 @@
     The script recursively searches through the specified directories for YAML files.
     It attempts to parse each YAML file as either a GiglResourceConfig or GbmlConfig based on the filename.
     If a file cannot be parsed, it logs the error and reports all invalid files at the end.
+    If any of the ignore_regex matches the file path, or the first line of the file starts with "# yaml-check: disable",
+    the file will be skipped.
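+
+    For example, a task_config YAML whose first line starts with:
+
+        # yaml-check: disable
+
+    will be skipped regardless of its remaining contents.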
     Examples:
         To check all YAML files in the 'configs' directory:
@@ -37,6 +40,8 @@

 logger = Logger()

+_IGNORE_COMMENT = "# yaml-check: disable"
+

 def assert_configs_parse(directories: List[str], ignore_regex: List[str] = []) -> None:
     proto_utils = ProtoUtils()
@@ -54,6 +59,12 @@ def assert_configs_parse(directories: List[str], ignore_regex: List[str] = []) -> None:
                 and not any(r.match(file_path) for r in ignore)
                 and ("resource_config" in file or "task_config" in file)
             ):
+                with open(file_path, "r") as f:
+                    if f.readline().strip().startswith(_IGNORE_COMMENT):
+                        logger.info(
+                            f"Ignored {file_path} due to the '{_IGNORE_COMMENT}' header."
+                        )
+                        continue
                 total += 1
                 yaml_file = UriFactory.create_uri(file_path)
                 try:
diff --git a/scripts/build_and_push_docker_image.py b/scripts/build_and_push_docker_image.py
index 0e80d2a..30a43de 100644
--- a/scripts/build_and_push_docker_image.py
+++ b/scripts/build_and_push_docker_image.py
@@ -5,13 +5,12 @@
 from pathlib import Path
 from typing import Optional

-from gigl.common.logger import Logger
-
 from gigl.common.constants import (
     DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG,
     DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG,
     DOCKER_LATEST_BASE_DATAFLOW_IMAGE_NAME_WITH_TAG,
 )
+from gigl.common.logger import Logger

 logger = Logger()
diff --git a/scripts/bump_version.py b/scripts/bump_version.py
index a587dbd..a701cc5 100644
--- a/scripts/bump_version.py
+++ b/scripts/bump_version.py
@@ -2,15 +2,15 @@
 import re
 from typing import Optional

-from gigl.env.pipelines_config import get_resource_config
-
-from .build_and_push_docker_image import build_and_push_image
 from gigl.common.constants import (
     DOCKER_LATEST_BASE_CPU_IMAGE_NAME_WITH_TAG,
     DOCKER_LATEST_BASE_CUDA_IMAGE_NAME_WITH_TAG,
     GIGL_ROOT_DIR,
     PATH_GIGL_PKG_INIT_FILE,
 )
+from gigl.env.pipelines_config import get_resource_config
+
+from .build_and_push_docker_image import build_and_push_image


 def get_current_version(filename: str) -> Optional[str]: