diff --git a/.github/ISSUE_TEMPLATE/bug-report.md b/.github/ISSUE_TEMPLATE/bug-report.md new file mode 100644 index 0000000..e49a80a --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug-report.md @@ -0,0 +1,15 @@ +--- +name: Bug Report +about: Report a bug encountered while operating Ianvs +labels: kind/bug + +--- + + +**What happened**: + +**What you expected to happen**: + +**How to reproduce it (as minimally and precisely as possible)**: + +**Anything else we need to know?**: \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/cleanup.md b/.github/ISSUE_TEMPLATE/cleanup.md new file mode 100644 index 0000000..920f318 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/cleanup.md @@ -0,0 +1,13 @@ +--- +name: Cleanup +about: Cleaning up code, process, or technical debt, etc. +labels: kind/cleanup + +--- + + + +**What should be cleaned up or changed**: + +**Why is this needed**: + diff --git a/.github/ISSUE_TEMPLATE/documentation.md b/.github/ISSUE_TEMPLATE/documentation.md new file mode 100644 index 0000000..abce38c --- /dev/null +++ b/.github/ISSUE_TEMPLATE/documentation.md @@ -0,0 +1,12 @@ +--- +name: Documentation Request +about: Suggest what should be documented in Ianvs +labels: kind/documentation + +--- + + + +**What would you like to be documented**: + +**Why is this needed**: diff --git a/.github/ISSUE_TEMPLATE/enhancement.md b/.github/ISSUE_TEMPLATE/enhancement.md new file mode 100644 index 0000000..a499b01 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/enhancement.md @@ -0,0 +1,11 @@ +--- +name: Enhancement Request +about: Suggest an enhancement to the Ianvs project +labels: kind/feature + +--- + + +**What would you like to be added/modified**: + +**Why is this needed**: diff --git a/.github/ISSUE_TEMPLATE/question.md b/.github/ISSUE_TEMPLATE/question.md new file mode 100644 index 0000000..3c4c977 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/question.md @@ -0,0 +1,10 @@ +--- +name: Question +about: Ask a question about using Ianvs +labels: kind/question + +--- + + 
+ + diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..39bdeba --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,33 @@ + + +**What type of PR is this?** + + + + +**What this PR does / why we need it**: + +**Which issue(s) this PR fixes**: + +Fixes # diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 0000000..7fcc29c --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,67 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ main ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ main ] + schedule: + - cron: '31 15 * * 2' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] + # Learn more: + # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. 
+ # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + + # ℹ️ Command-line programs to run using the OS shell. + # 📚 https://git.io/JvXDl + + # ✏️ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 \ No newline at end of file diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml new file mode 100644 index 0000000..3eb20d0 --- /dev/null +++ b/.github/workflows/main.yaml @@ -0,0 +1,33 @@ +name: CI + +on: + push: + branches: + - main + tags: + pull_request: + +jobs: + pylint: + runs-on: ubuntu-latest + name: pylint + strategy: + matrix: + python-version: [ "3.6", "3.7", "3.8", "3.9" ] + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install libgl1-mesa-glx -y + python -m pip install --upgrade pip + python -m pip install pylint + python -m pip install ${{github.workspace}}/examples/resources/third_party/* + python -m pip install -r ${{github.workspace}}/requirements.txt + - name: Analysing code of core with pylint + run: | + pylint '${{github.workspace}}/core' diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5e2ef7f --- /dev/null +++ b/.gitignore @@ -0,0 +1,133 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C 
extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# .vscode +.vscode/ +.vscode/settings.json + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md new file mode 100644 index 0000000..84a3f18 --- /dev/null +++ b/CODE_OF_CONDUCT.md @@ -0,0 +1,5 @@ +# KubeEdge Community Code of Conduct + +KubeEdge follows the [CNCF Code of Conduct](https://github.com/cncf/foundation/blob/master/code-of-conduct.md). + +Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at kubeedge@gmail.com. diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..03b64b6 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,7 @@ +# Contributing Guidelines + +Welcome to Ianvs. We are excited about the prospect of you joining our [community](https://github.com/kubeedge/community)! The KubeEdge community abides by the CNCF [code of conduct](CODE_OF_CONDUCT.md). Here is an excerpt: + +_As contributors and maintainers of this project, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities._ + +To learn more about contributing to the [Ianvs code repo](README.md), check out the [contributing guide](docs/guides). 
diff --git a/OWNERS b/OWNERS new file mode 100644 index 0000000..519a9d4 --- /dev/null +++ b/OWNERS @@ -0,0 +1,9 @@ +approvers: + - MooreZheng + - JimmyYang20 + +reviewers: + - MooreZheng + - JimmyYang20 + - jaypume + - back1860 diff --git a/README.md b/README.md new file mode 100644 index 0000000..b50b07d --- /dev/null +++ b/README.md @@ -0,0 +1,106 @@ +This repository has been transferred to [kubeedge/ianvs](https://github.com/kubeedge/ianvs) on 2022-07-14. + +# Ianvs +[![CI](https://github.com/kubeedge-sedna/ianvs/workflows/CI/badge.svg?branch=v0.1)](https://github.com/kubeedge-sedna/ianvs/actions) +[![LICENSE SCAN](https://app.fossa.com/api/projects/git%2Bgithub.com%2Fkubeedge-sedna%2Fianvs.svg?type=shield)](https://app.fossa.com/projects/git%2Bgithub.com%2Fkubeedge-sedna%2Fianvs?ref=badge_shield) +[![LICENSE](https://img.shields.io/github/license/kubeedge-sedna/ianvs.svg)](/LICENSE) +
Ianvs is a distributed synergy AI benchmarking project incubated in KubeEdge SIG AI. Ianvs aims to test the performance of distributed synergy AI solutions following recognized standards, in order to facilitate more efficient and effective development. More specifically, Ianvs prepares not only test cases with datasets and corresponding algorithms, but also benchmarking tools including simulation and hyper-parameter searching. Ianvs also reveals best practices for developers and end users with presentation tools including leaderboards and test reports. + +## Scope +The distributed synergy AI benchmarking Ianvs aims to test the performance of distributed synergy AI solutions following recognized standards, in order to facilitate more efficient and effective development. + +The scope of Ianvs includes +- Providing end-to-end benchmark toolkits across devices, edge nodes and cloud nodes based on typical distributed-synergy AI paradigms and applications. + - Tools to manage test environment. 
For example, it would be necessary to support the CRUD (Create, Read, Update and Delete) actions in test environments. Elements of such test environments include algorithm-wise and system-wise configuration. + - Tools to control test cases. Typical examples include paradigm templates, simulation tools, and hyper-parameter-based assistant tools. + - Tools to manage benchmark presentation, e.g., leaderboard and test report generation. +- Cooperation with other organizations or communities, e.g., in KubeEdge SIG AI, to establish comprehensive benchmarks and develop related applications, which can include but are not limited to + - Dataset collection, re-organization, and publication + - Formalized specifications, e.g., standards + - Holding competitions or coding events, e.g., open source promotion plan + - Maintaining solution leaderboards or certifications for commercial usage + + +## Architecture +The architectures and related concepts are shown in the below figure. Ianvs is designed to run within a single node. Critical components include +- Test Environment Manager: the CRUD of test environments serving for global usage +- Test Case Controller: control the runtime behavior of test cases like instance generation and vanish + - Generation Assistant: assist users to generate test cases based on certain rules or constraints, e.g., the range of parameters + - Simulation Controller: control the simulation process of edge-cloud synergy AI, including the instance generation and vanishment of simulation containers +- Story Manager: the output management and presentation of the test case, e.g., leaderboards + + +![](docs/guides/images/ianvs_arch.png) + +More details on Ianvs components: +1. 
Test-Environment Manager supports the CRUD of Test environments, which basically includes + - Algorithm-wise configuration + - Public datasets + - Pre-processing algorithms + - Feature engineering algorithms + - Post-processing algorithms like metric computation + - System-wise configuration + - Overall architecture + - System constraints or budgets + - End-to-end cross-node + - Per node +1. Test-case Controller, which includes but is not limited to the following components + - Templates of common distributed-synergy-AI paradigms, which can help the developer to prepare their test case without too much effort. Such paradigms include edge-cloud synergy joint inference, incremental learning, federated learning, and lifelong learning. + - Simulation tools. Develop simulated test environments for test cases + - Other tools to assist test-case generation. For instance, prepare test cases based on a given range of hyper-parameters. +1. Story Manager, which includes but is not limited to the following components + - Leaderboard generation + - Test report generation + + +## Guides + +### Documents + +Documentation is located on [readthedoc.io](https://ianvs.readthedocs.io/). These documents can help you understand Ianvs better. + + +### Installation +Follow the [Ianvs installation document](docs/guides/how-to-install-ianvs.md) to install Ianvs. + +### Examples +Scenario PCB-AoI:[Industrial Defect Detection on the PCB-AoI Dataset](/examples/pcb-aoi/README.md). +Example PCB-AoI-1:[Testing single task learning in industrial defect detection](/docs/proposals/test-reports/testing-single-task-learning-in-industrial-defect-detection-with-pcb-aoi.md). +Example PCB-AoI-2:[Testing incremental learning in industrial defect detection](/docs/proposals/test-reports/testing-incremental-learning-in-industrial-defect-detection-with-pcb-aoi.md). 
+ + +## Roadmap + +* [2022 H2 Roadmap](docs/roadmap.md) + +## Meeting + +Regular Community Meeting for KubeEdge SIG AI: +- Europe Time: **Thursdays at 16:30-17:30 Beijing Time** (biweekly, starting from Feb. 2022). +([Convert to your timezone.](https://www.thetimezoneconverter.com/?t=16%3A30&tz=GMT%2B8&)) +- Pacific Time: **Thursdays at 10:00-11:00 Beijing Time** (biweekly, starting from Feb. 2022). +([Convert to your timezone.](https://www.thetimezoneconverter.com/?t=10%3A00&tz=GMT%2B8&)) + +Resources: +- [Meeting notes and agenda](https://docs.google.com/document/d/12n3kGUWTkAH4q2Wv5iCVGPTA_KRWav_eakbFrF9iAww/edit) +- [Meeting recordings](https://www.youtube.com/playlist?list=PLQtlO1kVWGXkRGkjSrLGEPJODoPb8s5FM) +- [Meeting link](https://zoom.us/j/4167237304) +- [Meeting Calendar](https://calendar.google.com/calendar/u/0/r?cid=Y19nODluOXAwOG05MzFiYWM3NmZsajgwZzEwOEBncm91cC5jYWxlbmRhci5nb29nbGUuY29t) | [Subscribe](https://calendar.google.com/calendar/u/0/r?cid=OHJqazhvNTE2dmZ0ZTIxcWlidmxhZTNsajRAZ3JvdXAuY2FsZW5kYXIuZ29vZ2xlLmNvbQ) + +## Contact + + + +If you have questions, feel free to reach out to us in the following ways: +- [slack channel](https://app.slack.com/client/TDZ5TGXQW/C01EG84REVB/details) + +## Contributing + +If you're interested in being a contributor and want to get involved in developing the Ianvs code, please see [CONTRIBUTING](CONTRIBUTING.md) for details on submitting patches and the contribution workflow. + +## License + +Ianvs is under the Apache 2.0 license. See the [LICENSE](LICENSE) file for details. diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..782f1db --- /dev/null +++ b/core/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .__version__ import __version__ diff --git a/core/__version__.py b/core/__version__.py new file mode 100644 index 0000000..9ea5051 --- /dev/null +++ b/core/__version__.py @@ -0,0 +1,22 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Ianvs version information""" + +# follow Semantic Versioning (https://semver.org/) +_MAJOR_V = '0' +_MINOR_V = '1' +_PATCH_V = '0' + +__version__ = '.'.join([_MAJOR_V, _MINOR_V, _PATCH_V]) diff --git a/core/cmd/__init__.py b/core/cmd/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/cmd/benchmarking.py b/core/cmd/benchmarking.py new file mode 100644 index 0000000..5d44451 --- /dev/null +++ b/core/cmd/benchmarking.py @@ -0,0 +1,69 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""main""" + +import sys +import argparse + +from core.common.log import LOGGER +from core.common import utils +from core.cmd.obj import BenchmarkingJob +from core.__version__ import __version__ + + +def main(): + """ main command-line interface to ianvs""" + try: + parser = _generate_parser() + args = parser.parse_args() + config_file = args.benchmarking_config_file + if not utils.is_local_file(config_file): + raise SystemExit(f"not found benchmarking config({config_file}) file in local") + + config = utils.yaml2dict(args.benchmarking_config_file) + job = BenchmarkingJob(config[str.lower(BenchmarkingJob.__name__)]) + job.run() + + LOGGER.info("benchmarkingjob runs successfully.") + except Exception as err: + raise Exception(f"benchmarkingjob runs failed, error: {err}.") from err + + +def _generate_parser(): + parser = argparse.ArgumentParser(description='AI Benchmarking Tool') + parser.prog = "ianvs" + + parser.add_argument("-f", + "--benchmarking_config_file", + nargs="?", + type=str, + help="run a benchmarking job, " + "and the benchmarking config file must be yaml/yml file.") + + parser.add_argument('-v', + '--version', + action='version', + version=__version__, + help='show program version info and exit.') + + if len(sys.argv) == 1: + parser.print_help(sys.stderr) + sys.exit(1) + + return parser + + +if __name__ == '__main__': + main() diff --git a/core/cmd/obj/__init__.py b/core/cmd/obj/__init__.py new file mode 100644 index 0000000..1000701 --- /dev/null +++ b/core/cmd/obj/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 The KubeEdge Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .benchmarkingjob import BenchmarkingJob diff --git a/core/cmd/obj/benchmarkingjob.py b/core/cmd/obj/benchmarkingjob.py new file mode 100644 index 0000000..7f88f27 --- /dev/null +++ b/core/cmd/obj/benchmarkingjob.py @@ -0,0 +1,119 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""BenchmarkingJob""" + +import os + +from core.common import utils +from core.common.constant import TestObjectType +from core.testenvmanager.testenv import TestEnv +from core.storymanager.rank import Rank +from core.testcasecontroller.testcasecontroller import TestCaseController + + +# pylint: disable=too-few-public-methods +class BenchmarkingJob: + """ + BenchmarkingJob: + providing a end-to-end benchmarking job. + + Parameters + ---------- + config: dict + config of a end-to-end benchmarking job, + includes: test env, algorithms, rank setting, etc. 
+ + """ + + def __init__(self, config): + self.name: str = "" + self.workspace: str = "./workspace" + self.test_object: dict = {} + self.rank = None + self.test_env = None + self.testcase_controller = TestCaseController() + self._parse_config(config) + + def _check_fields(self): + if not self.name and not isinstance(self.name, str): + ValueError(f"algorithm name({self.name}) must be provided and be string type.") + raise ValueError(f"benchmarkingjob's name({self.name}) must be provided" + f" and be string type.") + + if not isinstance(self.workspace, str): + raise ValueError(f"benchmarkingjob's workspace({self.workspace}) must be string type.") + + if not self.test_object and not isinstance(self.test_object, dict): + raise ValueError(f"benchmarkingjob's test_object({self.test_object})" + f" must be dict type.") + + test_object_types = [e.value for e in TestObjectType.__members__.values()] + test_object_type = self.test_object.get("type") + if test_object_type not in test_object_types: + raise ValueError( + f"benchmarkingjob' test_object doesn't support the type({test_object_type}), " + f"the following test object types can be selected: {test_object_types}.") + + if not self.test_object.get(test_object_type): + raise ValueError(f"benchmarkingjob' test_object doesn't find" + f" the field({test_object_type}).") + + def run(self): + """ + run a end-to-end benchmarking job, + includes prepare test env, + run all test cases, + save results of all test cases, + plot the results according to the visualization config of rank. 
+ """ + self.workspace = os.path.join(self.workspace, self.name) + + self.test_env.prepare() + + self.testcase_controller.build_testcases(test_env=self.test_env, + test_object=self.test_object) + + succeed_testcases, test_results = self.testcase_controller.run_testcases(self.workspace) + + if test_results: + self.rank.save(succeed_testcases, test_results, output_dir=self.workspace) + self.rank.plot() + + def _parse_config(self, config: dict): + # pylint: disable=C0103 + for k, v in config.items(): + if k == str.lower(TestEnv.__name__): + self._parse_testenv_config(v) + elif k == str.lower(Rank.__name__): + self._parse_rank_config(v) + else: + if k in self.__dict__: + self.__dict__[k] = v + + self._check_fields() + + def _parse_testenv_config(self, config_file): + if not utils.is_local_file(config_file): + raise Exception(f"not found testenv config file({config_file}) in local") + + try: + config = utils.yaml2dict(config_file) + self.test_env = TestEnv(config) + except Exception as err: + raise Exception(f"testenv config file({config_file}) is not supported, " + f"error: {err}") from err + + def _parse_rank_config(self, config): + self.rank = Rank(config) diff --git a/core/common/__init__.py b/core/common/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/common/constant.py b/core/common/constant.py new file mode 100644 index 0000000..d6bdc3b --- /dev/null +++ b/core/common/constant.py @@ -0,0 +1,55 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base constant""" + +from enum import Enum + + +class DatasetFormat(Enum): + """ + File format of inputting dataset. + Currently, file formats are as follows: txt, csv. + """ + CSV = "csv" + TXT = "txt" + + +class ParadigmType(Enum): + """ + Algorithm paradigm type. + """ + SINGLE_TASK_LEARNING = "singletasklearning" + INCREMENTAL_LEARNING = "incrementallearning" + + +class ModuleType(Enum): + """ + Algorithm module type. + """ + BASEMODEL = "basemodel" + HARD_EXAMPLE_MINING = "hard_example_mining" + + +class SystemMetricType(Enum): + """ + System metric type of ianvs. + """ + SAMPLES_TRANSFER_RATIO = "samples_transfer_ratio" + +class TestObjectType(Enum): + """ + Test object type of ianvs. + """ + ALGORITHMS = "algorithms" diff --git a/core/common/log.py b/core/common/log.py new file mode 100644 index 0000000..5bf81ec --- /dev/null +++ b/core/common/log.py @@ -0,0 +1,44 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Base logger""" + +import logging +import colorlog + + +# pylint: disable=too-few-public-methods +class Logger: + """ + Deafult logger in ianvs + Args: + name(str) : Logger name, default is 'ianvs' + """ + + def __init__(self, name: str = "ianvs"): + self.logger = logging.getLogger(name) + + self.format = colorlog.ColoredFormatter( + '%(log_color)s[%(asctime)-15s] %(filename)s(%(lineno)d)' + ' [%(levelname)s]%(reset)s - %(message)s', ) + + self.handler = logging.StreamHandler() + self.handler.setFormatter(self.format) + + self.logger.addHandler(self.handler) + self.logger.setLevel(level=logging.INFO) + self.logger.propagate = False + + +LOGGER = Logger().logger diff --git a/core/common/utils.py b/core/common/utils.py new file mode 100644 index 0000000..92b587b --- /dev/null +++ b/core/common/utils.py @@ -0,0 +1,99 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""This script contains some common tools.""" + +import importlib +import os +import sys +import time + +from importlib import import_module +from inspect import getfullargspec +import yaml + + +def is_local_file(url): + """Check if the url is a file and already exists locally.""" + return os.path.isfile(url) + + +def is_local_dir(url): + """Check if the url is a dir and already exists locally.""" + return os.path.isdir(url) + + +def get_file_format(url): + """Get file format of the url.""" + return os.path.splitext(url)[-1][1:] + + +def parse_kwargs(func, **kwargs): + """Get valid parameters of the func in kwargs.""" + if not callable(func): + return kwargs + need_kw = getfullargspec(func) + if need_kw.varkw == 'kwargs': + return kwargs + return {k: v for k, v in kwargs.items() if k in need_kw.args} + + +def get_local_time(): + """Get local time.""" + return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) + + +def py2dict(url): + """Convert py file to the dict.""" + if url.endswith('.py'): + module_name = os.path.basename(url)[:-3] + config_dir = os.path.dirname(url) + sys.path.insert(0, config_dir) + mod = import_module(module_name) + sys.path.pop(0) + raw_dict = { + name: value + for name, value in mod.__dict__.items() + if not name.startswith('__') + } + sys.modules.pop(module_name) + + return raw_dict + + raise Exception('config file must be the py format') + + +def yaml2dict(url): + """Convert yaml file to the dict.""" + if url.endswith('.yaml') or url.endswith('.yml'): + with open(url, "rb") as file: + raw_dict = yaml.load(file, Loader=yaml.SafeLoader) + + return raw_dict + + raise Exception('config file must be the yaml format') + + +def load_module(url): + """Load python module.""" + module_path, module_name = os.path.split(url) + if os.path.isfile(url): + module_name = module_name.split(".")[0] + + sys.path.insert(0, module_path) + try: + importlib.import_module(module_name) + sys.path.pop(0) + except Exception as err: + raise Exception(f"load 
module(url={url}) failed, error: {err}") from err diff --git a/core/storymanager/__init__.py b/core/storymanager/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/storymanager/rank/__init__.py b/core/storymanager/rank/__init__.py new file mode 100644 index 0000000..58a4594 --- /dev/null +++ b/core/storymanager/rank/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .rank import Rank diff --git a/core/storymanager/rank/rank.py b/core/storymanager/rank/rank.py new file mode 100644 index 0000000..d21778e --- /dev/null +++ b/core/storymanager/rank/rank.py @@ -0,0 +1,259 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Rank""" + +import copy +import os + +import numpy as np +import pandas as pd + +from core.common import utils +from core.storymanager.visualization import get_visualization_func + + +# pylint: disable=R0902 +class Rank: + """ + Rank: + the output management and presentation of the test case, + e.g., leaderboards + + """ + + def __init__(self, config): + self.sort_by: list = [] + self.visualization: dict = { + "mode": "selected_only", + "method": "print_table" + } + self.selected_dataitem: dict = { + "paradigms": ["all"], + "modules": ["all"], + "hyperparameters": ["all"], + "metrics": ["all"] + } + self.save_mode: str = "selected_and_all" + + self.all_df_header = None + self.all_df = None + self.all_rank_file = None + self.selected_rank_file = None + self._parse_config(config) + + def _parse_config(self, config): + for attribute, value in config.items(): + if attribute in self.__dict__: + self.__dict__[attribute] = value + + self._check_fields() + + def _check_fields(self): + if not self.sort_by and not isinstance(self.sort_by, list): + raise ValueError(f"rank's sort_by({self.sort_by}) must be provided and be list type.") + + if not self.visualization and not isinstance(self.visualization, dict): + raise ValueError(f"rank's visualization({self.visualization}) " + f"must be provided and be dict type.") + + if not self.selected_dataitem and not isinstance(self.selected_dataitem, dict): + raise ValueError(f"rank's selected_dataitem({self.selected_dataitem}) " + f"must be provided and be dict type.") + + if not self.selected_dataitem.get("paradigms"): + raise ValueError("not found paradigms of selected_dataitem in rank.") + + if not self.selected_dataitem.get("modules"): + raise ValueError("not found modules of selected_dataitem in rank.") + + if not self.selected_dataitem.get("metrics"): + raise ValueError("not found metrics of selected_dataitem in rank.") + + if not self.save_mode and not isinstance(self.save_mode, list): + raise ValueError(f"rank's 
save_mode({self.save_mode}) " + f"must be provided and be list type.") + + @classmethod + def _get_all_metric_names(cls, test_results) -> list: + metrics = set() + # pylint: disable=C0103 + for _, v in test_results.items(): + metrics.update(v[0].keys()) + return list(metrics) + + @classmethod + def _get_all_module_types(cls, test_cases) -> list: + all_module_types = [] + for test_case in test_cases: + modules = test_case.algorithm.modules + for module_type in modules.keys(): + if module_type not in all_module_types: + all_module_types.append(module_type) + return all_module_types + + @classmethod + def _get_algorithm_hyperparameters(cls, algorithm): + hps = {} + for module in algorithm.modules.values(): + hps.update(**module.hyperparameters) + return hps + + def _get_all_hps_names(self, test_cases) -> list: + all_hps_names = [] + for test_case in test_cases: + algorithm = test_case.algorithm + hps = self._get_algorithm_hyperparameters(algorithm) + hps_names = hps.keys() + + for hps_name in hps_names: + if hps_name not in all_hps_names: + all_hps_names.append(hps_name) + return all_hps_names + + def _sort_all_df(self, all_df, all_metric_names): + sort_metric_list = [] + is_ascend_list = [] + for ele in self.sort_by: + metric_name = next(iter(ele)) + + if metric_name not in all_metric_names: + continue + + sort_metric_list.append(metric_name) + is_ascend_list.append(ele.get(metric_name) == "ascend") + + return all_df.sort_values(by=sort_metric_list, ascending=is_ascend_list) + + def _get_all(self, test_cases, test_results) -> pd.DataFrame: + all_df = pd.DataFrame(columns=self.all_df_header) + for i, test_case in enumerate(test_cases): + all_df.loc[i] = [np.NAN for i in range(len(self.all_df_header))] + # fill name column of algorithm + algorithm = test_case.algorithm + all_df.loc[i][0] = algorithm.name + # fill metric columns of algorithm + for metric_name in test_results[test_case.id][0]: + all_df.loc[i][metric_name] = test_results[test_case.id][0].get(metric_name) 
+ + # file paradigm column of algorithm + all_df.loc[i]["paradigm"] = algorithm.paradigm_type + + # fill module columns of algorithm + for module_type, module in algorithm.modules.items(): + all_df.loc[i][module_type] = module.name + + # fill hyperparameters columns of algorithm modules + hps = self._get_algorithm_hyperparameters(algorithm) + + # pylint: disable=C0103 + for k, v in hps.items(): + all_df.loc[i][k] = v + # fill time and output dir of testcase + all_df.loc[i][-2:] = [test_results[test_case.id][1], test_case.output_dir] + + if utils.is_local_file(self.all_rank_file): + old_df = pd.read_csv(self.all_rank_file, delim_whitespace=True, index_col=0) + all_df = all_df.append(old_df) + + return self._sort_all_df(all_df, self._get_all_metric_names(test_results)) + + def _save_all(self): + all_df = copy.deepcopy(self.all_df) + all_df.index = pd.np.arange(1, len(all_df) + 1) + all_df.to_csv(self.all_rank_file, index_label="rank", encoding="utf-8", sep=" ") + + def _get_selected(self, test_cases, test_results) -> pd.DataFrame: + module_types = self.selected_dataitem.get("modules") + if module_types == ["all"]: + module_types = self._get_all_module_types(test_cases) + + hps_names = self.selected_dataitem.get("hyperparameters") + if hps_names == ["all"]: + hps_names = self._get_all_hps_names(test_cases) + + metric_names = self.selected_dataitem.get("metrics") + if metric_names == ["all"]: + metric_names = self._get_all_metric_names(test_results) + + header = ["algorithm", *metric_names, "paradigm", *module_types, *hps_names, "time", "url"] + + all_df = copy.deepcopy(self.all_df) + selected_df = pd.DataFrame(all_df, columns=header) + selected_df = selected_df.drop_duplicates(header[:-2]) + + paradigms = self.selected_dataitem.get("paradigms") + if paradigms != ["all"]: + selected_df = selected_df.loc[selected_df["paradigm"].isin(paradigms)] + return selected_df + + def _save_selected(self, test_cases, test_results): + selected_df = self._get_selected(test_cases, 
test_results) + selected_df.index = pd.np.arange(1, len(selected_df) + 1) + selected_df.to_csv(self.selected_rank_file, index_label="rank", encoding="utf-8", sep=" ") + + def _prepare(self, test_cases, test_results, output_dir): + all_metric_names = self._get_all_metric_names(test_results) + all_hps_names = self._get_all_hps_names(test_cases) + all_module_types = self._get_all_module_types(test_cases) + self.all_df_header = ["algorithm", *all_metric_names, "paradigm", + *all_module_types, *all_hps_names, "time", "url"] + + rank_output_dir = os.path.join(output_dir, "rank") + if not utils.is_local_dir(rank_output_dir): + os.makedirs(rank_output_dir) + + self.all_rank_file = os.path.join(rank_output_dir, "all_rank.csv") + self.selected_rank_file = os.path.join(rank_output_dir, "selected_rank.csv") + + self.all_df = self._get_all(test_cases, test_results) + + def save(self, test_cases, test_results, output_dir): + """ + save rank according to the save mode, include: + e.g.: "selected_and_all", "selected_only" + + Parameters: + ---------- + test_cases: list + test_results: list + output_dir: string + + """ + + self._prepare(test_cases, test_results, output_dir) + + if self.save_mode == "selected_and_all": + self._save_all() + self._save_selected(test_cases, test_results) + + if self.save_mode == "selected_only": + self._save_selected(test_cases, test_results) + + def plot(self): + """ + plot rank according to the visual method, include + e.g.: print_table + + """ + + method = self.visualization.get("method") + if self.visualization.get("mode") == "selected_only": + try: + func = get_visualization_func(method) + func(self.selected_rank_file) + except Exception as err: + raise Exception( + f"process visualization(method={method}) of " + f"rank file({self.selected_rank_file}) failed, error: {err}.") from err diff --git a/core/storymanager/visualization/__init__.py b/core/storymanager/visualization/__init__.py new file mode 100644 index 0000000..abf528b --- /dev/null +++ 
b/core/storymanager/visualization/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .visualization import get_visualization_func diff --git a/core/storymanager/visualization/visualization.py b/core/storymanager/visualization/visualization.py new file mode 100644 index 0000000..fab23e2 --- /dev/null +++ b/core/storymanager/visualization/visualization.py @@ -0,0 +1,31 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Visualization""" + +import sys + +from prettytable import from_csv + + +def print_table(rank_file): + """ print rank of the test""" + with open(rank_file, "r", encoding="utf-8") as file: + table = from_csv(file) + print(table) + + +def get_visualization_func(mode): + """ get visualization func """ + return getattr(sys.modules[__name__], mode) diff --git a/core/testcasecontroller/__init__.py b/core/testcasecontroller/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/testcasecontroller/algorithm/__init__.py b/core/testcasecontroller/algorithm/__init__.py new file mode 100644 index 0000000..c70b665 --- /dev/null +++ b/core/testcasecontroller/algorithm/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .algorithm import Algorithm diff --git a/core/testcasecontroller/algorithm/algorithm.py b/core/testcasecontroller/algorithm/algorithm.py new file mode 100644 index 0000000..17943f3 --- /dev/null +++ b/core/testcasecontroller/algorithm/algorithm.py @@ -0,0 +1,147 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Algorithm""" + +import copy + +from core.common.constant import ParadigmType +from core.testcasecontroller.algorithm.module import Module +from core.testcasecontroller.algorithm.paradigm import SingleTaskLearning, IncrementalLearning +from core.testcasecontroller.generation_assistant import get_full_combinations + + +# pylint: disable=too-few-public-methods +class Algorithm: + """ + Algorithm: typical distributed-synergy AI algorithm paradigm. + Notes: + 1. Ianvs serves as testing tools for test objects, e.g., algorithms. + 2. Ianvs does NOT include code directly on test object. + 3. Algorithms serve as typical test objects in Ianvs + and detailed algorithms are thus NOT included in this Ianvs python file. + 4. As for the details of example test objects, e.g., algorithms, + please refer to third party packages in Ianvs example. + For example, AI workflow and interface pls refer to sedna + (sedna docs: https://sedna.readthedocs.io/en/latest/api/lib/index.html), + and module implementation pls refer to `examples' test algorithms`, + e.g., basemodel.py, hard_example_mining.py. + + Parameters + ---------- + name : string + name of the algorithm paradigm + config : dict + config of the algorithm paradigm, includes: paradigm type, modules, etc. 
+ """ + + def __init__(self, name, config): + self.name = name + self.paradigm_type: str = "" + self.incremental_learning_data_setting: dict = { + "train_ratio": 0.8, + "splitting_method": "default" + } + self.initial_model_url: str = "" + self.modules: list = [] + self.modules_list = None + self._parse_config(config) + + def paradigm(self, workspace: str, **kwargs): + """ + get test process of AI algorithm paradigm. + + Parameters: + ---------- + workspace: string + the output of test + kwargs: dict + config required for the test process of AI algorithm paradigm. + + Returns: + ------- + the process of AI algorithm paradigm: instance + + """ + + config = kwargs + # pylint: disable=C0103 + for k, v in self.__dict__.items(): + config.update({k: v}) + + if self.paradigm_type == ParadigmType.SINGLE_TASK_LEARNING.value: + return SingleTaskLearning(workspace, **config) + + if self.paradigm_type == ParadigmType.INCREMENTAL_LEARNING.value: + return IncrementalLearning(workspace, **config) + + return None + + def _check_fields(self): + if not self.name and not isinstance(self.name, str): + raise ValueError(f"algorithm name({self.name}) must be provided and be string type.") + + if not self.paradigm_type and not isinstance(self.paradigm_type, str): + raise ValueError( + f"algorithm paradigm({self.paradigm_type}) must be provided and be string type.") + + paradigm_types = [e.value for e in ParadigmType.__members__.values()] + if self.paradigm_type not in paradigm_types: + raise ValueError(f"not support paradigm({self.paradigm_type})." 
+ f"the following paradigms can be selected: {paradigm_types}") + + if not isinstance(self.incremental_learning_data_setting, dict): + raise ValueError( + f"algorithm incremental_learning_data_setting" + f"({self.incremental_learning_data_setting} must be dictionary type.") + + if not isinstance(self.initial_model_url, str): + raise ValueError( + f"algorithm initial_model_url({self.initial_model_url}) must be string type.") + + def _parse_config(self, config): + config_dict = config[str.lower(Algorithm.__name__)] + # pylint: disable=C0103 + for k, v in config_dict.items(): + if k == str.lower(Module.__name__ + "s"): + self.modules_list = self._parse_modules_config(v) + if k in self.__dict__: + self.__dict__[k] = v + self._check_fields() + + @classmethod + def _parse_modules_config(cls, config): + modules = [] + for module_config in config: + module = Module(module_config) + modules.append(module) + + modules_list = [] + for module in modules: + hps_list = module.hyperparameters_list + if not hps_list: + modules_list.append((module.type, None)) + continue + + module_list = [] + for hps in hps_list: + new_module = copy.deepcopy(module) + new_module.hyperparameters = hps + module_list.append(new_module) + + modules_list.append((module.type, module_list)) + + module_combinations_list = get_full_combinations(modules_list) + + return module_combinations_list diff --git a/core/testcasecontroller/algorithm/module/__init__.py b/core/testcasecontroller/algorithm/module/__init__.py new file mode 100644 index 0000000..c3049ea --- /dev/null +++ b/core/testcasecontroller/algorithm/module/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .module import Module diff --git a/core/testcasecontroller/algorithm/module/module.py b/core/testcasecontroller/algorithm/module/module.py new file mode 100644 index 0000000..7298b18 --- /dev/null +++ b/core/testcasecontroller/algorithm/module/module.py @@ -0,0 +1,192 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Algorithm Module""" + +import copy + +from sedna.common.class_factory import ClassFactory, ClassType + +from core.common import utils +from core.common.constant import ModuleType +from core.testcasecontroller.generation_assistant import get_full_combinations + + +class Module: + """ + Algorithm Module: + provide the configuration and the calling functions of the algorithm module. + Notes: + 1. Ianvs serves as testing tools for test objects, e.g., algorithms. + 2. Ianvs does NOT include code directly on test object. + 3. Algorithms serve as typical test objects in Ianvs + and detailed algorithms are thus NOT included in this Ianvs python file. + 4. 
As for the details of example test objects, e.g., algorithms, + please refer to third party packages in Ianvs example. + For example, AI workflow and interface pls refer to sedna + (sedna docs: https://sedna.readthedocs.io/en/latest/api/lib/index.html), + and module implementation pls refer to `examples' test algorithms`, + e.g., basemodel.py, hard_example_mining.py. + + Parameters + ---------- + config : dict + config of the algorithm module, includes: type, name, + url of the python file that defines algorithm module, + hyperparameters of the calling functions of algorithm module, etc. + + """ + + def __init__(self, config): + self.type: str = "" + self.name: str = "" + self.url: str = "" + self.hyperparameters = None + self.hyperparameters_list = None + self._parse_config(config) + + def _check_fields(self): + if not self.type and not isinstance(self.type, str): + raise ValueError(f"module type({self.type}) must be provided and be string type.") + + types = [e.value for e in ModuleType.__members__.values()] + if self.type not in types: + raise ValueError(f"not support module type({self.type}." + f"the following paradigms can be selected: {types}") + + if not self.name and not isinstance(self.name, str): + raise ValueError(f"module name({self.name}) must be provided and be string type.") + + if not isinstance(self.url, str): + raise ValueError(f"module url({self.url}) must be string type.") + + def basemodel_func(self): + """ + get basemodel module function of the module. 
+ + Returns + -------- + function + + """ + + if not self.url: + raise ValueError(f"url({self.url}) of basemodel module must be provided.") + + try: + utils.load_module(self.url) + # pylint: disable=E1134 + basemodel = ClassFactory.get_cls(type_name=ClassType.GENERAL, + t_cls_name=self.name)(**self.hyperparameters) + except Exception as err: + raise Exception(f"basemodel module loads class(name={self.name}) failed, " + f"error: {err}.") from err + + return basemodel + + def hard_example_mining_func(self): + """ + get hard example mining function of the module. + + Returns: + -------- + function + + """ + + if self.url: + try: + utils.load_module(self.url) + # pylint: disable=E1134 + func = ClassFactory.get_cls( + type_name=ClassType.HEM, t_cls_name=self.name)(**self.hyperparameters) + + return func + except Exception as err: + raise Exception(f"hard_example_mining module loads class" + f"(name={self.name}) failed, error: {err}.") from err + + # call built-in hard example mining function + hard_example_mining = {"method": self.name} + if self.hyperparameters: + hard_example_mining["param"] = self.hyperparameters + + return hard_example_mining + + def get_module_func(self, module_type): + """ + get function of algorithm module by using module type + + Parameters + --------- + module_type: string + module type, e.g.: basemodel, hard_example_mining, etc. 
+ + Returns + ------ + function + + """ + func_name = f"{module_type}_func" + return getattr(self, func_name) + + def _parse_config(self, config): + # pylint: disable=C0103 + for k, v in config.items(): + if k == "hyperparameters": + self.hyperparameters_list = self._parse_hyperparameters(v) + if k in self.__dict__: + self.__dict__[k] = v + + self._check_fields() + + def _parse_hyperparameters(self, config): + # hp is short for hyperparameters + base_hps = {} + hp_name_values_list = [] + for ele in config: + hp_config = ele.popitem() + hp_name = hp_config[0] + hp_values = hp_config[1].get("values") + if hp_name == "other_hyperparameters": + base_hps = self._parse_other_hyperparameters(hp_values) + else: + hp_name_values_list.append((hp_name, hp_values)) + + hp_combinations_list = get_full_combinations(hp_name_values_list) + + hps_list = [] + for hp_combinations in hp_combinations_list: + base_hps_copy = copy.deepcopy(base_hps) + base_hps_copy.update(**hp_combinations) + hps_list.append(base_hps_copy) + + return hps_list + + @classmethod + def _parse_other_hyperparameters(cls, config_files): + base_hps = {} + for hp_config_file in config_files: + if not utils.is_local_file(hp_config_file): + raise Exception(f"not found other hyperparameters config file" + f"({hp_config_file}) in local") + + try: + other_hps = utils.yaml2dict(hp_config_file) + base_hps.update(**other_hps) + except Exception as err: + raise Exception( + f"other hyperparameters config file({hp_config_file}) is unvild, " + f"error: {err}") from err + return base_hps diff --git a/core/testcasecontroller/algorithm/paradigm/__init__.py b/core/testcasecontroller/algorithm/paradigm/__init__.py new file mode 100644 index 0000000..b580470 --- /dev/null +++ b/core/testcasecontroller/algorithm/paradigm/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .incremental_learning import IncrementalLearning +from .singletask_learning import SingleTaskLearning diff --git a/core/testcasecontroller/algorithm/paradigm/base.py b/core/testcasecontroller/algorithm/paradigm/base.py new file mode 100644 index 0000000..726f4bb --- /dev/null +++ b/core/testcasecontroller/algorithm/paradigm/base.py @@ -0,0 +1,102 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Paradigm Base""" + +import os + +from sedna.core.incremental_learning import IncrementalLearning + +from core.common.constant import ModuleType, ParadigmType + + +class ParadigmBase: + """ + Paradigm Base + Notes: + 1. Ianvs serves as testing tools for test objects, e.g., algorithms. + 2. Ianvs does NOT include code directly on test object. + 3. Algorithms serve as typical test objects in Ianvs + and detailed algorithms are thus NOT included in this Ianvs python file. + 4. 
As for the details of example test objects, e.g., algorithms, + please refer to third party packages in Ianvs example. + For example, AI workflow and interface pls refer to sedna + (sedna docs: https://sedna.readthedocs.io/en/latest/api/lib/index.html), + and module implementation pls refer to `examples' test algorithms`, + e.g., basemodel.py, hard_example_mining.py. + + Parameters + --------- + workspace: string + the output required for test process of AI algorithm paradigm. + kwargs: dict + config required for the test process of AI algorithm paradigm, + e.g.: algorithm modules, dataset, etc. + + """ + + def __init__(self, workspace, **kwargs): + self.modules = kwargs.get("modules") + self.dataset = kwargs.get("dataset") + self.workspace = workspace + self.system_metric_info = {} + self.modules_funcs = self._get_module_funcs() + os.environ["LOCAL_TEST"] = "TRUE" + + def dataset_output_dir(self): + """ + get output dir of dataset in test process + + Returns + ------ + str + + """ + output_dir = os.path.join(self.workspace, "dataset") + if not os.path.exists(output_dir): + os.makedirs(output_dir) + return output_dir + + def _get_module_funcs(self): + module_funcs = {} + for module_type, module in self.modules.items(): + func = module.get_module_func(module_type) + if callable(func): + module_funcs.update({module_type: func}) + return module_funcs + + def build_paradigm_job(self, paradigm_type): + """ + build paradigm job instance according to paradigm type. + this job instance provides the test flow of some algorithm modules. 
+ + Parameters + --------- + paradigm_type: str + + Returns + ------- + instance + + """ + if paradigm_type == ParadigmType.SINGLE_TASK_LEARNING.value: + return self.modules_funcs.get(ModuleType.BASEMODEL.value)() + + if paradigm_type == ParadigmType.INCREMENTAL_LEARNING.value: + return IncrementalLearning( + estimator=self.modules_funcs.get(ModuleType.BASEMODEL.value)(), + hard_example_mining=self.modules_funcs.get( + ModuleType.HARD_EXAMPLE_MINING.value)()) + + return None diff --git a/core/testcasecontroller/algorithm/paradigm/incremental_learning/__init__.py b/core/testcasecontroller/algorithm/paradigm/incremental_learning/__init__.py new file mode 100644 index 0000000..9488f4d --- /dev/null +++ b/core/testcasecontroller/algorithm/paradigm/incremental_learning/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .incremental_learning import IncrementalLearning diff --git a/core/testcasecontroller/algorithm/paradigm/incremental_learning/incremental_learning.py b/core/testcasecontroller/algorithm/paradigm/incremental_learning/incremental_learning.py new file mode 100644 index 0000000..3cbf40d --- /dev/null +++ b/core/testcasecontroller/algorithm/paradigm/incremental_learning/incremental_learning.py @@ -0,0 +1,231 @@ +# Copyright 2022 The KubeEdge Authors. 
class IncrementalLearning(ParadigmBase):
    """
    IncrementalLearning
    provide the flow of incremental learning paradigm.
    Notes:
          1. Ianvs serves as testing tools for test objects, e.g., algorithms.
          2. Ianvs does NOT include code directly on test object.
          3. Algorithms serve as typical test objects in Ianvs
          and detailed algorithms are thus NOT included in this Ianvs python file.
          4. As for the details of example test objects, e.g., algorithms,
          please refer to third party packages in Ianvs example.
          For example, AI workflow and interface pls refer to sedna
          (sedna docs: https://sedna.readthedocs.io/en/latest/api/lib/index.html),
          and module implementation pls refer to `examples' test algorithms`,
          e.g., basemodel.py, hard_example_mining.py.

    Parameters
    ---------
    workspace: string
        the output workspace of incremental learning paradigm.
    kwargs: dict
        config required for the test process of incremental learning paradigm,
        e.g.: algorithm modules, dataset, initial model, incremental rounds,
              model eval config, etc.

    """

    def __init__(self, workspace, **kwargs):
        ParadigmBase.__init__(self, workspace, **kwargs)

        self.incremental_learning_data_setting = kwargs.get("incremental_learning_data_setting")
        self.initial_model = kwargs.get("initial_model_url")
        self.incremental_rounds = kwargs.get("incremental_rounds", 2)
        self.model_eval_config = kwargs.get("model_eval")
        self.system_metric_info = {SystemMetricType.SAMPLES_TRANSFER_RATIO.value: []}

    def run(self):
        """
        run the test flow of incremental learning paradigm.

        Returns
        ------
        test result: numpy.ndarray
        system metric info: dict
            information needed to compute system metrics.

        """

        rounds = self.incremental_rounds
        samples_transfer_ratio_info = self.system_metric_info.get(
            SystemMetricType.SAMPLES_TRANSFER_RATIO.value)
        dataset_files = self._preprocess_dataset(splitting_dataset_times=rounds)
        current_model = self.initial_model

        # pylint: disable=C0103
        for r in range(1, rounds + 1):
            inference_dataset_file, eval_dataset_file = dataset_files[r - 1]

            inference_results, hard_examples = self._inference(current_model,
                                                               inference_dataset_file,
                                                               r)
            samples_transfer_ratio_info.append((inference_results, hard_examples))

            # No hard examples mined in this round: nothing to retrain on,
            # keep the current model and start the next round.
            if not hard_examples:
                continue

            train_dataset_file = self._get_train_dataset(hard_examples, inference_dataset_file)
            new_model = self._train(current_model, train_dataset_file, r)
            eval_results = self._eval(new_model, current_model, eval_dataset_file)

            # Only promote the newly trained model when it beats the current
            # one according to the configured model-eval operator/threshold.
            if self._trigger_model_update(eval_results):
                current_model = new_model

        test_res, hard_examples = self._inference(current_model, self.dataset.test_url, "test")
        samples_transfer_ratio_info.append((test_res, hard_examples))

        return test_res, self.system_metric_info

    def _prepare_inference(self, model, rounds):
        """Create per-round inference output dirs and export env vars read
        by the inference job; return the dir where hard examples are saved."""
        inference_output_dir = os.path.join(self.workspace,
                                            f"output/inference/results/{rounds}")
        if not is_local_dir(inference_output_dir):
            os.makedirs(inference_output_dir)

        hard_example_saved_dir = os.path.join(self.workspace,
                                              f"output/inference/hard_examples/{rounds}")
        if not is_local_dir(hard_example_saved_dir):
            os.makedirs(hard_example_saved_dir)

        # The sedna-based job reads its configuration from the environment.
        os.environ["RESULT_SAVED_URL"] = inference_output_dir
        os.environ["MODEL_URL"] = model

        return hard_example_saved_dir

    def _inference(self, model, data_index_file, rounds):
        """Run inference on every sample; copy mined hard examples aside.

        Returns (inference_results dict, list of (original, copied) paths).
        """
        hard_example_saved_dir = self._prepare_inference(model, rounds)

        job = self.build_paradigm_job(ParadigmType.INCREMENTAL_LEARNING.value)
        inference_dataset = self.dataset.load_data(data_index_file, "inference")
        inference_dataset_x = inference_dataset.x

        inference_results = {}
        hard_examples = []
        for data in inference_dataset_x:
            res, _, is_hard_example = job.inference([data])
            inference_results.update(res)
            if is_hard_example:
                shutil.copy(data, hard_example_saved_dir)
                new_hard_example = os.path.join(hard_example_saved_dir, os.path.basename(data))
                hard_examples.append((data, new_hard_example))
        del job

        return inference_results, hard_examples

    def _get_train_dataset(self, hard_examples, data_label_file):
        """Build a temp txt index file pairing each copied hard example with
        the label looked up from the original index file."""
        data_labels = self.dataset.load_data(data_label_file, "train label")
        temp_dir = tempfile.mkdtemp()
        train_dataset_file = os.path.join(temp_dir, os.path.basename(data_label_file))
        with open(train_dataset_file, "w", encoding="utf-8") as file:
            for old, new in hard_examples:
                index = np.where(data_labels.x == old)
                # Only write samples with exactly one matching label entry.
                if len(index[0]) == 1:
                    label = data_labels.y[index[0][0]]
                    file.write(f"{new} {label}\n")

        return train_dataset_file

    def _train(self, model, data_index_file, rounds):
        """Train a new model from *model* on the mined hard examples."""
        train_output_dir = os.path.join(self.workspace, f"output/train/{rounds}")
        if not is_local_dir(train_output_dir):
            os.makedirs(train_output_dir)

        os.environ["MODEL_URL"] = train_output_dir
        os.environ["BASE_MODEL_URL"] = model

        job = self.build_paradigm_job(ParadigmType.INCREMENTAL_LEARNING.value)
        train_dataset = self.dataset.load_data(data_index_file, "train")
        new_model = job.train(train_dataset)
        del job

        return new_model

    def _eval(self, new_model, old_model, data_index_file):
        """Evaluate both models on the eval split with the configured metric."""
        os.environ["MODEL_URLS"] = f"{new_model};{old_model}"
        model_eval_info = self.model_eval_config
        model_metric = model_eval_info.get("model_metric")

        job = self.build_paradigm_job(ParadigmType.INCREMENTAL_LEARNING.value)
        eval_dataset = self.dataset.load_data(data_index_file, "eval")
        eval_results = job.evaluate(eval_dataset, metric=get_metric_func(model_metric))
        del job

        return eval_results

    def _trigger_model_update(self, eval_results):
        """Decide whether the new model should replace the current one.

        Compares the metric delta (new model minus old model) against the
        configured threshold using the configured comparison operator.

        Raises
        ------
        ValueError
            if the operator is unsupported or eval_results does not hold
            exactly two model results.
        """
        model_eval_info = self.model_eval_config
        model_metric = model_eval_info.get("model_metric")
        metric_name = model_metric.get("name")
        operator = model_eval_info.get("operator")
        threshold = model_eval_info.get("threshold")

        operator_map = {
            ">": lambda x, y: x > y,
            "<": lambda x, y: x < y,
            "=": lambda x, y: x == y,
            ">=": lambda x, y: x >= y,
            "<=": lambda x, y: x <= y,
        }

        if operator not in operator_map:
            raise ValueError(f"operator({operator}) is not supported to compare,"
                             f" expected one of {list(operator_map)}.")

        operator_func = operator_map[operator]

        if len(eval_results) != 2:
            raise ValueError(f"two models of evaluation should have two results,"
                             f" but got eval results: {eval_results}")

        metric_values = [result.get("metrics").get(metric_name) for result in eval_results]

        metric_delta = metric_values[0] - metric_values[1]
        return operator_func(metric_delta, threshold)

    def _preprocess_dataset(self, splitting_dataset_times=1):
        """Split the train dataset into (inference, evaluation) pairs,
        one pair per incremental round."""
        train_dataset_ratio = self.incremental_learning_data_setting.get("train_ratio")
        splitting_dataset_method = self.incremental_learning_data_setting.get("splitting_method")

        return self.dataset.split_dataset(self.dataset.train_url,
                                          get_file_format(self.dataset.train_url),
                                          train_dataset_ratio,
                                          method=splitting_dataset_method,
                                          dataset_types=("model_inference", "model_evaluation"),
                                          output_dir=self.dataset_output_dir(),
                                          times=splitting_dataset_times)
class SingleTaskLearning(ParadigmBase):
    """
    SingleTaskLearning:
    provide the flow of single task learning paradigm.
    Notes:
          1. Ianvs serves as testing tools for test objects, e.g., algorithms.
          2. Ianvs does NOT include code directly on test object.
          3. Algorithms serve as typical test objects in Ianvs
          and detailed algorithms are thus NOT included in this Ianvs python file.
          4. As for the details of example test objects, e.g., algorithms,
          please refer to third party packages in Ianvs example.
          For example, AI workflow and interface pls refer to sedna
          (sedna docs: https://sedna.readthedocs.io/en/latest/api/lib/index.html),
          and module implementation pls refer to `examples' test algorithms`,
          e.g., basemodel.py, hard_example_mining.py.

    Parameters
    ---------
    workspace: string
        the output required for single task learning paradigm.
    kwargs: dict
        config required for the test process of single task learning paradigm,
        e.g.: algorithm modules, dataset, initial model, etc.

    """

    def __init__(self, workspace, **kwargs):
        ParadigmBase.__init__(self, workspace, **kwargs)
        self.initial_model = kwargs.get("initial_model_url")

    def run(self):
        """
        run the test flow of single task learning paradigm.

        Returns
        ------
        test result: numpy.ndarray
        system metric info: dict
            information needed to compute system metrics.

        """

        job = self.build_paradigm_job(ParadigmType.SINGLE_TASK_LEARNING.value)

        trained_model = self._train(job, self.initial_model)

        inference_result = self._inference(job, trained_model)

        return inference_result, self.system_metric_info

    def _train(self, job, initial_model):
        """Train from *initial_model* and return the saved model's path."""
        train_output_dir = os.path.join(self.workspace, "output/train/")
        # Create the output dir up front so job.save() has a valid
        # destination (consistent with the incremental learning paradigm,
        # which prepares its output dirs before running the job).
        os.makedirs(train_output_dir, exist_ok=True)

        os.environ["BASE_MODEL_URL"] = initial_model

        train_dataset = self.dataset.load_data(self.dataset.train_url, "train")
        job.train(train_dataset)
        trained_model_path = job.save(train_output_dir)
        return trained_model_path

    def _inference(self, job, trained_model):
        """Load *trained_model* and predict on the test split."""
        inference_dataset = self.dataset.load_data(self.dataset.test_url, "inference")
        inference_output_dir = os.path.join(self.workspace, "output/inference/")
        # Ensure the dir advertised via RESULT_SAVED_URL actually exists.
        os.makedirs(inference_output_dir, exist_ok=True)
        os.environ["RESULT_SAVED_URL"] = inference_output_dir
        job.load(trained_model)
        infer_res = job.predict(inference_dataset.x)
        return infer_res
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# pylint: disable=missing-module-docstring +from .generation_assistant import get_full_combinations diff --git a/core/testcasecontroller/generation_assistant/generation_assistant.py b/core/testcasecontroller/generation_assistant/generation_assistant.py new file mode 100644 index 0000000..145f007 --- /dev/null +++ b/core/testcasecontroller/generation_assistant/generation_assistant.py @@ -0,0 +1,51 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def get_full_combinations(name_values_list):
    """
    get full combinations of multiple arrays

    Parameters
    -------
    name_values_list : List
        e.g.: [(name1, [value1, value2]), (name2, [value3, value4])]

    Returns
    -------
    List
        e.g.: [{name1:value1, name2:value3}, {name1:value1, name2:value4},
               {name1:value2, name2:value3}, {name1:value2, name2:value4}]
    """

    # Separate the parameter names from their candidate value lists, then
    # pair every cartesian-product tuple back with the names.
    names = [name for name, _ in name_values_list]
    candidate_values = [values for _, values in name_values_list]

    return [dict(zip(names, combination)) for combination in product(*candidate_values)]
def samples_transfer_ratio_func(system_metric_info: dict):
    """
    compute samples transfer ratio:
        ratio = nums of all transfer samples / nums of all inference samples

    Parameters
    ----------
    system_metric_info: dict
        information needed to compute system metrics.

    Returns
    -------
    float
        e.g.: 0.92

    """

    info = system_metric_info.get(SystemMetricType.SAMPLES_TRANSFER_RATIO.value)
    inference_num = 0
    transfer_num = 0
    for inference_data, transfer_data in info:
        inference_num += len(inference_data)
        transfer_num += len(transfer_data)

    # Guard against division by zero when no inference samples were recorded
    # (e.g., an empty test round): report a zero transfer ratio.
    if inference_num == 0:
        return 0.0

    return round(float(transfer_num) / inference_num, 4)


def get_metric_func(metric_dict: dict):
    """
    get metric func by metric info

    Parameters:
    ----------
    metric_dict: dict
        metric info, e.g.: {"name": "f1_score", "url": "/metrics/f1_score.py"}

    Returns:
    -------
    name: string
        metric name
    metric_func: function

    Raises
    ------
    RuntimeError
        if the user-supplied metric module at `url` cannot be loaded.
    """

    name = metric_dict.get("name")
    url = metric_dict.get("url")
    if url:
        # A url means a user-defined metric: load the module so the metric
        # class registers itself with the sedna ClassFactory, then fetch it.
        load_module(url)
        try:
            metric_func = ClassFactory.get_cls(type_name=ClassType.GENERAL, t_cls_name=name)
            return name, metric_func
        except Exception as err:
            raise RuntimeError(f"get metric func(url={url}) failed, error: {err}.") from err

    # No url: fall back to a built-in metric function defined in this module,
    # resolved by the "<name>_func" naming convention.
    return name, getattr(sys.modules[__name__], str.lower(name) + "_func")
class TestCase:
    """
    Test Case:
        Consists of a test environment and a test algorithm

    Parameters
    ----------
    test_env : instance
        The test environment of benchmarking,
        including dataset, Post-processing algorithms like metric computation.
    algorithm : instance
        Typical distributed-synergy AI algorithm paradigm.
    """

    def __init__(self, test_env, algorithm):
        # pylint: disable=C0103
        self.id = uuid.uuid1()
        self.test_env = test_env
        self.algorithm = algorithm
        self.output_dir = None

    def _get_output_dir(self, workspace):
        """Return a unique output dir: workspace/<algorithm name>/<id>.

        The original loop retried the *same* path forever on collision;
        regenerate the id instead so the loop is guaranteed to terminate.
        """
        output_dir = os.path.join(workspace, self.algorithm.name, str(self.id))
        while os.path.exists(output_dir):
            self.id = uuid.uuid1()
            output_dir = os.path.join(workspace, self.algorithm.name, str(self.id))
        return output_dir

    def run(self, workspace):
        """
        Run the test case

        Returns
        -------
        test result: dict
            e.g.: {"f1_score": 0.89}
        """

        try:
            dataset = self.test_env.dataset
            # Pass the whole test-env configuration through to the paradigm.
            test_env_config = dict(self.test_env.__dict__)

            self.output_dir = self._get_output_dir(workspace)
            paradigm = self.algorithm.paradigm(workspace=self.output_dir,
                                               **test_env_config)
            res, system_metric_info = paradigm.run()
            test_result = self.compute_metrics(res, dataset, **system_metric_info)

        except Exception as err:
            paradigm_type = self.algorithm.paradigm_type
            raise RuntimeError(
                f"(paradigm={paradigm_type}) pipeline runs failed, error: {err}") from err
        return test_result

    def compute_metrics(self, paradigm_result, dataset, **kwargs):
        """
        Compute metrics of paradigm result

        Parameters
        ----------
        paradigm_result: numpy.ndarray
        dataset: instance
        kwargs: dict
            information needed to compute system metrics.

        Returns
        -------
        dict
            e.g.: {"f1_score": 0.89}
        """

        metric_funcs = {}
        for metric_dict in self.test_env.metrics:
            metric_name, metric_func = get_metric_func(metric_dict=metric_dict)
            if callable(metric_func):
                metric_funcs[metric_name] = metric_func

        test_dataset = dataset.load_data(dataset.test_url,
                                         data_type="eval overall",
                                         label=dataset.label)

        metric_res = {}
        system_metric_types = [e.value for e in SystemMetricType.__members__.values()]
        for metric_name, metric_func in metric_funcs.items():
            # System metrics consume the run's bookkeeping info; data metrics
            # compare ground truth against the paradigm's predictions.
            if metric_name in system_metric_types:
                metric_res[metric_name] = metric_func(kwargs)
            else:
                metric_res[metric_name] = metric_func(test_dataset.y, paradigm_result)

        return metric_res


class TestCaseController:
    """
    Test Case Controller:
        Control the runtime behavior of test cases like instance generation and vanish.
    """

    def __init__(self):
        self.test_cases = []

    def build_testcases(self, test_env, test_object):
        """
        Build multiple test cases by Using a test environment and multiple test algorithms.
        """

        test_object_type = test_object.get("type")
        test_object_config = test_object.get(test_object_type)
        if test_object_type == TestObjectType.ALGORITHMS.value:
            algorithms = self._parse_algorithms_config(test_object_config)
            for algorithm in algorithms:
                self.test_cases.append(TestCase(test_env, algorithm))

    def run_testcases(self, workspace):
        """
        Run all test cases.
        """
        succeed_results = {}
        succeed_testcases = []
        for testcase in self.test_cases:
            try:
                res, time = (testcase.run(workspace), utils.get_local_time())
            except Exception as err:
                raise RuntimeError(
                    f"testcase(id={testcase.id}) runs failed, error: {err}") from err

            succeed_results[testcase.id] = (res, time)
            succeed_testcases.append(testcase)

        return succeed_testcases, succeed_results

    @classmethod
    def _parse_algorithms_config(cls, config):
        algorithms = []
        for algorithm_config in config:
            name = algorithm_config.get("name")
            config_file = algorithm_config.get("url")
            if not utils.is_local_file(config_file):
                raise RuntimeError(f"not found algorithm config file({config_file}) in local")

            try:
                # Use a distinct name instead of rebinding ``config`` (the
                # list currently being iterated), which was a latent
                # shadowing bug in the original implementation.
                algorithm_config_dict = utils.yaml2dict(config_file)
                algorithm = Algorithm(name, algorithm_config_dict)
                algorithms.append(algorithm)
            except Exception as err:
                raise RuntimeError(f"algorithm config file({config_file} is not supported, "
                                   f"error: {err}") from err

        # Expand each algorithm into one copy per hyperparameter-module
        # combination so every combination runs as its own test case.
        new_algorithms = []
        for algorithm in algorithms:
            for modules in algorithm.modules_list:
                new_algorithm = copy.deepcopy(algorithm)
                new_algorithm.modules = modules
                new_algorithms.append(new_algorithm)

        return new_algorithms
class Dataset:
    """
    Data:
        provide the configuration and handle functions of dataset.

    Parameters
    ----------
    config : dict
        config of dataset, include: train url, test url and label, etc.
    """

    def __init__(self, config):
        self.train_url: str = ""
        self.test_url: str = ""
        self.label: str = ""
        self._parse_config(config)

    def _check_fields(self):
        self._check_dataset_url(self.train_url)
        self._check_dataset_url(self.test_url)

    def _parse_config(self, config):
        # Only accept config keys that match declared attributes.
        for attr, value in config.items():
            if attr in self.__dict__:
                self.__dict__[attr] = value

        self._check_fields()

    @classmethod
    def _check_dataset_url(cls, url):
        if not utils.is_local_file(url) and not os.path.isabs(url):
            raise ValueError(f"dataset file({url}) is not a local file and not an absolute path.")

        file_format = utils.get_file_format(url)
        if file_format not in [v.value for v in DatasetFormat.__members__.values()]:
            raise ValueError(f"dataset file({url})'s format({file_format}) is not supported.")

    @classmethod
    def _process_txt_index_file(cls, file_url):
        """
        convert the index info of data from relative path to absolute path
        in txt index file
        """
        root = os.path.dirname(file_url)
        with open(file_url, "r", encoding="utf-8") as file:
            # Strip newlines and skip blank lines; the original kept the
            # trailing "\n" in the second column and crashed on empty lines.
            lines = [line.strip() for line in file if line.strip()]

        # Only rewrite the index when at least one entry is a relative path.
        if not any(not os.path.isabs(line.split(" ")[0]) for line in lines):
            return file_url

        tmp_file = os.path.join(tempfile.mkdtemp(), "index.txt")
        with open(tmp_file, "w", encoding="utf-8") as file:
            for line in lines:
                # maxsplit=1 tolerates second columns that contain spaces;
                # the original unconditional split raised ValueError on them.
                front, back = line.split(" ", maxsplit=1)
                file.write(f"{os.path.abspath(os.path.join(root, front))} "
                           f"{os.path.abspath(os.path.join(root, back))}\n")

        return tmp_file

    def _process_index_file(self, file_url):
        file_format = utils.get_file_format(file_url)
        if file_format == DatasetFormat.TXT.value:
            return self._process_txt_index_file(file_url)

        return None

    def process_dataset(self):
        """
        process dataset:
            process train dataset and test dataset for testcase;
            e.g.: convert the index info of data from relative path to absolute path
                  in the index file(e.g.: txt index file).

        """

        self.train_url = self._process_index_file(self.train_url)
        self.test_url = self._process_index_file(self.test_url)

    # pylint: disable=too-many-arguments
    def split_dataset(self, dataset_url, dataset_format, ratio, method="default",
                      dataset_types=None, output_dir=None, times=1):
        """
        split dataset:
            step1: divide all data N(N = times) times to generate N pieces of data.
            step2: divide every pieces of data 1 time using the special method.

        Parameters:
        -----------
        dataset_url: str
            the address url of dataset.
        dataset_format: str
            the format of dataset, e.g.: txt and csv.
        ratio: float
            the float of splitting dataset
        method: string
            the method of splitting dataset.
            default value is "default": divide the data equally and proportionally.
        dataset_types: tuple
            divide every pieces of data 1 time to generate 2 small pieces of data
            for special types of tasks.
            e.g.: ("train", "eval")
        output_dir: str
            the output dir of splitting dataset.
        times: int
            the times of dividing all data in step1.

        Returns
        -------
        list
            the result of splitting dataset.
            e.g.: [("/dataset/train.txt", "/dataset/eval.txt")]

        """

        if method == "default":
            return self._splitting_more_times(dataset_url, dataset_format, ratio,
                                              data_types=dataset_types,
                                              output_dir=output_dir,
                                              times=times)

        raise ValueError(f"dataset splitting method({method}) is not supported,"
                         f"currently, method supports 'default'.")

    @classmethod
    def _get_file_url(cls, output_dir, dataset_type, dataset_id, file_format):
        return os.path.join(output_dir, f"{dataset_type}-{dataset_id}.{file_format}")

    @classmethod
    def _write_data_file(cls, data, data_file, data_format):
        if data_format == DatasetFormat.TXT.value:
            with open(data_file, "w", encoding="utf-8") as file:
                for line in data:
                    file.write(line + "\n")
        if data_format == DatasetFormat.CSV.value:
            data.to_csv(data_file, index=None)

    @classmethod
    def _read_data_file(cls, data_file, data_format):
        data = None

        if data_format == DatasetFormat.TXT.value:
            with open(data_file, "r", encoding="utf-8") as file:
                data = [line.strip() for line in file.readlines()]

        if data_format == DatasetFormat.CSV.value:
            data = pd.read_csv(data_file)

        return data

    def _get_dataset_file(self, data, output_dir, dataset_type, index, dataset_format):
        data_file = self._get_file_url(output_dir, dataset_type, index, dataset_format)

        self._write_data_file(data, data_file, dataset_format)

        return data_file

    def _splitting_more_times(self, data_file, data_format, ratio,
                              data_types=None, output_dir=None, times=1):
        if not data_types:
            data_types = ("train", "eval")

        if not output_dir:
            output_dir = tempfile.mkdtemp()

        all_data = self._read_data_file(data_file, data_format)

        data_files = []

        all_num = len(all_data)
        step = int(all_num / times)
        index = 1
        while index <= times:
            # The last piece absorbs the remainder of the integer division.
            if index == times:
                new_dataset = all_data[step * (index - 1):]
            else:
                new_dataset = all_data[step * (index - 1):step * index]

            new_num = len(new_dataset)

            data_files.append((
                self._get_dataset_file(new_dataset[:int(new_num * ratio)], output_dir,
                                       data_types[0], index, data_format),
                self._get_dataset_file(new_dataset[int(new_num * ratio):], output_dir,
                                       data_types[1], index, data_format)))

            index += 1

        return data_files

    @classmethod
    def load_data(cls, file: str, data_type: str, label=None, use_raw=False, feature_process=None):
        """
        load data

        Parameters
        ---------
        file: str
            the address url of data file.
        data_type: str
            the type of data for special type task.
        label: str
            specify label of data.
        use_raw: bool
            if true, use all of raw data.
        feature_process: function
            feature processing on all of raw data.

        Returns
        -------
        instance
            e.g.: TxtDataParse, CSVDataParse.

        """
        data_format = utils.get_file_format(file)

        data = None
        if data_format == DatasetFormat.CSV.value:
            data = CSVDataParse(data_type=data_type, func=feature_process)
            data.parse(file, label=label)

        if data_format == DatasetFormat.TXT.value:
            data = TxtDataParse(data_type=data_type, func=feature_process)
            data.parse(file, use_raw=use_raw)

        return data
# pylint: disable=too-few-public-methods
class TestEnv:
    """
    TestEnv:
        the test environment of benchmarking,
        including dataset, Post-processing algorithms like metric computation.

    Parameters
    ----------
    config: dict
        config of the test environment of benchmarking, includes: dataset, metrics, etc.

    """

    def __init__(self, config):
        # Defaults; overridden by matching keys found in the config dict.
        self.model_eval = {
            "model_metric": {
                "name": "",
                "url": "",
            },
            "threshold": 0.9,
            "operator": ">"
        }
        self.metrics = []
        self.incremental_rounds = 2
        self.dataset = None
        self._parse_config(config)

    def _check_fields(self):
        """Validate the parsed configuration, raising ValueError on problems."""
        if not self.metrics:
            raise ValueError(f"not found testenv metrics({self.metrics}).")

        rounds_valid = (isinstance(self.incremental_rounds, int)
                        and self.incremental_rounds >= 2)
        if not rounds_valid:
            raise ValueError(f"testenv incremental_rounds(value={self.incremental_rounds})"
                             f" must be int type and not less than 2.")

    def _parse_config(self, config):
        """Copy recognized keys from config["testenv"] onto this instance;
        the dataset entry is wrapped in a Dataset object."""
        config_dict = config[TestEnv.__name__.lower()]
        dataset_key = Dataset.__name__.lower()
        # pylint: disable=C0103
        for key, value in config_dict.items():
            if key == dataset_key:
                self.dataset = Dataset(value)
            elif key in self.__dict__:
                self.__dict__[key] = value

        self._check_fields()

    def prepare(self):
        """ prepare env"""
        try:
            self.dataset.process_dataset()
        except Exception as err:
            raise Exception(f"prepare dataset failed, error: {err}.") from err
+%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +clean: + rm -rf "$(BUILDDIR)" + +api: + rm -rf ./source/api/* + +html: + @$(SPHINXBUILD) -M html "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +all: clean api html diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..680f4b8 --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,217 @@ +# Configuration file for the Sphinx documentation builder. +# This script is only used for Ianvs documents maintainer, +# who can use this script to configure the page display effect. +# It can be ignored for Ianvs User and Developer. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. 
+# +import os +import re +import shutil +import subprocess +import sys + +import sphinx_rtd_theme + +try: + import m2r2 +except ModuleNotFoundError: + subprocess.check_call([sys.executable, "-m", "pip", "install", "m2r2"]) +_base_path = os.path.abspath('..') +BASE_URL = 'https://github.com/kubeedge-sedna/ianvs/' + +sys.path.append(_base_path) + +extra_paths = [ + os.path.join(_base_path, "examples"), +] +for p in extra_paths: + dst = os.path.join( + _base_path, "docs", + os.path.basename(p) + ) + if os.path.isfile(dst): + os.remove(dst) + elif os.path.isdir(dst): + shutil.rmtree(dst) + if os.path.isdir(p): + shutil.copytree(p, dst) + else: + shutil.copy2(p, dst) + +# -- Project information ----------------------------------------------------- + +project = 'Ianvs' +copyright = '2022, KubeEdge SIG AI' +author = 'KubeEdge SIG AI' + +# The full version, including alpha/beta/rc tags +release = 'v0.1' + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + "m2r2", + "sphinx.ext.autodoc", + "sphinx.ext.todo", + "sphinx.ext.coverage", + "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", + "sphinx.ext.autosummary", + "sphinx.ext.napoleon" +] + +autodoc_inherit_docstrings = False +autodoc_member_order = "bysource" +# If true, `todo` and `todoList` produce output, else they produce nothing. +todo_include_todos = True + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + +# The name of the Pygments (syntax highlighting) style to use. 
+pygments_style = 'sphinx' + +html_static_path = ['_static'] + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'sphinx_rtd_theme' +html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] +html_last_updated_fmt = "%b %d, %Y" +html_theme_options = { + 'prev_next_buttons_location': 'both' +} + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ['_static'] + +source_suffix = { + '.rst': 'restructuredtext', + '.txt': 'markdown', + '.md': 'markdown', +} + +extlinks = { + "issue": f"{BASE_URL}issues", + "pr": f"{BASE_URL}pull" +} + + +# hack to replace file link to html link in markdown +def ultimateReplace(app, docname, source): + """ + In the rendering with Sphinx, as some file links in markdown + can not be automatically redirected, and 404 response during + access, here define a regular to handle these links. 
+ """ + path = app.env.doc2path(docname) # get current path + + # INLINE_LINK_EXAMPLE: [Ianvs repository](https://github.com/kubeedge/ianvs) + INLINE_LINK_RE = re.compile(r'\[[^\]]+\]\(([^)]+)\)') + # FOOTNOTE_LINK_EXAMPLE: [Ianvs repository]: https://github.com/kubeedge/ianvs + FOOTNOTE_LINK_URL_RE = re.compile(r'\[[^\]]+\](?:\s+)?:(?:\s+)?(\S+)') + if path.endswith('.md'): + new_line = [] + + docs_url = os.path.join(_base_path, "docs") + for line in source[0].split('\n'): + line = re.sub( + "\[`([^\]]+)`\]\[", "[\g<1>][", line + ) # fix html render error: [`title`] + replace_line = [] + prev_start = 0 + href_list = ( + list(INLINE_LINK_RE.finditer(line)) + + list(FOOTNOTE_LINK_URL_RE.finditer(line)) + ) + for href in href_list: + pstart = href.start(1) + pstop = href.end(1) + if pstart == -1 or pstop == -1: + continue + link = line[pstart: pstop] + if not link or link.startswith("http"): + continue + if link.startswith("/"): + tmp = _base_path + else: + tmp = os.path.abspath(os.path.dirname(path)) + + _relpath = os.path.abspath(os.path.join(tmp, link.lstrip("/"))) + for sp in extra_paths: # these docs will move into `docs` + sp = os.path.abspath(sp).rstrip("/") + + if not _relpath.startswith(sp): + continue + if os.path.isdir(sp): + sp += "/" + _relpath = os.path.join( + docs_url, _relpath[len(_base_path):].lstrip("/") + ) + break + + # If relative path looks like: + # `docs/path/` + # those paths will change to `docs/path.html` + # So it is suggested to give absolute github address if the path is directory. 
+ # `docs/path/file.md` + # those paths will change to `docs/path/file.html` + if _relpath.startswith(docs_url) and ( + os.path.isdir(_relpath) or + os.path.splitext(_relpath)[-1].lower().startswith( + ( + ".md", ".rst", ".txt", "html", + ".png", ".jpg", ".jpeg", ".svg", ".gif" + ) + ) + ): + link = os.path.relpath(_relpath, + os.path.dirname(path)) + if not os.path.isdir(_relpath): # suffix edit + link = re.sub( + "(?:\.md|\.rst|\.txt)(\W+\w+)?$", + ".html\g<1>", link + ) + else: # redirect to `github` + _relpath = os.path.abspath( + os.path.join(tmp, link.lstrip("/")) + ) + _rel_root = os.path.relpath(_relpath, _base_path) + link = f"{BASE_URL}tree/main/{_rel_root}" + p_line = f"{line[prev_start:pstart]}{link}" + prev_start = pstop + replace_line.append(p_line) + replace_line.append(line[prev_start:]) + new_line.append("".join(replace_line)) + source[0] = "\n".join(new_line) + + +def setup(app): + app.add_config_value('ultimate_replacements', {}, True) + # Emitted when a source file has been read. + # The source argument is a list whose single element is the contents of the source file. + # https://www.sphinx-doc.org/en/master/extdev/appapi.html#event-source-read + app.connect('source-read', ultimateReplace) + app.add_css_file('css/custom.css') diff --git a/docs/distributed-synergy-ai-benchmarking.md b/docs/distributed-synergy-ai-benchmarking.md new file mode 100644 index 0000000..08a921f --- /dev/null +++ b/docs/distributed-synergy-ai-benchmarking.md @@ -0,0 +1,102 @@ +# Distributed Synergy AI Benchmarking +Edge computing emerges as a promising technical framework to overcome the challenges in cloud computing. In this machine-learning era, the AI application becomes one of the most critical types of applications on the edge. 
Driven by the increasing computation power of edge devices and the increasing amount of data generated from the edge, edge-cloud synergy AI and distributed synergy AI techniques have received more and more attention for the sake of device, edge, and cloud intelligence enhancement. + +Nevertheless, distributed synergy AI is at its initial stage. For the time being, the comprehensive evaluation standard is not yet available for scenarios with various AI paradigms on all three layers of edge computing systems. According to the landing challenge survey 2022, developers suffer from the lack of support on related datasets and algorithms; while end users are lost in the sea of mismatched solutions. That limits the wide application of related techniques and hinders a prosperous ecosystem of distributed synergy AI. A comprehensive end-to-end distributed synergy AI benchmark suite is thus needed to measure and optimize the systems and applications. + +Ianvs thus provides a basic benchmark suite for distributed synergy AI, so that AI developers and end users can benefit from efficient development support and best practice discovery. + +## Goals +For developers or end users of distributed synergy AI solutions, the goals of the distributed synergy AI framework are: +- Facilitating efficient development for developers by preparing + - test cases including dataset and corresponding tools + - benchmarking tools including simulation and hyper-parameter searching +- Revealing best practices for developers and end users + - presentation tools including leaderboards and test reports + + +## Scope +The distributed synergy AI benchmarking ianvs aims to test the performance of distributed synergy AI solutions following recognized standards, in order to facilitate more efficient and effective development. + +The scope of ianvs includes +- Providing end-to-end benchmark toolkits across devices, edge nodes and cloud nodes based on typical distributed-synergy AI paradigms and applications. 
+ - Tools to manage test environment. For example, it would be necessary to support the CRUD (Create, Read, Update and Delete) actions in test environments. Elements of such test environments include algorithm-wise and system-wise configuration. + - Tools to control test cases. Typical examples include paradigm templates, simulation tools, and hyper-parameter-based assistant tools. + - Tools to manage benchmark presentation, e.g., leaderboard and test report generation. +- Cooperation with other organizations or communities, e.g., in KubeEdge SIG AI, to establish comprehensive benchmarks and developed related applications, which can include but are not limited to + - Dataset collection, re-organization, and publication + - Formalized specifications, e.g., standards + - Holding competitions or coding events, e.g., open source promotion plan + - Maintaining solution leaderboards or certifications for commercial usage + +Targeting users +- Developers: Build and publish edge-cloud collaborative AI solutions efficiently from scratch +- End users: view and compare distributed synergy AI capabilities of solutions + +The scope of ianvs does NOT include to +- Re-invent existing edge platform, i.e., kubeedge, etc. +- Re-invent existing AI framework, i.e., tensorflow, pytorch, mindspore, etc. +- Re-invent existing distributed synergy AI framework, i.e., kubeedge-sedna, etc. +- Re-invent existing UI or GUI toolkits, i.e., prometheus, grafana, matplotlib, etc. + +## Design Details +### Architecture and Modules +The architectures and related concepts are shown in the below figure. The ianvs is designed to run within a single node. 
Critical components include +- ``Test Environment Manager``: the CRUD of test environments serving for global usage +- ``Test Case Controller``: control the runtime behavior of test cases like instance generation and vanish + - ``Generation Assistant``: assist users to generate test cases based on certain rules or constraints, e.g., the range of parameters + - ``Simulation Controller``: control the simulation process of edge-cloud synergy AI, including the instance generation and vanishment of simulation containers +- ``Story Manager``: the output management and presentation of the test case, e.g., leaderboards + +![](guides/images/ianvs_arch.png) + +Ianvs includes Test-Environment Management, Test-case Controller and Story Manager in the Distributed Synergy AI benchmarking toolkits, where +1. Test-Environment Manager basically includes + - Algorithm-wise configuration + - Public datasets + - Pre-processing algorithms + - Feature engineering algorithms + - Post-processing algorithms like metric computation + - System-wise configuration + - Overall architecture + - System constraints or budgets + - End-to-end cross-node + - Per node +2. Test-case Controller includes but is not limited to the following components + - Templates of common distributed-synergy-AI paradigms, which can help the developer to prepare their test case without too much effort. Such paradigms include edge-cloud synergy joint inference, incremental learning, federated learning, and lifelong learning. + - Simulation tools. Develop simulated test environments for test cases + - Note that simulation tools are not yet available in early versions until v0.5 + - It is NOT in scope of this open-sourced Ianvs to simulate different hardware devices, e.g., simulating NPU with GPU and even CPU + - Other tools to assist test-case generation. For instance, prepare test cases based on a given range of hyper-parameters. +3. 
Story Manager includes but is not limited to the following components + - Leaderboard generation + - Test report generation + + +### Definitions of Objects + +Quite a few terms exist in ianvs, which include the detailed modules and objects. To facilitate easier concept understanding, we show a hierarchical table of terms in the following figures, where the top item contains the items below it. +![](guides/images/ianvs_concept.png) + +The concept definition of modules has been shown in the Architecture Section. In the following, we introduce the concepts of objects for easier understanding. +- ``Benchmark``: standardized evaluation process recognized by the academic or industry. +- ``Benchmarking Job``: the serving instance for an individual benchmarking with ianvs, which takes charge of the lifetime management of all possible ianvs components. + - Besides components, a benchmarking job includes instances of a test environment, one or more test cases, a leaderboard, or a test report. + - Different test environments lead to different benchmarking jobs and leaderboards. A benchmarking job can include multiple test cases. +- ``Test Object``: the targeted instance under benchmark testing. A typical example would be a particular algorithm or system. +- ``Test Environment``: setups or configurations for benchmarking, typically excluding the test object. + - It can include algorithm-wise and system-wise configurations. + - It serves as the unique descriptor of a benchmarking job. Different test environments thus lead to different benchmarking jobs. +- ``Test Case``: the executable instance to evaluate the performance of the test object under a particular test environment. Thus, the test case is usually generated with a particular test environment and outputs testing results if executed. + - It is the atomic unit of a benchmark. That is, a benchmarking job can include quite a few test cases. +- ``Attribute (Attr.) 
of Test Case``: Attributes or descriptors of a test case, e.g., id, name, and time stamp. +- ``Algorithm Paradigm``: acknowledged AI process which usually includes quite a few modules that can be implemented with replaceable algorithms, e.g., federated learning which includes modules of local train and global aggregation. +- ``Algorithm Module``: the component of the algorithm paradigm, e.g., the global aggregation module of the federated learning paradigm. +- ``Leaderboard``: the ranking of the test object under a specific test environment. + - The local node holds the local leaderboard for private usage. + - The global leaderboard is shared (e.g., via GitHub) by acknowledged organizations. +- ``Test Report``: the manuscript recording how the testing is conducted. + + + + + diff --git a/docs/guides/how-to-contribute-algorithms.md b/docs/guides/how-to-contribute-algorithms.md new file mode 100644 index 0000000..c637d32 --- /dev/null +++ b/docs/guides/how-to-contribute-algorithms.md @@ -0,0 +1,22 @@ +# How to contribute an algorithm to Ianvs + +Ianvs serves as a testing tool for test objects, e.g., algorithms. Ianvs does NOT include code directly on the test object. Algorithms serve as typical test objects in Ianvs and detailed algorithms are thus NOT included in this Ianvs python file. As for the details of example test objects, e.g., algorithms, please refer to third party packages in Ianvs examples. For example, for the AI workflow and interface please refer to sedna, and for module implementation please refer to third party packages like FPN_TensorFlow and the Sedna IBT algorithm. + + +For algorithm contributors, you can: +1. Release a repo independent of ianvs, but the interface should still follow the SIG AI algorithm interface to launch ianvs. + Here are two examples showing how to develop an algorithm for testing in Ianvs. + * [incremental-learning] + * [single-task-learning] +2. Integrate the targeted algorithm into sedna so that ianvs can use it directly.
in this case, you can contact the sedna owners for help. + + +Also, if a new algorithm has already been integrated into Sedna, it can be used in Ianvs directly. + + + +[Sedna Lib]: https://github.com/kubeedge/sedna/tree/main/lib +[incremental-learning]: ../proposals/algorithms/incremental-learning/basicIL-fpn.md +[single-task-learning]: ../proposals/algorithms/single-task-learning/fpn.md +[examples directory]: ../../../../examples +[Sedna repository]: https://github.com/kubeedge/sedna \ No newline at end of file diff --git a/docs/guides/how-to-contribute-leaderboards-or-test-reports.md b/docs/guides/how-to-contribute-leaderboards-or-test-reports.md new file mode 100644 index 0000000..7a98bdf --- /dev/null +++ b/docs/guides/how-to-contribute-leaderboards-or-test-reports.md @@ -0,0 +1,72 @@ +# How to contribute test reports or leaderboards + +This document helps you to contribute stories, i.e., test reports or leaderboards, for Ianvs. +If you follow this guide and find any problem, please submit an issue to update this file. + +## Test Reports +Everyone is welcome to submit and share their own test report with the community. + +### 1. Setup and Testing + +Ianvs is managed with [git], and to develop locally you +will need to install `git`. + +You can check if `git` is already on your system and properly installed with +the following command: + +``` +git --version +``` + +Clone the `Ianvs` repo: + +```shell +git clone http://github.com/kubeedge/ianvs.git +``` + +Please follow [Ianvs setup] to install Ianvs, and then run your own algorithm to output test reports. + + +### 2. Declare your grades +You may want to compare your testing results with those results on the [leaderboard]. + +Test reports are welcome after benchmarking. They can be submitted [there](../proposals/test-reports) for further review. + + + +## Leaderboards +Leaderboards, i.e., rankings of test objects, are public for everyone to visit. Examples are shown in the [leaderboard].
+ +Except [Ianvs Owners], there are mainly two roles for a leaderboard publication: +1. Developer: submits the test object for benchmarking, including but not limited to materials like the algorithm and test case following Ianvs settings and interfaces. +2. Maintainer: tests the materials provided by developers and releases the updated leaderboard to the public. + +For potential developers, +- Develop your algorithm with ianvs and choose the algorithm to submit. +- Make sure the submitted test object runs properly under the latest version of Ianvs before submission. Maintainers are not responsible for debugging the submitted objects. +- There is NO need to submit a new leaderboard. Maintainers are responsible for keeping the test environment consistent for all test objects under the same leaderboard and executing the test object to generate the new leaderboard. +- If the test object is ready, you are welcome to contact [Ianvs Owners]. Ianvs owners will connect you and maintainers, in order to receive your test object. Note that when developers submit the test object, developers give maintainers the right to test it. + +For potential maintainers, +- To maintain the consistency of test environments and test objects, the [leaderboard] submission is at present calling for acknowledged organizations to apply to take charge. Please contact +- Maintainers should be responsible for the results submitted. +- Maintainers should update the leaderboard in a monthly manner. +- Maintainers are NOT allowed to use the test object for purposes outside of Ianvs benchmarking without formal authorization from developers. +- Besides submitted objects, maintainers are suggested to test objects released in KubeEdge SIG AI or other classic solutions released in public.
+ + + + +[git]: https://git-scm.com/ +[framework]: /docs/proposals/architecture.md#architecture +[github]: https://github.com/ +[golang]: https://golang.org/doc/install +[k8s-setup]: https://kubernetes.io/docs/setup/ +[k8s-tools]: https://kubernetes.io/docs/tasks/tools +[minikube]: https://minikube.sigs.k8s.io/docs/start/ +[kind]: https://kind.sigs.k8s.io +[kubeedge]: https://kubeedge.io/en/docs/ +[kubeedge-k8s-compatibility]: https://github.com/kubeedge/kubeedge#kubernetes-compatibility +[Ianvs Setup]: how-to-install-ianvs.md +[leaderboard]: ../leaderboards/ +[Ianvs Owners]: ../../OWNERS \ No newline at end of file diff --git a/docs/guides/how-to-contribute-test-environments.md b/docs/guides/how-to-contribute-test-environments.md new file mode 100644 index 0000000..0614f4a --- /dev/null +++ b/docs/guides/how-to-contribute-test-environments.md @@ -0,0 +1,88 @@ +# How to Contribute Test Environments + + +## Overall contribution workflow + +1. Apply for a topic. + Once you have new idea about test environment, you can apply for a topic to discuss it on [SIG AI weekly meeting](http://github.com/kubeedge/ianvs.git). +2. Submit proposal. + After the idea is fully discussed, the former proposal PR is needed to submit to [Ianvs repository](http://github.com/kubeedge/ianvs.git). +3. Fix proposal review comments. + If other Ianvs maintainer leave review comments to the PR, you need fix them and get at least 2 reviewers' `/lgtm`, and 1 approver's `/approve`. +4. Submit code. + Then you can implement your code, and good code style is encouraged. +5. Fix code review comments. + Besides the merge requirements of proposal, CI passing is needed before review in this step. 
+ + +The following is a typical testenv: +```yaml +testenv: + # dataset configuration + dataset: + # the url address of train dataset index; string type; + train_url: "/ianvs/dataset/train_data/index.txt" + # the url address of test dataset index; string type; + test_url: "/ianvs/dataset/test_data/index.txt" + + # model eval configuration of incremental learning; + model_eval: + # metric used for model evaluation + model_metric: + # metric name; string type; + name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + + # condition of triggering inference model to update + # threshold of the condition; types are float/int + threshold: 0.01 + # operator of the condition; string type; + # values are ">=", ">", "<=", "<" and "="; + operator: ">=" + + # metrics configuration for test case's evaluation; list type; + metrics: + # metric name; string type; + - name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + - name: "samples_transfer_ratio" + + # incremental rounds setting for incremental learning paradigm.; int type; default value is 2; + incremental_rounds: 2 +``` +It can be found that, for a test we need to setup the three fields: +- dataset +- model_eval +- metrics + +That means, if you want to test on different dataset, different model or different metrics, you need a new test environment. + + + +## Add a new test environment + +Please refer to the examples directory, [pcb-aoi] is a scenario for testing. +We can regard it as a subject for a student that need to take an exam, the test env is like examination paper, +and the test job is like the student. + +For a subject `pcb-aoi`, a new examination paper could be added to the subdirectory, on the same level as `benchmarkingjob`. +The detailed steps could be the following: +1. Copy `benchmarkingjob` and named `benchmarkingjob_2` or any other intuitive name. +2. 
Add a new algorithm to `testalgorithms`, or keep the useful algorithm. You can refer to the [contribute algorithm] section to develop your own algorithm. +3. Copy `testenv/testenv.yaml`, and modify it based on what you need to test, with a different dataset, model, metrics and so on. + +If all things have been done, and you think that would be a nice "examination paper", you can create a PR to ianvs, to publish your paper. + +Interested "students" from our community will take the exam. + + + + + + + + +[pcb-aoi]: ../../examples/pcb-aoi +[contribute algorithm]: how-to-contribute-algorithms.md \ No newline at end of file diff --git a/docs/guides/how-to-install-ianvs.md b/docs/guides/how-to-install-ianvs.md new file mode 100644 index 0000000..5866ffd --- /dev/null +++ b/docs/guides/how-to-install-ianvs.md @@ -0,0 +1,70 @@ +# How to install Ianvs + +It is recommended to use Ianvs on a Linux machine. But for quick algorithm development, Windows support is also planned, to reduce the configuration cost of the development environment. + +This guide covers how to install Ianvs on a Linux environment. + +## Prerequisites +- One machine is all you need, i.e., a laptop or a virtual machine is sufficient and a cluster is not necessary +- 2 CPUs or more +- 4GB+ free memory, depends on algorithm and simulation setting +- 10GB+ free disk space +- Internet connection for github and pip, etc +- Python 3.6+ installed + +You can check the python version by the following command: +``` +python -V +``` +After doing that, the output will be like this, which means your version fits the bill.
+``` +Python 3.6.9 +``` + +## Install ianvs on Linux + + +### Create virtualenv +```shell +sudo apt install -y virtualenv +mkdir ~/venv +virtualenv -p python3 ~/venv/ianvs +source ~/venv/ianvs/bin/activate +``` + +> If you prefer conda, you can create a python environment by referring to the [creating steps](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-with-commands) provided by conda. + +### Download ianvs project +``` +cd ~ +git clone https://github.com/JimmyYang20/ianvs.git +``` + +### Install third-party dependencies +``` +sudo apt update +sudo apt install libgl1-mesa-glx -y +cd ~/ianvs +python -m pip install ./examples/resources/third_party/* +python -m pip install -r requirements.txt +``` + +### Install ianvs +``` +python setup.py install +``` + +### Check the installation +```shell +ianvs -v +``` +If the version information is printed, Ianvs is installed successfully. + + + + +## About Windows + +At the time being, the package requirements of Ianvs are only applicable for Linux, to ensure comprehensive support from the Linux ecosystem and to ease the burden of manual installation for users on Windows. + +If you are more used to developing on Windows, you can still do so with remote connections like SSH from Windows connecting to a Linux machine with ianvs installed. Such remote connection is already supported in common Python coding tools like VSCode, Pycharm, etc. By doing so, it helps to provide efficient installation and robust functionality of Ianvs.
\ No newline at end of file diff --git a/docs/guides/how-to-test-algorithms.md b/docs/guides/how-to-test-algorithms.md new file mode 100644 index 0000000..89f0976 --- /dev/null +++ b/docs/guides/how-to-test-algorithms.md @@ -0,0 +1,383 @@ +[Quick Start]: ./quick-start.md +[Links of scenarios]: ../proposals/scenarios/ +[the PCB-AoI public dataset]: https://www.kaggle.com/datasets/kubeedgeianvs/pcb-aoi +[details of PCB-AoI dataset]: ../proposals/scenarios/industrial-defect-detection/pcb-aoi.md + +# How to test algorithms with Ianvs + +With Ianvs installed and related environment prepared, an algorithm developer is then able to test his/her own targeted algorithm using the following steps. + +Note that: +- If you are testing an algorithm summitted in Ianvs repository, e.g., FPN for single task learning, the test environment and the test case are both ready to use and you can directly refer to [Quick Start]. +- Otherwise, if the user has a test algorithm which is new to Ianvs repository, i.e., the test environment and the test case are not ready for the targeted algorithm, you might test the algorithm in Ianvs following the next steps from scratch. + +## Step 1. Test Environment Preparation + +First, the user need to prepare the dataset according to the targeted scenario, from source links (e.g., from Kaggle) provided by Ianvs. Scenarios with dataset are available [Links of scenarios]. As an example in this document, we are using [the PCB-AoI Public Dataset] released by KubeEdge SIG AI members on Kaggle. See [details of PCB-AoI dataset] for more information of this dataset. + +You might wonder why not put the dataset on Github repository of Ianvs: Datasets can be large. To avoid over-size projects in the Github repository of Ianvs, the Ianvs code base do not include origin datasets and developers might want to download uneeded datasets. + +The URL address of this dataset then should be filled in the configuration file ``testenv.yaml``. 
+ +``` yaml +# testenv.yaml +testenv: + # dataset configuration + dataset: + # the url address of train dataset index; string type; + train_url: "/ianvs/dataset/train_data/index.txt" + # the url address of test dataset index; string type; + test_url: "/ianvs/dataset/test_data/index.txt" + + # model eval configuration of incremental learning; + model_eval: + # metric used for model evaluation + model_metric: + # metric name; string type; + name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + + # condition of triggering inference model to update + # threshold of the condition; types are float/int + threshold: 0.01 + # operator of the condition; string type; + # values are ">=", ">", "<=", "<" and "="; + operator: ">=" + + # metrics configuration for test case's evaluation; list type; + metrics: + # metric name; string type; + - name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + - name: "samples_transfer_ratio" + + # incremental rounds setting for incremental learning paradigm.; int type; default value is 2; + incremental_rounds: 2 +``` + +The URL address of this test environment, i.e., testenv.yaml, then should be filled in the configuration file in the following Step 3. For example, +``` yaml +# benchmarkingJob.yaml + testenv: "/home/yj/ianvs/examples/pcb-aoi/benchmarkingjob/testenv/testenv.yaml" +``` + +## Step 2. Test Case Preparation + +Note that the tested algorithm should follow the ianvs interface to ensure functional benchmarking. +That is, when a new algorithm is needed for testing, it should to be extended based on the basic classes, i.e., `class_factory.py`. +The class factory helps to make the algorithm pluggable in Ianvs +and two classes are defined in `class_factory.py`, namely `ClassType` and `ClassFactory`. +`ClassFactory` can register the modules you want to reuse through decorators. 
Then the user may develop the targeted algorithm as usual using the algorithm interface in class factory.
+For this new algorithm in `ClassType.GENERAL`, the code in the algorithm file is as follows: + +```python + +@ClassFactory.register(ClassType.GENERAL, alias="FPN") +class BaseModel: + + def __init__(self, **kwargs): + """ + initialize logging configuration + """ + + self.has_fast_rcnn_predict = False + + self._init_tf_graph() + + self.temp_dir = tempfile.mkdtemp() + if not os.path.isdir(self.temp_dir): + mkdir(self.temp_dir) + + os.environ["MODEL_NAME"] = "model.zip" + cfgs.LR = kwargs.get("learning_rate", 0.0001) + cfgs.MOMENTUM = kwargs.get("momentum", 0.9) + cfgs.MAX_ITERATION = kwargs.get("max_iteration", 5) + + def train(self, train_data, valid_data=None, **kwargs): + + if train_data is None or train_data.x is None or train_data.y is None: + raise Exception("Train data is None.") + + with tf.Graph().as_default(): + + img_name_batch, train_data, gtboxes_and_label_batch, num_objects_batch, data_num = \ + next_batch_for_tasks( + (train_data.x, train_data.y), + dataset_name=cfgs.DATASET_NAME, + batch_size=cfgs.BATCH_SIZE, + shortside_len=cfgs.SHORT_SIDE_LEN, + is_training=True, + save_name="train" + ) + + # ... ... + # several lines are omitted here. 
+ + return self.checkpoint_path + + def save(self, model_path): + if not model_path: + raise Exception("model path is None.") + + model_dir, model_name = os.path.split(self.checkpoint_path) + models = [model for model in os.listdir(model_dir) if model_name in model] + + if os.path.splitext(model_path)[-1] != ".zip": + model_path = os.path.join(model_path, "model.zip") + + if not os.path.isdir(os.path.dirname(model_path)): + os.makedirs(os.path.dirname(model_path)) + + with zipfile.ZipFile(model_path, "w") as f: + for model_file in models: + model_file_path = os.path.join(model_dir, model_file) + f.write(model_file_path, model_file, compress_type=zipfile.ZIP_DEFLATED) + + return model_path + + def predict(self, data, input_shape=None, **kwargs): + if data is None: + raise Exception("Predict data is None") + + inference_output_dir = os.getenv("RESULT_SAVED_URL") + + with self.tf_graph.as_default(): + if not self.has_fast_rcnn_predict: + self._fast_rcnn_predict() + self.has_fast_rcnn_predict = True + + restorer = self._get_restorer() + + config = tf.ConfigProto() + init_op = tf.group( + tf.global_variables_initializer(), + tf.local_variables_initializer() + ) + + with tf.Session(config=config) as sess: + sess.run(init_op) + + # ... ... + # several lines are omitted here. 
+ + return predict_dict + + def load(self, model_url=None): + if model_url: + model_dir = os.path.split(model_url)[0] + with zipfile.ZipFile(model_url, "r") as f: + f.extractall(path=model_dir) + ckpt_name = os.path.basename(f.namelist()[0]) + index = ckpt_name.find("ckpt") + ckpt_name = ckpt_name[:index + 4] + self.checkpoint_path = os.path.join(model_dir, ckpt_name) + + else: + raise Exception(f"model url is None") + + return self.checkpoint_path + + def evaluate(self, data, model_path, **kwargs): + if data is None or data.x is None or data.y is None: + raise Exception("Prediction data is None") + + self.load(model_path) + predict_dict = self.predict(data.x) + metric_name, metric_func = kwargs.get("metric") + if callable(metric_func): + return {"f1_score": metric_func(data.y, predict_dict)} + else: + raise Exception(f"not found model metric func(name={metric_name}) in model eval phase") +``` + +With the above algorithm interface, one may develop the targeted algorithm of FPN as usual in the same algorithm file. +The ``FPN_TensorFlow`` is also open sourced. For those interested in ``FPN_TensorFlow``, an example implementation is available [here](https://github.com/DetectionTeamUCAS/FPN_Tensorflow) and extended with the algorithm inferface [here](https://github.com/kubeedge-sedna/FPN_Tensorflow). + +Then we can fill the ``algorithm.yaml``: +``` yaml +algorithm: + # paradigm type; string type; + # currently the options of value are as follows: + # 1> "singletasklearning" + # 2> "incrementallearning" + paradigm_type: "incrementallearning" + incremental_learning_data_setting: + # ratio of training dataset; float type; + # the default value is 0.8. 
+ train_ratio: 0.8 + # the method of splitting dataset; string type; optional; + # currently the options of value are as follows: + # 1> "default": the dataset is evenly divided based train_ratio; + splitting_method: "default" + # the url address of initial model for model pre-training; string url; + initial_model_url: "/ianvs/initial_model/model.zip" + + # algorithm module configuration in the paradigm; list type; + modules: + # type of algorithm module; string type; + # currently the options of value are as follows: + # 1> "basemodel": contains important interfaces such as train, eval, predict and more; required module; + - type: "basemodel" + # name of python module; string type; + # example: basemodel.py has BaseModel module that the alias is "FPN" for this benchmarking; + name: "FPN" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/basemodel.py" + + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + - momentum: + # values of the hyperparameter; list type; + # types of the value are string/int/float/boolean/list/dictionary + values: + - 0.95 + - 0.5 + - learning_rate: + values: + - 0.1 + # 2> "hard_example_mining": check hard example when predict ; optional module; + - type: "hard_example_mining" + # name of python module; string type; + name: "IBT" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/hard_example_mining.py" + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + # threshold of image; value is [0, 1] + - threshold_img: + values: + - 0.9 + # predict box of image; value is [0, 1] + - threshold_box: + values: + - 0.9 +``` + + +The URL address of this algorithm then should be filled in the configuration file of ``benchmarkingJob.yaml`` in the 
following Step 3. Two examples are as follows: +``` yaml + # the configuration of test object + test_object: + # test type; string type; + # currently the option of value is "algorithms",the others will be added in succession. + type: "algorithms" + # test algorithm configuration files; list type; + algorithms: + # algorithm name; string type; + - name: "fpn_incremental_learning" + # the url address of test algorithm configuration file; string type; + # the file format supports yaml/yml + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml" +``` + +or + +``` yaml + # the configuration of test object + test_object: + # test type; string type; + # currently the option of value is "algorithms",the others will be added in succession. + type: "algorithms" + # test algorithm configuration files; list type; + algorithms: + # algorithm name; string type; + - name: "fpn_singletask_learning" + # the url address of test algorithm configuration file; string type; + # the file format supports yaml/yml; + url: "./examples/pcb-aoi/singletask_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml" +``` + +## Step 3. ianvs Configuration + +Now we comes to the final configuration on ``benchmarkingJob.yaml`` before running ianvs. + +First, the user can configure the workspace to reserve the output of tests. +``` yaml +# benchmarkingJob.yaml + workspace: "/ianvs/pcb-aoi/workspace/" +``` + +Then, the user fill in the test environment and algorithm configured in previous steps. +``` yaml +# benchmarkingJob.yaml + testenv: "/home/yj/ianvs/examples/pcb-aoi/benchmarkingjob/testenv/testenv.yaml" +``` +``` yaml + algorithms: + - name: "fpn_incremental_learning" + url: "/home/yj/ianvs/examples/pcb-aoi/benchmarkingjob/testalgorithms/fpn_incremental_learning/fpn_algorithm.yaml" +``` + +As the final leaderboard, the user can configure how to rank the leaderboard with the specific metric and order. 
+``` yaml +# benchmarkingJob.yaml + rank: + sort_by: [ { "f1_score": "descend" } ] +``` + +There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen. In the leaderboard, we provide the ``selected_only`` mode for the user to configure what is shown or is not shown. The user can add his/her interested dataitems in terms of ``paradigms``, ``modules``, ``hyperparameters`` and ``metrics``, so that the selected columns will be shown. +``` yaml + visualization: + mode: "selected_only" + method: "print_table" + + selected_dataitem: + paradigms: [ "all" ] + modules: [ "all" ] + hyperparameters: [ "all" ] + metrics: [ "f1_score" ] + + save_mode: "selected_and_all" +``` + + +## Step 4. Execution and Presentation + +Finally, the user can run ianvs for benchmarking. + +The benchmarking result of the targeted algorithms will be shown after evaluation is done. Leaderboard examples can be found [here](../proposals/leaderboards). diff --git a/docs/guides/images/ianvs_arch.png b/docs/guides/images/ianvs_arch.png new file mode 100644 index 0000000..4b6c4e8 Binary files /dev/null and b/docs/guides/images/ianvs_arch.png differ diff --git a/docs/guides/images/ianvs_concept.png b/docs/guides/images/ianvs_concept.png new file mode 100644 index 0000000..b394c4c Binary files /dev/null and b/docs/guides/images/ianvs_concept.png differ diff --git a/docs/guides/images/user_flow.png b/docs/guides/images/user_flow.png new file mode 100644 index 0000000..71b9122 Binary files /dev/null and b/docs/guides/images/user_flow.png differ diff --git a/docs/guides/quick-start.md b/docs/guides/quick-start.md new file mode 100644 index 0000000..b32881b --- /dev/null +++ b/docs/guides/quick-start.md @@ -0,0 +1,132 @@ +[Links of scenarios]: ../proposals/scenarios/ +[the PCB-AoI public dataset]: https://www.kaggle.com/datasets/kubeedgeianvs/pcb-aoi +[Details of PCB-AoI dataset]: ../proposals/scenarios/industrial-defect-detection/pcb-aoi.md +[XFTP]: 
https://www.xshell.com/en/xftp/ +[FPN-model]: https://kubeedge.obs.cn-north-1.myhuaweicloud.com:443/ianvs/pcb-aoi/model.zip +[How to test algorithms]: how-to-test-algorithms.md +[How to contribute algorithms]: how-to-contribute-algorithms.md +[How to contribute test environments]: how-to-contribute-test-environments.md +[testenv.yaml]: how-to-test-algorithms.md#step-1-test-environment-preparation +[algorithm.yaml]: how-to-test-algorithms.md#step-2-test-case-preparation +[benchmarkingJob.yaml]: how-to-test-algorithms.md#step-3-ianvs-configuration + +# Quick Start + +Welcome to Ianvs! Ianvs aims to test the performance of distributed synergy AI solutions following recognized standards, +in order to facilitate more efficient and effective development. Quick start helps you to test your algorithm on Ianvs +with a simple example on industrial defect detection. You can reduce manual procedures to just a few steps so that you can +building and start your distributed synergy AI solution development within minutes. + +Before using Ianvs, you might want to have the device ready: +- One machine is all you need, i.e., a laptop or a virtual machine is sufficient and cluster is not necessary +- 2 CPUs or more +- 4GB+ free memory, depends on algorithm and simulation setting +- 10GB+ free disk space +- Internet connection for github and pip, etc +- Python 3.6+ installed + +In this example, we are using Linux platform with Python 3.6.9. If you are using Windows, most steps should still apply but a few like commands and package requirements might be different. + +## Step 1. Ianvs Preparation + +First, we download the code of Ianvs. 
Assuming that we are using `/home/ianvs-qs` as workspace, Ianvs can be cloned with `Git` as: +``` shell +/home$ cd /home/ianvs-qs #One might use other path preferred + +/home/ianvs-qs$ mkdir -p ./project/ +/home/ianvs-qs$ cd ./project/ +/home/ianvs-qs/project$ git clone https://github.com/kubeedge/ianvs.git +``` + + + +Then, we install third-party dependencies for ianvs. +``` shell +/home/ianvs-qs/project$ cd ./ianvs + +/home/ianvs-qs/project/ianvs$ sudo apt-get update +/home/ianvs-qs/project/ianvs$ sudo apt-get install libgl1-mesa-glx -y +/home/ianvs-qs/project/ianvs$ python -m pip install --upgrade pip + +/home/ianvs-qs/project/ianvs$ python -m pip install ./examples/resources/third_party/* +/home/ianvs-qs/project/ianvs$ python -m pip install -r requirements.txt +``` + +We are now ready to install Ianvs. +``` shell +/home/ianvs-qs/project/ianvs$ python setup.py install +``` + +## Step 2. Dataset and Model Preparation + +Datasets and models can be large. To avoid over-size projects in the Github repository of Ianvs, the Ianvs code base do not include origin datasets and models. Then developers do not need to download non-necessary datasets and models for a quick start. + +First, the user need to prepare the dataset according to the targeted scenario, from source links (e.g., from Cloud Service or Kaggle) provided by Ianvs. All scenarios with dataset are available [Links of scenarios]. As an example in this document, we are using [the PCB-AoI Public Dataset] released by KubeEdge SIG AI members on Kaggle. See [Details of PCB-AoI dataset] for more information of this dataset. 
+ + + +``` shell +/home/ianvs-qs/project/ianvs$ cd /home/ianvs-qs #One might use other path preferred +/home/ianvs-qs$ mkdir -p ./dataset/ +/home/ianvs-qs$ cd ./dataset +/home/ianvs-qs/dataset$ wget https://kubeedge.obs.cn-north-1.myhuaweicloud.com:443/ianvs/pcb-aoi/dataset.zip +/home/ianvs-qs/dataset$ unzip dataset.zip +``` + +The URL address of this dataset then should be filled in the configuration file ``testenv.yaml``. In this quick start, we have done that for you and the interested readers can refer to [testenv.yaml] for more details. + + + +Then we may Develop the targeted algorithm as usual. In this quick start, Ianvs has prepared an initial model for benchmarking. One can find the model at [FPN-model]. + + + +``` shell +/home/ianvs-qs/dataset$ cd /home/ianvs-qs #One might use other path preferred +/home/ianvs-qs$ mkdir -p ./initial_model +/home/ianvs-qs$ cd /ianvs/initial_model/ +/home/ianvs-qs/initial_model$ wget https://kubeedge.obs.cn-north-1.myhuaweicloud.com:443/ianvs/pcb-aoi/model.zip +``` + + + +Related algorithm is also ready as a wheel in this quick start. +``` shell +/home/ianvs-qs/initial_model$ cd /home/ianvs-qs #One might use other path preferred +/home/ianvs-qs$ cd ./project/ianvs/ +/home/ianvs-qs/project/ianvs$ python -m pip install examples/resources/algorithms/FPN_TensorFlow-0.1-py3-none-any.whl +``` + +The URL address of this algorithm then should be filled in the configuration file ``algorithm.yaml``. In this quick start, we have done that for you and the interested readers can refer to [algorithm.yaml] for more details. + +## Step 3. Ianvs Execution and Presentation + +We are now ready to run the ianvs for benchmarking on PCB-AoI dataset. + +``` shell +/home/ianvs-qs/project/ianvs$ ianvs -f examples/pcb-aoi/singletask_learning_bench/benchmarkingjob.yaml +``` + +Finally, the user can check the result of benchmarking on the console and also in the output path(e.g. 
`/ianvs/singletask_learning_bench/workspace`) defined in the +benchmarking config file (e.g. `benchmarkingjob.yaml`). In this quick start, we have done all configurations for you and the interested readers can refer to [benchmarkingJob.yaml] for more details. + +The final output might look like: + +|rank |algorithm |f1_score |paradigm |basemodel |learning_rate |momentum |time |url | +|:----:|:-----------------------:|:--------:|:------------------:|:---------:|:-------------:|:--------:|:------------------------|:-------------------------------------------------------------------------------------------------------------------------------| +|1 |fpn_singletask_learning | 0.8396 |singletasklearning | FPN | 0.1 | 0.5 | 2022-07-07 20:33:53 |/ianvs/pcb-aoi/singletask_learning_bench/workspace/benchmarkingjob/fpn_singletask_learning/49eb5ffd-fdf0-11ec-8d5d-fa163eaa99d5 | +|2 |fpn_singletask_learning | 0.8353 |singletasklearning | FPN | 0.1 | 0.95 | 2022-07-07 20:31:08 |/ianvs/pcb-aoi/singletask_learning_bench/workspace/benchmarkingjob/fpn_singletask_learning/49eb5ffc-fdf0-11ec-8d5d-fa163eaa99d5 | + +# What is next + +Now the reader shall be ready to exlore more on Ianvs: + +[How to test algorithms] + +[How to contribute algorithms] + +[How to contribute test environments] + +[Links of scenarios] + +[Details of PCB-AoI dataset] diff --git a/docs/index.rst b/docs/index.rst new file mode 100644 index 0000000..3c150aa --- /dev/null +++ b/docs/index.rst @@ -0,0 +1,104 @@ +=========================================== +Welcome to Ianvs documentation! +=========================================== + +Ianvs is a distributed synergy AI benchmarking project incubated in KubeEdge SIG AI. According to the landing challenge survey 2022 in KubeEdge SIG AI, +when it comes to the landing of distributed synergy AI projects, developers suffer from the lack of support on related datasets and algorithms; +while end users are lost in the sea of mismatched solutions. 
+That limits the wide application of related techniques and hinders a prosperous ecosystem of distributed synergy AI. + +Confronted with these challenges, Ianvs aims to test the performance of distributed synergy AI solutions following recognized standards, +in order to facilitate more efficient and effective development. More detailedly, Ianvs prepares not only test cases with datasets and corresponding algorithms, +but also benchmarking tools including simulation and hyper-parameter searching. +Ianvs also revealing best practices for developers and end users with presentation tools including leaderboards and test reports. + +The scope of Ianvs includes +- Providing end-to-end benchmark toolkits across devices, edge nodes and cloud nodes based on typical distributed-synergy AI paradigms and applications. + - Tools to manage test environment. For example, it would be necessary to support the CRUD (Create, Read, Update and Delete) actions in test environments. Elements of such test environments include algorithm-wise and system-wise configuration. + - Tools to control test cases. Typical examples include paradigm templates, simulation tools, and hyper-parameter-based assistant tools. + - Tools to manage benchmark presentation, e.g., leaderboard and test report generation. +- Cooperation with other organizations or communities, e.g., in KubeEdge SIG AI, to establish comprehensive benchmarks and developed related applications, which can include but are not limited to + - Dataset collection, re-organization, and publication + - Formalized specifications, e.g., standards + - Holding competitions or coding events, e.g., open source promotion plan + - Maintaining solution leaderboards or certifications for commercial usage + +Start your journey on Ianvs with the following links: + +.. toctree:: + :maxdepth: 1 + :caption: Introduction + + Introduction to Ianvs + guides/quick-start + +.. 
toctree:: + :maxdepth: 1 + :caption: GUIDEs + + guides/how-to-install-ianvs + guides/how-to-contribute-test-environments + guides/how-to-test-algorithms + guides/how-to-contribute-leaderboards-or-test-reports + guides/how-to-contribute-algorithms + +.. toctree:: + :maxdepth: 1 + :caption: SCENARIOs + + Industrial Defect Detection: PCB-AoI + +.. toctree:: + :maxdepth: 1 + :titlesonly: + :glob: + :caption: Stories + + Leaderboard: Single Task Learning on PCB-AoI + Leaderboard: Incremental Learning on PCB-AoI + Test Report: Single Task Learning on PCB-AoI + Test Report: Incremental Learning on PCB-AoI + +.. toctree:: + :maxdepth: 1 + :titlesonly: + :glob: + :caption: ALGORITHMs + + Single Task Learning: FPN + Incremental Learning: BasicIL-FPN + + +.. toctree:: + :maxdepth: 1 + :titlesonly: + :glob: + :caption: USER INTERFACE + + + How to Config Algorithm + How to Config TestEnv + How to Config Benchmarkingjob + How to Use Ianvs Command Line + + + + +.. toctree:: + :maxdepth: 1 + :caption: ROADMAP + + roadmap + + +RELATED LINKs +============= + +.. 
mdinclude:: related-link.md + +Indices and tables +================== + +* :ref:`genindex` +* :ref:`modindex` +* :ref:`search` diff --git a/docs/leaderboards/leaderboard-in-industrial-defect-detection-of-PCB-AoI/leaderboard-of-incremental-learning.md b/docs/leaderboards/leaderboard-in-industrial-defect-detection-of-PCB-AoI/leaderboard-of-incremental-learning.md new file mode 100644 index 0000000..2418056 --- /dev/null +++ b/docs/leaderboards/leaderboard-in-industrial-defect-detection-of-PCB-AoI/leaderboard-of-incremental-learning.md @@ -0,0 +1,7 @@ +# Leaderboard of incremental learning + + +|rank |algorithm |f1_score |samples_transfer_ratio|paradigm |basemodel |learning_rate |momentum |threshold_img |threshold_box |time | +|:----:|:-----------------------:|:--------:|:--------------------:|:-----------------:|:---------:|:-------------:|:--------:|:------------:|:------------:|-------------------| +|1 |fpn_incremental_learning | 0.9572 |0.5263 |incrementallearning|FPN | 0.1 | 0.95 | 0.9 |0.9 |2022-07-07 20:14:12| +|2 |fpn_incremental_learning | 0.9444 |0.5789 |incrementallearning|FPN | 0.1 | 0.5 | 0.9 |0.9 |2022-07-07 20:20:57| diff --git a/docs/leaderboards/leaderboard-in-industrial-defect-detection-of-PCB-AoI/leaderboard-of-single-task-learning.md b/docs/leaderboards/leaderboard-in-industrial-defect-detection-of-PCB-AoI/leaderboard-of-single-task-learning.md new file mode 100644 index 0000000..806bcaf --- /dev/null +++ b/docs/leaderboards/leaderboard-in-industrial-defect-detection-of-PCB-AoI/leaderboard-of-single-task-learning.md @@ -0,0 +1,8 @@ +# Leaderboard of single task learning + + + +|rank |algorithm |f1_score |paradigm |basemodel |learning_rate |momentum |time | +|:----:|:-----------------------:|:--------:|:------------------:|:---------:|:-------------:|:--------:|:------------------------ | +|1 |fpn_singletask_learning | 0.8396 |singletasklearning | FPN | 0.1 | 0.5 | 2022-07-07 20:33:53 | +|2 |fpn_singletask_learning | 0.8353 |singletasklearning | FPN 
| 0.1 | 0.95 | 2022-07-07 20:31:08 | diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..954237b --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/proposals/algorithms/incremental-learning/basicIL-fpn.md b/docs/proposals/algorithms/incremental-learning/basicIL-fpn.md new file mode 100644 index 0000000..52804db --- /dev/null +++ b/docs/proposals/algorithms/incremental-learning/basicIL-fpn.md @@ -0,0 +1,170 @@ +# Incremental learning: BasicIL-FPN + +Initial model: [Huawei OBS](https://kubeedge.obs.cn-north-1.myhuaweicloud.com:443/ianvs/pcb-aoi/model.zip) + +Traditionally, the data is collected manually and periodically retrained on the cloud to improve the model effect. However, data is continuously generated on the edge side. Traditional method wastes a lot of human resources, and the model update frequency is slow. + +Incremental learning allows users to continuously monitor the newly generated data and by configuring some triggering rules to determine whether to start training, evaluation, and deployment automatically, and continuously improve the model performance. 
+ +Its goals include: +* Automatically retrains, evaluates, and updates models based on the data generated at the edge. +* Support time trigger, sample size trigger, and precision-based trigger. +* Support manual triggering of training, evaluation, and model update. +* Support hard example discovering of unlabeled data, for reducing the manual labeling workload. + +![](incremental_learning.png) + +As shown in the above figure, the incremental learning works as following procedures: +1. Developer implements and deploys the application based on incremental learning. +2. The application runs and launches incremental learning. It can also return the inference result to the application. +3. The system detects hard examples and uploads hard examples to the cloud. +4. Labeling service labels the hard examples. +5. Incremental training online learns the hard examples to generate a new model. +6. Model evaluation is conducted and updates the model if qualified. +7. The model outputs the inference result given test samples and continue as Step 3. + +## Implementation +Here we will show how to implement a single task learning algorithm for testing in ianvs, based on an opensource algorithm [FPN]. + +For test of your own algorithm, the base model of FPN is not necessary: It can be replaced with any algorithm complying the requirement of ianvs interface. + +Ianvs testing algorithm development, at present, are using Sedna Lib. The following is recommended development workflow: +1. Algorithm Development: put the algorithm implementation to ianvs [examples directory] locally, for testing. +2. Algorithm Submission: submit the algorithm implementation to [Sedna repository], for sharing, then everyone can test and use your algorithm. + +Sedna provides a class called `class_factory.py` in `common` package, in which only a few lines of changes are required to become a module of sedna. + +Two classes are defined in `class_factory.py`, namely `ClassType` and `ClassFactory`. 
`ClassFactory` can register the modules you want to reuse through decorators. For example, in the following code example, you have customized a **single task learning algorithm**, and you only need to add a line of `ClassFactory.register(ClassType.GENERAL)` to complete the registration.
+ + return self.checkpoint_path + + def save(self, model_path): + if not model_path: + raise Exception("model path is None.") + + model_dir, model_name = os.path.split(self.checkpoint_path) + models = [model for model in os.listdir(model_dir) if model_name in model] + + if os.path.splitext(model_path)[-1] != ".zip": + model_path = os.path.join(model_path, "model.zip") + + if not os.path.isdir(os.path.dirname(model_path)): + os.makedirs(os.path.dirname(model_path)) + + with zipfile.ZipFile(model_path, "w") as f: + for model_file in models: + model_file_path = os.path.join(model_dir, model_file) + f.write(model_file_path, model_file, compress_type=zipfile.ZIP_DEFLATED) + + return model_path + + def predict(self, data, input_shape=None, **kwargs): + if data is None: + raise Exception("Predict data is None") + + inference_output_dir = os.getenv("RESULT_SAVED_URL") + + with self.tf_graph.as_default(): + if not self.has_fast_rcnn_predict: + self._fast_rcnn_predict() + self.has_fast_rcnn_predict = True + + restorer = self._get_restorer() + + config = tf.ConfigProto() + init_op = tf.group( + tf.global_variables_initializer(), + tf.local_variables_initializer() + ) + + with tf.Session(config=config) as sess: + sess.run(init_op) + + # ... ... + # several lines are omitted here. 
+ + return predict_dict + + def load(self, model_url=None): + if model_url: + model_dir = os.path.split(model_url)[0] + with zipfile.ZipFile(model_url, "r") as f: + f.extractall(path=model_dir) + ckpt_name = os.path.basename(f.namelist()[0]) + index = ckpt_name.find("ckpt") + ckpt_name = ckpt_name[:index + 4] + self.checkpoint_path = os.path.join(model_dir, ckpt_name) + + else: + raise Exception(f"model url is None") + + return self.checkpoint_path + + def evaluate(self, data, model_path, **kwargs): + if data is None or data.x is None or data.y is None: + raise Exception("Prediction data is None") + + self.load(model_path) + predict_dict = self.predict(data.x) + metric_name, metric_func = kwargs.get("metric") + if callable(metric_func): + return {"f1_score": metric_func(data.y, predict_dict)} + else: + raise Exception(f"not found model metric func(name={metric_name}) in model eval phase") +``` + +After registration, you only need to change the name of the basicIL and parameters in the yaml file, and then the corresponding class will be automatically called according to the name. 
+ + + +[FPN]: https://github.com/DetectionTeamUCAS/FPN_Tensorflow +[examples directory]: ../../../../examples +[Sedna repository]: https://github.com/kubeedge/sedna + diff --git a/docs/proposals/algorithms/incremental-learning/incremental_learning.png b/docs/proposals/algorithms/incremental-learning/incremental_learning.png new file mode 100644 index 0000000..0303e24 Binary files /dev/null and b/docs/proposals/algorithms/incremental-learning/incremental_learning.png differ diff --git a/docs/proposals/algorithms/single-task-learning/fpn.md b/docs/proposals/algorithms/single-task-learning/fpn.md new file mode 100644 index 0000000..0ada20a --- /dev/null +++ b/docs/proposals/algorithms/single-task-learning/fpn.md @@ -0,0 +1,171 @@ +# Single task learning: FPN + +Pre-trained model: [Huawei OBS](https://kubeedge.obs.cn-north-1.myhuaweicloud.com:443/ianvs/pcb-aoi/model.zip) + +Single task learning is a traditional learning pooling all data together to train a single model. It typically includes a specialist model laser-focused on a single task and requires large amounts of task-specific labeled data, which is not always available on early stage of a distributed synergy AI project. + +As shown in the following figure, the single task learning works as procedures below: +1. Developer implements and deploys the application based on single task learning. +2. The application runs and launches single task learning. +3. The application uploads samples to the cloud. +4. Labeling service labels the uploaded samples. +5. Training learns the samples to generate a new model. +6. The system updates the model on the edge. +7. The model conducts inference given test samples where the inference result is send to the application which ends the process. + +![](single_task_learning.png) + + + +As for the base model of single task learning, in this report we are using FPN_TensorFlow. 
It is a TensorFlow re-implementation of Feature Pyramid Networks for Object Detection, which is based on Faster-RCNN. In more detail, feature pyramids are a basic component in recognition systems for detecting objects at different scales. But recent deep learning object detectors have avoided pyramid representations, in part because they are compute and memory intensive. Researchers have exploited the inherent multi-scale, pyramidal hierarchy of deep convolutional networks to construct feature pyramids with marginal extra cost. A top-down architecture with lateral connections is developed for building high-level semantic feature maps at all scales. The architecture, called a Feature Pyramid Network (FPN), shows significant improvement as a generic feature extractor in several applications. Using FPN in a basic Faster R-CNN system, the method achieves state-of-the-art single-model results on the COCO detection benchmark without bells and whistles, surpassing all existing single-task entries including those from the COCO 2016 challenge winners. In addition, FPN can run at 5 FPS on a GPU and thus is a practical and accurate solution to multi-scale object detection. + +The ``FPN_TensorFlow`` is also open sourced and completed by YangXue and YangJirui. For those interested in details of ``FPN_TensorFlow``, an example implementation is available [here](https://github.com/DetectionTeamUCAS/FPN_Tensorflow) and is extended with the Ianvs algorithm interface [here](https://github.com/kubeedge-sedna/FPN_Tensorflow). + + +## Implementation + +Here we also show how to implement a single task learning algorithm for testing in ianvs, based on an open-source algorithm [FPN]. + +When testing your own algorithm, of course, FPN is not necessary. It can be replaced with any algorithm complying with the requirements of the ianvs interface. + +Ianvs testing algorithm development, at present, is using the Sedna Lib. The following is the recommended development workflow: +1. 
Algorithm Development: put the algorithm implementation into the ianvs [examples directory] locally, for testing. +2. Algorithm Submission: submit the algorithm implementation to the [Sedna repository], for sharing; then everyone can test and use your algorithm. + +## Customize algorithm + +Sedna provides a class called `class_factory.py` in the `common` package, in which only a few lines of changes are required to become a module of Sedna. + +Two classes are defined in `class_factory.py`, namely `ClassType` and `ClassFactory`. + +`ClassFactory` can register the modules you want to reuse through decorators. For example, in the following code example, you have customized a **single task learning algorithm**; you only need to add a line of `ClassFactory.register(ClassType.GENERAL)` to complete the registration. + +The following code is just to show the overall structure of a basicIL-fpn BaseModel, not the complete version. The complete code can be found [here](https://github.com/JimmyYang20/ianvs/tree/main/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn). 
+ +```python + +@ClassFactory.register(ClassType.GENERAL, alias="FPN") +class BaseModel: + + def __init__(self, **kwargs): + """ + initialize logging configuration + """ + + self.has_fast_rcnn_predict = False + + self._init_tf_graph() + + self.temp_dir = tempfile.mkdtemp() + if not os.path.isdir(self.temp_dir): + mkdir(self.temp_dir) + + os.environ["MODEL_NAME"] = "model.zip" + cfgs.LR = kwargs.get("learning_rate", 0.0001) + cfgs.MOMENTUM = kwargs.get("momentum", 0.9) + cfgs.MAX_ITERATION = kwargs.get("max_iteration", 5) + + def train(self, train_data, valid_data=None, **kwargs): + + if train_data is None or train_data.x is None or train_data.y is None: + raise Exception("Train data is None.") + + with tf.Graph().as_default(): + + img_name_batch, train_data, gtboxes_and_label_batch, num_objects_batch, data_num = \ + next_batch_for_tasks( + (train_data.x, train_data.y), + dataset_name=cfgs.DATASET_NAME, + batch_size=cfgs.BATCH_SIZE, + shortside_len=cfgs.SHORT_SIDE_LEN, + is_training=True, + save_name="train" + ) + + # ... ... + # several lines are omitted here. 
+ + return self.checkpoint_path + + def save(self, model_path): + if not model_path: + raise Exception("model path is None.") + + model_dir, model_name = os.path.split(self.checkpoint_path) + models = [model for model in os.listdir(model_dir) if model_name in model] + + if os.path.splitext(model_path)[-1] != ".zip": + model_path = os.path.join(model_path, "model.zip") + + if not os.path.isdir(os.path.dirname(model_path)): + os.makedirs(os.path.dirname(model_path)) + + with zipfile.ZipFile(model_path, "w") as f: + for model_file in models: + model_file_path = os.path.join(model_dir, model_file) + f.write(model_file_path, model_file, compress_type=zipfile.ZIP_DEFLATED) + + return model_path + + def predict(self, data, input_shape=None, **kwargs): + if data is None: + raise Exception("Predict data is None") + + inference_output_dir = os.getenv("RESULT_SAVED_URL") + + with self.tf_graph.as_default(): + if not self.has_fast_rcnn_predict: + self._fast_rcnn_predict() + self.has_fast_rcnn_predict = True + + restorer = self._get_restorer() + + config = tf.ConfigProto() + init_op = tf.group( + tf.global_variables_initializer(), + tf.local_variables_initializer() + ) + + with tf.Session(config=config) as sess: + sess.run(init_op) + + # ... ... + # several lines are omitted here. 
+ + return predict_dict + + def load(self, model_url=None): + if model_url: + model_dir = os.path.split(model_url)[0] + with zipfile.ZipFile(model_url, "r") as f: + f.extractall(path=model_dir) + ckpt_name = os.path.basename(f.namelist()[0]) + index = ckpt_name.find("ckpt") + ckpt_name = ckpt_name[:index + 4] + self.checkpoint_path = os.path.join(model_dir, ckpt_name) + + else: + raise Exception(f"model url is None") + + return self.checkpoint_path + + def evaluate(self, data, model_path, **kwargs): + if data is None or data.x is None or data.y is None: + raise Exception("Prediction data is None") + + self.load(model_path) + predict_dict = self.predict(data.x) + metric_name, metric_func = kwargs.get("metric") + if callable(metric_func): + return {"f1_score": metric_func(data.y, predict_dict)} + else: + raise Exception(f"not found model metric func(name={metric_name}) in model eval phase") +``` + +After registration, you only need to change the name of the STL and parameters in the yaml file, and then the corresponding class will be automatically called according to the name. 
+ + + +[FPN]: https://github.com/DetectionTeamUCAS/FPN_Tensorflow +[examples directory]: ../../../../examples +[Sedna repository]: https://github.com/kubeedge/sedna \ No newline at end of file diff --git a/docs/proposals/algorithms/single-task-learning/single_task_learning.png b/docs/proposals/algorithms/single-task-learning/single_task_learning.png new file mode 100644 index 0000000..f448391 Binary files /dev/null and b/docs/proposals/algorithms/single-task-learning/single_task_learning.png differ diff --git a/docs/proposals/scenarios/industrial-defect-detection/images/PCB-AoI_example.png b/docs/proposals/scenarios/industrial-defect-detection/images/PCB-AoI_example.png new file mode 100644 index 0000000..9fbea5e Binary files /dev/null and b/docs/proposals/scenarios/industrial-defect-detection/images/PCB-AoI_example.png differ diff --git a/docs/proposals/scenarios/industrial-defect-detection/pcb-aoi.md b/docs/proposals/scenarios/industrial-defect-detection/pcb-aoi.md new file mode 100644 index 0000000..1fba247 --- /dev/null +++ b/docs/proposals/scenarios/industrial-defect-detection/pcb-aoi.md @@ -0,0 +1,48 @@ +# Industrial defect detection: the PCB-AoI dataset + +Download link: [Kaggle](https://www.kaggle.com/datasets/kubeedgeianvs/pcb-aoi), [Huawei OBS](https://kubeedge.obs.cn-north-1.myhuaweicloud.com:443/ianvs/pcb-aoi/dataset.zip) + +## Authors +- China Telcom Research Institute: Dongdong Li, Dan Liu, Yun Shen, Yaqi Song +- Raisecom Technology Co.,ltd.: Liangliang Luo + +## Background +Surface-mount technology (SMT) is a technology that automates electronic circuits production in which components are mounted or placed onto the surface of printed circuit boards. Solder paste printing (SPP) is the most delicate stage in SMT. It prints solder paste on the pads of an electronic circuit panel. Thus, SPP is followed by a solder paste inspection (SPI) stage to detect defects. 
SPI scans the printed circuit board for missing/less paste, bridging between pads, misalignments, and so forth. Boards with anomalies must be detected, and boards in good condition should not be disposed of. Thus SPI requires high precision and a high recall. + +The PCB-AoI dataset is a part of the open-source distributed synergy AI benchmarking project KubeEdge-Ianvs. Ianvs is honored to be the ``first`` site where this dataset is released, and the Ianvs working group put it on Kaggle as [The PCB-AoI public dataset](https://www.kaggle.com/datasets/kubeedgeianvs/pcb-aoi). It is released by KubeEdge SIG AI members from China Telecom and Raisecom Technology. + +Below are two example figures in the dataset. + +![](images/PCB-AoI_example.png) + + +## Data Explorer +In this dataset, more than 230 boards are collected and the number of images is enhanced to more than 1200. In detail, the dataset includes two parts, i.e., the train and the test set. The train set includes 173 boards while the test set includes 60 boards. That is, the train-test ratio is around 3:1 in terms of PCB boards. Data augmentation is conducted, boosting the train-test ratio to 1211:60 (about 20:1) in terms of images. Both directories of train_data and test_data include the ``index`` file which records the mapping between the raw images and the annotation labels. 
+ +The directory structure of this dataset is as follows: +``` +├── PCB-AoI Dataset +│   ├── train_data +│   ├── Annotation +│   ├── JPEGImages +│   ├── index.txt +│ ├── test_data +│   ├── Annotation +│   ├── JPEGImages +│   ├── index.txt +│ ├── train_data_augmentation +│   ├── Annotation +│   ├── JPEGImages +│   ├── index.txt +``` + +The following is part of `index.txt`: +```shell +./JPEGImages/20161019-SPI-AOI-1.jpeg ./Annotations/20161019-SPI-AOI-1.xml +./JPEGImages/20161020-SPI-AOI-5.jpeg ./Annotations/20161020-SPI-AOI-5.xml +./JPEGImages/20161021-SPI-AOI-13.jpeg ./Annotations/20161021-SPI-AOI-13.xml +./JPEGImages/20161021-SPI-AOI-14.jpeg ./Annotations/20161021-SPI-AOI-14.xml +./JPEGImages/20161021-SPI-AOI-15.jpeg ./Annotations/20161021-SPI-AOI-15.xml +``` + +Column 1 stands for the file path of the raw image, and column 2 is the file path of the corresponding annotation file. In this dataset, the xml annotation follows the Pascal VOC XML format. You can find more description of Pascal VOC XML [here](https://roboflow.com/formats/pascal-voc-xml). 
diff --git a/docs/proposals/test-reports/images/20170316-SPI-AOI-19.xml b/docs/proposals/test-reports/images/20170316-SPI-AOI-19.xml new file mode 100644 index 0000000..74d8130 --- /dev/null +++ b/docs/proposals/test-reports/images/20170316-SPI-AOI-19.xml @@ -0,0 +1,116 @@ + + UAV_data + 20170316-SPI-AOI-19.jpg + + The UAV autolanding + UAV AutoLanding + flickr + NULL + + + NULL + ChaojieZhu + + + 600 + 600 + 3 + + 0 + + Bad_qiaojiao + Unspecified + 0 + 0 + + 63 + 270 + 102 + 278 + + + + Bad_podu + Unspecified + 0 + 0 + + 39 + 279 + 72 + 290 + + + + Bad_qiaojiao + Unspecified + 0 + 0 + + 61 + 416 + 103 + 424 + + + + Bad_qiaojiao + Unspecified + 0 + 0 + + 61 + 439 + 100 + 447 + + + + Bad_qiaojiao + Unspecified + 0 + 0 + + 65 + 488 + 105 + 496 + + + + Bad_qiaojiao + Unspecified + 0 + 0 + + 61 + 468 + 99 + 476 + + + + Bad_podu + Unspecified + 0 + 0 + + 42 + 476 + 72 + 488 + + + + Bad_podu + Unspecified + 0 + 0 + + 42 + 427 + 71 + 437 + + + \ No newline at end of file diff --git a/docs/proposals/test-reports/images/PCB-AoI_example.png b/docs/proposals/test-reports/images/PCB-AoI_example.png new file mode 100644 index 0000000..9fbea5e Binary files /dev/null and b/docs/proposals/test-reports/images/PCB-AoI_example.png differ diff --git a/docs/proposals/test-reports/images/after_increment_20170316-SPI-AOI-19.jpeg_fpn.jpg b/docs/proposals/test-reports/images/after_increment_20170316-SPI-AOI-19.jpeg_fpn.jpg new file mode 100644 index 0000000..4d84b69 Binary files /dev/null and b/docs/proposals/test-reports/images/after_increment_20170316-SPI-AOI-19.jpeg_fpn.jpg differ diff --git a/docs/proposals/test-reports/images/before_increment_20170316-SPI-AOI-19.jpeg_fpn.jpg b/docs/proposals/test-reports/images/before_increment_20170316-SPI-AOI-19.jpeg_fpn.jpg new file mode 100644 index 0000000..7b136f9 Binary files /dev/null and b/docs/proposals/test-reports/images/before_increment_20170316-SPI-AOI-19.jpeg_fpn.jpg differ diff --git 
a/docs/proposals/test-reports/testing-incremental-learning-in-industrial-defect-detection-with-pcb-aoi.md b/docs/proposals/test-reports/testing-incremental-learning-in-industrial-defect-detection-with-pcb-aoi.md new file mode 100644 index 0000000..c144428 --- /dev/null +++ b/docs/proposals/test-reports/testing-incremental-learning-in-industrial-defect-detection-with-pcb-aoi.md @@ -0,0 +1,157 @@ +# Testing incremental learning in industrial defect detection + +## About Industrial Defect Detection + +In recent years, the manufacturing process is moving towards a higher degree of automation and improved manufacturing efficiency. During this development, smart manufacturing increasingly employs computing technologies, for example, with a higher degree of automation, there is also a higher risk in product defects; thus, a number of machine learning models have been developed to detect defectives in the manufacturing process. + +Defects are an unwanted thing in manufacturing industry. There are many types of defect in manufacturing like blow holes, pinholes, burr, shrinkage defects, mould material defects, pouring metal defects, metallurgical defects, etc. For removing this defective product all industry have their defect detection department. But the main problem is this inspection process is carried out manually. It is a very time-consuming process and due to human accuracy, this is not 100\% accurate. This can because of the rejection of the whole order. So it creates a big loss in the company. + +## About Dataset + +The printed circuit board (PCB) industry is not different. Surface-mount technology (SMT) is a technology that automates PCB production in which components are mounted or placed onto the surface of printed circuit boards. Solder paste printing (SPP) is the most delicate stage in SMT. It prints solder paste on the pads of an electronic circuit panel. Thus, SPP is followed by a solder paste inspection (SPI) stage to detect defects. 
SPI scans the printed circuit board for missing/less paste, bridging between pads, miss alignments, and so forth. Boards with anomaly must be detected, and boards in good condition should not be disposed of. Thus SPI requires high precision and a high recall. + +As an example in this document, we are using [the PCB-AoI dataset](https://www.kaggle.com/datasets/kubeedgeianvs/pcb-aoi) released by KubeEdge SIG AI members on Kaggle. See [this link](../scenarios/industrial-defect-detection/pcb-aoi.md) for more information of this dataset. Below also shows two example figures in the dataset. + +![](images/PCB-AoI_example.png) + + +## About Incremental Learning +Traditionally, the data is collected manually and periodically retrained on the cloud to improve the model effect. However, data is continuously generated on the edge side. Traditional method wastes a lot of human resources, and the model update frequency is slow. + +Incremental learning allows users to continuously monitor the newly generated data and by configuring some triggering rules to determine whether to start training, evaluation, and deployment automatically, and continuously improve the model performance. + +Its goals include: +* Automatically retrains, evaluates, and updates models based on the data generated at the edge. +* Support time trigger, sample size trigger, and precision-based trigger. +* Support manual triggering of training, evaluation, and model update. +* Support hard example discovering of unlabeled data, for reducing the manual labeling workload. + +This report is testing the basic incremental algorithm based on FPN and interested readers can refer to [the basicIL-fpn](../algorithms/incremental-learning/basicIL-fpn.md) for more details. 
+ +## Benchmark Setting + +Key settings of the test environment to incremental learning are as follows: + +``` yaml +# testenv.yaml +testenv: + # dataset configuration + dataset: + # the url address of train dataset index; string type; + train_url: "/ianvs/dataset/train_data/index.txt" + # the url address of test dataset index; string type; + test_url: "/ianvs/dataset/test_data/index.txt" + + # model eval configuration of incremental learning; + model_eval: + # metric used for model evaluation + model_metric: + # metric name; string type; + name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + + # condition of triggering inference model to update + # threshold of the condition; types are float/int + threshold: 0.01 + # operator of the condition; string type; + # values are ">=", ">", "<=", "<" and "="; + operator: ">=" + + # metrics configuration for test case's evaluation; list type; + metrics: + # metric name; string type; + - name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + - name: "samples_transfer_ratio" + + # incremental rounds setting for incremental learning paradigm.; int type; default value is 2; + incremental_rounds: 2 +``` + +Key settings of the algorithm to incremental learning are as follows: + +```yaml +# algorithm.yaml +algorithm: + # paradigm type; string type; + # currently the options of value are as follows: + # 1> "singletasklearning" + # 2> "incrementallearning" + paradigm_type: "incrementallearning" + incremental_learning_data_setting: + # ratio of training dataset; float type. + # the default value is 0.8. 
+ train_ratio: 0.8 + # the method of splitting dataset; string type; optional; + # currently the options of value are as follows: + # 1> "default": the dataset is evenly divided based train_ratio; + splitting_method: "default" + # the url address of initial model for model pre-training; string url; + initial_model_url: "/ianvs/initial_model/model.zip" + + # algorithm module configuration in the paradigm; list type; + modules: + # type of algorithm module; string type; + # currently the options of value are as follows: + # 1> "basemodel": contains important interfaces such as train eval predict and more; required module; + - type: "basemodel" + # name of python module; string type; + # example: basemodel.py has BaseModel module that the alias is "FPN" for this benchmarking; + name: "FPN" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/basemodel.py" + + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + - momentum: + # values of the hyperparameter; list type; + # types of the value are string/int/float/boolean/list/dictionary + values: + - 0.95 + - 0.5 + # hyperparameters configuration files; dictionary type; + - other_hyperparameters: + # the url addresses of hyperparameters configuration files; list type; + # type of the value is string; + values: + - "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/fpn_hyperparameter.yaml" + # 2> "hard_example_mining": check hard example when predict ; optional module; + - type: "hard_example_mining" + # name of python module; string type; + name: "IBT" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/hard_example_mining.py" + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + # threshold of image; value is 
[0, 1] + - threshold_img: + values: + - 0.9 + # predict box of image; value is [0, 1] + - threshold_box: + values: + - 0.9 + +``` + +## Benchmark Result + +We release the +leaderboard [here](../leaderboards/leaderboard-in-industrial-defect-detection-of-PCB-AoI/leaderboard-of-incremental-learning.md) +. + +## Effect Display + +The pcb image has 8 bad defects. See [label file](./images/20170316-SPI-AOI-19.xml) for details. + +* Before incremental learning, `7` the bad defects have been detected. + +![](./images/before_increment_20170316-SPI-AOI-19.jpeg_fpn.jpg) + +* After incremental learning, `8` the bad defects have been detected. + +![](./images/after_increment_20170316-SPI-AOI-19.jpeg_fpn.jpg) \ No newline at end of file diff --git a/docs/proposals/test-reports/testing-single-task-learning-in-industrial-defect-detection-with-pcb-aoi.md b/docs/proposals/test-reports/testing-single-task-learning-in-industrial-defect-detection-with-pcb-aoi.md new file mode 100644 index 0000000..719e2bf --- /dev/null +++ b/docs/proposals/test-reports/testing-single-task-learning-in-industrial-defect-detection-with-pcb-aoi.md @@ -0,0 +1,90 @@ +# Testing single task learning in industrial defect detection + +## About Industrial Defect Detection + +In recent years, the manufacturing process is moving towards a higher degree of automation and improved manufacturing efficiency. During this development, smart manufacturing increasingly employs computing technologies, for example, with a higher degree of automation, there is also a higher risk in product defects; thus, a number of machine learning models have been developed to detect defectives in the manufacturing process. + +Defects are an unwanted thing in manufacturing industry. There are many types of defect in manufacturing like blow holes, pinholes, burr, shrinkage defects, mould material defects, pouring metal defects, metallurgical defects, etc. 
To remove these defective products, every industry has its own defect detection department. But the main problem is that this inspection process is carried out manually. It is a very time-consuming process, and due to the limits of human accuracy, it is not 100\% accurate. This can cause the rejection of the whole order, so it creates a big loss for the company. + + +## About Dataset + +The printed circuit board (PCB) industry is not different. Surface-mount technology (SMT) is a technology that automates PCB production in which components are mounted or placed onto the surface of printed circuit boards. Solder paste printing (SPP) is the most delicate stage in SMT. It prints solder paste on the pads of an electronic circuit panel. Thus, SPP is followed by a solder paste inspection (SPI) stage to detect defects. SPI scans the printed circuit board for missing/less paste, bridging between pads, misalignments, and so forth. Boards with anomalies must be detected, and boards in good condition should not be disposed of. Thus SPI requires high precision and a high recall. + +As an example in this document, we are using [the PCB-AoI dataset](https://www.kaggle.com/datasets/kubeedgeianvs/pcb-aoi) released by KubeEdge SIG AI members on Kaggle. See [this link](../scenarios/industrial-defect-detection/pcb-aoi.md) for more information about this dataset. Two example figures from the dataset are also shown below. + +![](images/PCB-AoI_example.png) + +## About Single Task Learning +Single task learning is a traditional learning pooling all data together to train a single model. It typically includes a specialist model laser-focused on a single task and requires large amounts of task-specific labeled data, which is not always available at the early stage of a distributed synergy AI project. + +This report is testing the single task learning algorithm based on ``FPN_TensorFlow``. It is a TensorFlow re-implementation of Feature Pyramid Networks for Object Detection, which is based on Faster-RCNN. 
More detailedly, feature pyramids are a basic component in recognition systems for detecting objects at different scales. But recent deep learning object detectors have avoided pyramid representations, in part because they are compute and memory intensive. Researchers have exploited the inherent multi-scale, pyramidal hierarchy of deep convolutional networks to construct feature pyramids with marginal extra cost. A top-down architecture with lateral connections is developed for building high-level semantic feature maps at all scales. The architecture, called a Feature Pyramid Network (FPN), shows significant improvement as a generic feature extractor in several applications. Using FPN in a basic Faster R-CNN system, the method achieves state-of-the-art single-model results on the COCO detection benchmark without bells and whistles, surpassing all existing single-task entries including those from the COCO 2016 challenge winners. In addition, FPN can run at 5 FPS on a GPU and thus is a practical and accurate solution to multi-scale object detection. The ``FPN_TensorFlow`` is also open sourced and completed by YangXue and YangJirui. For those interested in details of ``FPN_TensorFlow``, an example implementation is available [here](https://github.com/DetectionTeamUCAS/FPN_Tensorflow) and is extended with the Ianvs algorithm inferface [here](https://github.com/kubeedge-sedna/FPN_Tensorflow). Interested readers can refer to [the FPN](../algorithms/single-task-learning/fpn.md) for more details. 
+ +## Benchmark Setting +Key settings of the test environment to single task learning are as follows: +``` yaml +# testenv.yaml +testenv: + # dataset configuration + dataset: + # the url address of train dataset index; string type; + train_url: "/ianvs/dataset/train_data/index.txt" + # the url address of test dataset index; string type; + test_url: "/ianvs/dataset/test_data/index.txt" + + # metrics configuration for test case's evaluation; list type; + metrics: + # metric name; string type; + - name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/singletask_learning_bench/testenv/f1_score.py" +``` + +Key settings of the algorithm to single learning are as follows: + +```yaml +# algorithm.yaml +algorithm: + # paradigm type; string type; + # currently the options of value are as follows: + # 1> "singletasklearning" + # 2> "incrementallearning" + paradigm_type: "singletasklearning" + # the url address of initial model; string type; optional; + initial_model_url: "/ianvs/initial_model/model.zip" + + # algorithm module configuration in the paradigm; list type; + modules: + # kind of algorithm module; string type; + # currently the options of value are as follows: + # 1> "basemodel" + - type: "basemodel" + # name of python module; string type; + # example: basemodel.py has BaseModel module that the alias is "FPN" for this benchmarking; + name: "FPN" + # the url address of python module; string type; + url: "./examples/pcb-aoi/singletask_learning_bench/testalgorithms/fpn/basemodel.py" + + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + - momentum: + # values of the hyperparameter; list type; + # types of the value are string/int/float/boolean/list/dictionary + values: + - 0.95 + - 0.5 + # hyperparameters configuration files; dictionary type; + - other_hyperparameters: + # the url addresses of hyperparameters configuration files; list type; + # type of the value is 
string; + values: + - "./examples/pcb-aoi/singletask_learning_bench/testalgorithms/fpn/fpn_hyperparameter.yaml" + +``` + +## Benchmark Result + +We release the +leaderboard [here](../leaderboards/leaderboard-in-industrial-defect-detection-of-PCB-AoI/leaderboard-of-single-task-learning.md) +. \ No newline at end of file diff --git a/docs/related-link.md b/docs/related-link.md new file mode 100644 index 0000000..d516aa1 --- /dev/null +++ b/docs/related-link.md @@ -0,0 +1,15 @@ + +### Release +[边缘智能还能怎么玩?KubeEdge AI SIG 带你飞](https://mp.weixin.qq.com/s/t10_ZrZW42AZoYnisVAbpg) + + +### Meetup and Conference +[HDC.Cloud 2021: 边云协同,打通AI最后一公里](https://xie.infoq.cn/article/b22e72afe8de50ca34269bb21) +[KubeEdge Sedna如何实现边缘AI模型精度提升50%](https://www.huaweicloud.com/zhishi/hdc2021-Track-24-18.html) + + +### Distributed Synergy AI Toolkit: Sedna +[Sedna0.4.0发布,支持表征提取联邦学习,减少边侧资源需求](https://mp.weixin.qq.com/s/_m5q0t0yYY7gnfQUAssjFg) +[支持边云协同终身学习特性,KubeEdge子项目Sedna 0.3.0版本发布!](https://mp.weixin.qq.com/s/kSFL_pf2BTyVvH5c9zv0Jg) +[体验边云协同AI框架!KubeEdge子项目Sedna 0.1版本发布](https://mp.weixin.qq.com/s/3Ei8ynSAxnfuoIWYdb7Gpg) +[加速AI边云协同创新!KubeEdge社区建立Sedna子项目](https://mp.weixin.qq.com/s/FX2DOsctS_Z7CKHndFByRw) \ No newline at end of file diff --git a/docs/roadmap.md b/docs/roadmap.md new file mode 100644 index 0000000..ed03586 --- /dev/null +++ b/docs/roadmap.md @@ -0,0 +1,31 @@ +# Roadmap + +Upon the release of ianvs, the roadmap would be as follows +- AUG 2022: Release Another Use Case and Advanced Algorithm Paradigm - Non-structured lifelong learning paradigm in ianvs +- SEP 2022: Release Another Use Case, Dataset, and Algorithm Paradigm - Another structured dataset and lifelong learning paradigm in ianvs +- OCT 2022: Release Advanced Benchmark Presentation - shared space for story manager to present your work in public +- NOV 2022: Release Advanced Algorithm Paradigm - Re-ID with Multi-edge Synergy Inference in ianvs +- DEC 2022: Release Simulation Tools +- JUN 2023: More datasets, 
algorithms, and test cases with ianvs +- DEC 2023: Standards, coding events, and competitions with ianvs + + + +# Ianvs v0.1.0 release +## 1. Release the Ianvs distributed synergy AI benchmarking framework. + a) Release test environment management and configuration. + b) Release test case management and configuration. + c) Release test story management and configuration. + d) Release the open-source test case generation tool: Use hyperparameter enumeration to fill in one configuration file to generate multiple test cases. + +## 2. Release the PCB-AoI public dataset. +Release the PCB-AoI public dataset, its corresponding preprocessing, and baseline algorithm projects. +Ianvs is the first open-source site for that dataset. + +## 3. Support two new paradigms in test environments and test cases. + a) Test environments and test cases that support the single-task learning paradigm. + b) Test environments and test cases that support the incremental learning paradigm. + +## 4. Release PCB-AoI benchmark cases based on the two new paradigms. + a) Release PCB-AoI benchmark cases based on single-task learning, including leaderboards and test reports. + b) Release PCB-AoI benchmark cases based on incremental learning, including leaderboards and test reports. diff --git a/docs/user_interface/how-to-config-algorithm.md b/docs/user_interface/how-to-config-algorithm.md new file mode 100644 index 0000000..34e4a48 --- /dev/null +++ b/docs/user_interface/how-to-config-algorithm.md @@ -0,0 +1,187 @@ +# How to config algorithm + +The algorithm developer is able to test his/her own targeted algorithm and configs the algorithm using the following +configuration. 
+ +## The configuration of algorithm + +| Property | Required | Description | +|----------|----------|-------------| +|paradigm_type|yes|Paradigm name; Type: string; Value Constraint: Currently the options of value are as follows: 1> singletasklearning 2> incrementallearning| +|incremental_learning_data_setting|no|Data setting for incremental learning paradigm.[the configuration of incremental_learning_data_setting](#id1)| +|initial_model_url|no|The url address of initial model for model pre-training; Type: string| +|modules|yes|The algorithm modules for paradigm; Type: list; Value Constraint: the list of [the configuration of module](#id2)| + +For example: + +```yaml +algorithm: + # paradigm type; string type; + # currently the options of value are as follows: + # 1> "singletasklearning" + # 2> "incrementallearning" + paradigm_type: "incrementallearning" + incremental_learning_data_setting: + ... + # the url address of initial model for model pre-training; string url; + initial_model_url: "/ianvs/initial_model/model.zip" + + # algorithm module configuration in the paradigm; list type; + modules: + ... +``` + +### The configuration of incremental_learning_data_setting + +| Property | Required | Description | +|----------|----------|-------------| +|train_ratio|no|Ratio of training dataset; Type: float; Default value: 0.8; Value Constraint: the value is greater than 0 and less than 1.| +|splitting_method|no|The method of splitting dataset; Type: string; Default value: default; Value Constraint: Currently the options of value are as follows: 1> default: the dataset is evenly divided based train_ratio. + +For example: + +```yaml +incremental_learning_data_setting: + # ratio of training dataset; float type; + # the default value is 0.8. 
+ train_ratio: 0.8 + # the method of splitting dataset; string type; optional; + # currently the options of value are as follows: + # 1> "default": the dataset is evenly divided based train_ratio; + splitting_method: "default" +``` + +### The configuration of module + +| Property | Required | Description | +|----------|----------|-------------| +|type|yes|Algorithm module type; Type: string; Value Constraint: Currently the options of value are as follows: 1> basemodel: the algorithm module contains important interfaces such as train, eval, predict and more.it's required module. 2> hard_example_mining: the module checks hard example when predict. it's optional module and often used for incremental learning paradigm. | +|name|yes|Algorithm module name; Type: string; Value Constraint: a python module name| +|url|yes|The url address of python module file; Type: string | +|hyperparameters|no|[the configuration of hyperparameters](#id3)| + +For example: + +```yaml +# algorithm module configuration in the paradigm; list type; +modules: + # type of algorithm module; string type; + # currently the options of value are as follows: + # 1> "basemodel": contains important interfaces such as train、 eval、 predict and more; required module; + - type: "basemodel" + # name of python module; string type; + # example: basemodel.py has BaseModel module that the alias is "FPN" for this benchmarking; + name: "FPN" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/basemodel.py" + + # hyperparameters configuration for the python module; list type; + hyperparameters: + ... 
+ # 2> "hard_example_mining": check hard example when predict ; optional module; + - type: "hard_example_mining" + # name of python module; string type; + name: "IBT" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/hard_example_mining.py" + # hyperparameters configuration for the python module; list type; + hyperparameters: + ... +``` + +### The configuration of hyperparameters + +The following is an example of hyperparameters configuration: + +```yaml +# hyperparameters configuration for the python module; list type; +hyperparameters: + # name of the hyperparameter; string type; + - momentum: + # values of the hyperparameter; list type; + # types of the value are string/int/float/boolean/list/dictionary + values: + - 0.95 + - 0.5 + - learning_rate: + values: + - 0.1 + - 0.2 +``` + +Ianvs will test for all the hyperparameter combination, that means it will run all the following 4 test: + +| Num | learning_rate | momentum | +|------|---------------|----------| +| 1 | 0.1 | 0.95 | +| 2 | 0.1 | 0.5 | +| 3 | 0.2 | 0.95 | +| 4 | 0.2 | 0.5 | + +Currently, Ianvs is not restricted to validity of the hyperparameter combination. That might lead to some invalid +parameter combination, and it is controlled by the user himself. In the further version of Ianvs, it will support +excluding invalid parameter combinations to improve efficiency. + +## Show example + +```yaml +# fpn_algorithm.yaml +algorithm: + # paradigm type; string type; + # currently the options of value are as follows: + # 1> "singletasklearning" + # 2> "incrementallearning" + paradigm_type: "incrementallearning" + incremental_learning_data_setting: + # ratio of training dataset; float type; + # the default value is 0.8. 
+ train_ratio: 0.8 + # the method of splitting dataset; string type; optional; + # currently the options of value are as follows: + # 1> "default": the dataset is evenly divided based train_ratio; + splitting_method: "default" + # the url address of initial model for model pre-training; string url; + initial_model_url: "/ianvs/initial_model/model.zip" + + # algorithm module configuration in the paradigm; list type; + modules: + # type of algorithm module; string type; + # currently the options of value are as follows: + # 1> "basemodel": contains important interfaces such as train、 eval、 predict and more; required module; + - type: "basemodel" + # name of python module; string type; + # example: basemodel.py has BaseModel module that the alias is "FPN" for this benchmarking; + name: "FPN" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/basemodel.py" + + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + - momentum: + # values of the hyperparameter; list type; + # types of the value are string/int/float/boolean/list/dictionary + values: + - 0.95 + - 0.5 + - learning_rate: + values: + - 0.1 + # 2> "hard_example_mining": check hard example when predict ; optional module; + - type: "hard_example_mining" + # name of python module; string type; + name: "IBT" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/hard_example_mining.py" + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + # threshold of image; value is [0, 1] + - threshold_img: + values: + - 0.9 + # predict box of image; value is [0, 1] + - threshold_box: + values: + - 0.9 +``` \ No newline at end of file diff --git a/docs/user_interface/how-to-config-benchmarkingjob.md 
b/docs/user_interface/how-to-config-benchmarkingjob.md new file mode 100644 index 0000000..e1264ee --- /dev/null +++ b/docs/user_interface/how-to-config-benchmarkingjob.md @@ -0,0 +1,231 @@ +# How to config benchmarkingjob + +The algorithm developer is able to test his/her own targeted algorithm using the following configuration information. + +## The configuration of benchmarkingjob + +| Property | Required | Description | +|----------|----------|-------------| +|name|yes|Job name of benchmarking; Type: string| +|workspace|no|The url address of job workspace that will reserve the output of tests; Type: string; Default value: `./workspace`| +|testenv|yes|The url address of test environment configuration file; Type: string; Value Constraint: The file format supports yaml/yml.| +|test_object|yes|[The configuration of test_object](#id1)| +|rank|yes|[The configuration of ranking leaderboard](#id2)| + +For example: + +```yaml +benchmarkingjob: + # job name of benchmarking; string type; + name: "benchmarkingjob" + # the url address of job workspace that will reserve the output of tests; string type; + # default value: "./workspace" + workspace: "/ianvs/incremental_learning_bench/workspace" + + # the url address of test environment configuration file; string type; + # the file format supports yaml/yml; + testenv: "./examples/pcb-aoi/incremental_learning_bench/testenv/testenv.yaml" + # the configuration of test object + test_object: + ... + # the configuration of ranking leaderboard + rank: + ... 
+``` + +### The configuration of test_object + +| Property | Required | Description | +|----------|----------|-------------| +|type|yes|Type of test object; Type: string; Value Constraint: Currently the option of value is "algorithms",the others will be added in succession.| +|algorithms|no|[Test algorithm configuration](#id2); Type: list| + +For example: + +```yaml +# the configuration of test object +test_object: + # test type; string type; + # currently the option of value is "algorithms",the others will be added in succession. + type: "algorithms" + # test algorithm configuration files; list type; + algorithms: + ... +``` + +### The configuration of algorithms + +| Property | Required | Description | +|----------|----------|-------------| +|name|yes|Algorithm name; Type: string| +|url|yes|The url address of test algorithm configuration file; Type: string; Value Constraint: The file format supports yaml/yml.| + +For example: + +```yaml +# test algorithm configuration files; list type; +algorithms: + # algorithm name; string type; + - name: "fpn_incremental_learning" + # the url address of test algorithm configuration file; string type; + # the file format supports yaml/yml + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml" +``` + +### The configuration of rank + +| Property | Required | Description | +|----------|----------|-------------| +|sort_by|yes|Rank leaderboard with metric of test case's evaluation and order; Type: list; Value Constraint: The sorting priority is based on the sequence of metrics in the list from front to back.| +|visualization|yes|[The configuration of visualization](#id3)| +|selected_dataitem|yes|[The configuration of selected_dataitem](#id4); The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", so that the selected columns will be shown.| +|save_mode|yes|save mode of selected and all dataitems in workspace `./rank`; Type: string; 
Value Constraint: Currently the options of value are as follows: 1> "selected_and_all": save selected and all dataitems. 2> "selected_only": save selected dataitems.| + +For example: + +```yaml +# the configuration of ranking leaderboard +rank: + # rank leaderboard with metric of test case's evaluation and order ; list type; + # the sorting priority is based on the sequence of metrics in the list from front to back; + sort_by: [ { "f1_score": "descend" }, { "samples_transfer_ratio": "ascend" } ] + # visualization configuration + visualization: + ... + # selected dataitem configuration + # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", + # so that the selected columns will be shown. + selected_dataitem: + ... + # save mode of selected and all dataitems in workspace `./rank` ; string type; + # currently the options of value are as follows: + # 1> "selected_and_all": save selected and all dataitems; + # 2> "selected_only": save selected dataitems; + save_mode: "selected_and_all" +``` + +### The configuration of visualization + +| Property | Required | Description | +|----------|----------|-------------| +|mode|no|Mode of visualization in the leaderboard. There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen; Type: string; Default value: selected_only| +|method|no|Method of visualization for selected dataitems; Type: string; Value Constraint: Currently the options of value are as follows: 1> "print_table": print selected dataitems.| + +For example: + +```yaml +# visualization configuration +visualization: +# mode of visualization in the leaderboard; string type; +# There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen. +# In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown. 
+mode: "selected_only" +# method of visualization for selected dataitems; string type; +# currently the options of value are as follows: +# 1> "print_table": print selected dataitems; +method: "print_table" +``` + +### The configuration of selected_dataitem + +| Property | Required | Description | +|----------|----------|-------------| +|paradigms|yes|Select paradigms in the leaderboard; Type: list; Default value: ["all"]; Value Constraint: Currently the options of value are as follows: 1> "all": select all paradigms in the leaderboard. 2> paradigms in the leaderboard, e.g., "singletasklearning".| +|modules|yes|Select modules in the leaderboard; Type: list; Default value: ["all"]; Value Constraint: Currently the options of value are as follows: 1> "all": select all hyperparameters in the leaderboard. 2> hyperparameters in the leaderboard, e.g., "momentum".| +|hyperparameters|yes|Select hyperparameters in the leaderboard; Type: list; Default value: ["all"]; Value Constraint: Currently the options of value are as follows: 1> "all": select all hyperparameters in the leaderboard. 2> hyperparameters in the leaderboard, e.g., "momentum".| +|metrics|yes|Select metrics in the leaderboard; Type: list; Default value: ["all"]; Value Constraint: Currently the options of value are as follows: 1> "all": select all metrics in the leaderboard. 2> metrics in the leaderboard, e.g., "f1_score".| + +```yaml +# selected dataitem configuration +# The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", +# so that the selected columns will be shown. 
+selected_dataitem: + # currently the options of value are as follows: + # 1> "all": select all paradigms in the leaderboard; + # 2> paradigms in the leaderboard, e.g., "singletasklearning" + paradigms: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all modules in the leaderboard; + # 2> modules in the leaderboard, e.g., "basemodel" + modules: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all hyperparameters in the leaderboard; + # 2> hyperparameters in the leaderboard, e.g., "momentum" + hyperparameters: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all metrics in the leaderboard; + # 2> metrics in the leaderboard, e.g., "F1_SCORE" + metrics: [ "f1_score", "samples_transfer_ratio" ] +``` + +## Show the example + +```yaml +benchmarkingjob: + # job name of benchmarking; string type; + name: "benchmarkingjob" + # the url address of job workspace that will reserve the output of tests; string type; + # default value: "./workspace" + workspace: "/ianvs/incremental_learning_bench/workspace" + + # the url address of test environment configuration file; string type; + # the file format supports yaml/yml; + testenv: "./examples/pcb-aoi/incremental_learning_bench/testenv/testenv.yaml" + + # the configuration of test object + test_object: + # test type; string type; + # currently the option of value is "algorithms",the others will be added in succession. 
+ type: "algorithms" + # test algorithm configuration files; list type; + algorithms: + # algorithm name; string type; + - name: "fpn_incremental_learning" + # the url address of test algorithm configuration file; string type; + # the file format supports yaml/yml + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml" + + # the configuration of ranking leaderboard + rank: + # rank leaderboard with metric of test case's evaluation and order ; list type; + # the sorting priority is based on the sequence of metrics in the list from front to back; + sort_by: [ { "f1_score": "descend" }, { "samples_transfer_ratio": "ascend" } ] + + # visualization configuration + visualization: + # mode of visualization in the leaderboard; string type; + # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen. + # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown. + mode: "selected_only" + # method of visualization for selected dataitems; string type; + # currently the options of value are as follows: + # 1> "print_table": print selected dataitems; + method: "print_table" + + # selected dataitem configuration + # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", + # so that the selected columns will be shown. 
+ selected_dataitem: + # currently the options of value are as follows: + # 1> "all": select all paradigms in the leaderboard; + # 2> paradigms in the leaderboard, e.g., "singletasklearning" + paradigms: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all modules in the leaderboard; + # 2> modules in the leaderboard, e.g., "basemodel" + modules: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all hyperparameters in the leaderboard; + # 2> hyperparameters in the leaderboard, e.g., "momentum" + hyperparameters: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all metrics in the leaderboard; + # 2> metrics in the leaderboard, e.g., "f1_score" + metrics: [ "f1_score", "samples_transfer_ratio" ] + + # save mode of selected and all dataitems in workspace `./rank` ; string type; + # currently the options of value are as follows: + # 1> "selected_and_all": save selected and all dataitems; + # 2> "selected_only": save selected dataitems; + save_mode: "selected_and_all" +``` \ No newline at end of file diff --git a/docs/user_interface/how-to-config-testenv.md b/docs/user_interface/how-to-config-testenv.md new file mode 100644 index 0000000..bd59d81 --- /dev/null +++ b/docs/user_interface/how-to-config-testenv.md @@ -0,0 +1,136 @@ +# How to config testenv + +The algorithm developer is able to test his/her own targeted algorithm, he/she should prepare the test environment. +how to config test environment, please to refer to the following configuration information. 
+ +## The configuration of testenv + +| Property | Required | Description | +|----------|----------|-------------| +|dataset|yes|[The configuration of dataset](#id1)| +|model_eval|no|[The configuration of model_eval](#id2) +|metrics|yes|The metrics used for test case's evaluation; Type: list; Value Constraint: the list of [the configuration of metric](#id3).| +|incremental_rounds|no|Incremental rounds setting for incremental learning paradigm; Type: int; Default value: 2; Value Constraint: the value must be not less than 2. | + +For example: + +```yaml +testenv: + # dataset configuration + dataset: + ... + # model eval configuration of incremental learning; + model_eval: + ... + # metrics configuration for test case's evaluation; list type; + metrics: + ... + # incremental rounds setting for incremental learning paradigm; int type; default value is 2; + # the value must be not less than 2; + incremental_rounds: 2 +``` + +### The configuration of dataset + +| Property | Required | Description | +|----------|----------|-------------| +|train_url|yes|The url address of train dataset index; Type: string| +|test_url|yes|The url address of test dataset index; Type: string| + +For example: + +```yaml +# dataset configuration +dataset: + # the url address of train dataset index; string type; + train_url: "/ianvs/dataset/train_data/index.txt" + # the url address of test dataset index; string type; + test_url: "/ianvs/dataset/test_data/index.txt" +``` + +### The configuration of model_eval + +| Property | Required | Description | +|----------|----------|-------------| +|model_metric|yes|The Metric used for model evaluation; [The configuration of metric](#id4).| +|threshold|yes|Threshold of condition for triggering inference model to update; Type: float/int| +|operator|yes|Operator of condition for triggering inference model to update; Type: string; Value Constraint: the values are ">=", ">", "<=", "<" and "=".| + +For example: + +```yaml +# model eval configuration of 
incremental learning; +model_eval: + # metric used for model evaluation + model_metric: + ... + # condition of triggering inference model to update + # threshold of the condition; types are float/int + threshold: 0.01 + # operator of the condition; string type; + # values are ">=", ">", "<=", "<" and "="; + operator: ">=" +``` + +### The configuration of metric + +| Property | Required | Description | +|----------|----------|-------------| +|name|yes|Metric name; Type: string; Value Constraint: a python module name| +|url|no|The url address of python module file; Type: string.| + +For example: + +```yaml +# metric used for model evaluation +model_metric: + # metric name; string type; + name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" +``` + +## Show example + +```yaml +# testenv.yaml +testenv: + # dataset configuration + dataset: + # the url address of train dataset index; string type; + train_url: "/ianvs/dataset/train_data/index.txt" + # the url address of test dataset index; string type; + test_url: "/ianvs/dataset/test_data/index.txt" + + # model eval configuration of incremental learning; + model_eval: + # metric used for model evaluation + model_metric: + # metric name; string type; + name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + + # condition of triggering inference model to update + # threshold of the condition; types are float/int + threshold: 0.01 + # operator of the condition; string type; + # values are ">=", ">", "<=", "<" and "="; + operator: ">=" + + # metrics configuration for test case's evaluation; list type; + metrics: + # metric name; string type; + - name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + - name: "samples_transfer_ratio" + + # incremental rounds setting for incremental learning paradigm; int type; 
default value is 2; + incremental_rounds: 2 +``` + + + + + diff --git a/docs/user_interface/how-to-use-ianvs-command-line.md b/docs/user_interface/how-to-use-ianvs-command-line.md new file mode 100644 index 0000000..77c208a --- /dev/null +++ b/docs/user_interface/how-to-use-ianvs-command-line.md @@ -0,0 +1,52 @@ +# How to use Ianvs command line + +### List available commands + +Command line: `ianvs -h` +For example: + +```shell +$ ianvs -h +usage: ianvs [-h] [-f [BENCHMARKING_CONFIG_FILE]] [-v] + +AI Benchmarking Tool + +optional arguments: + -h, --help show this help message and exit + -f [BENCHMARKING_CONFIG_FILE], --benchmarking_config_file [BENCHMARKING_CONFIG_FILE] + run a benchmarking job, and the benchmarking config + file must be yaml/yml file. + -v, --version show program version info and exit. + +``` + +### Show the version of ianvs + +Command line: `ianvs -v` +For example: + +```shell +$ ianvs -v +0.1.0 +``` + +### Run a benchmarking job + +Command line: `ianvs -f [BENCHMARKING_CONFIG_FILE]` +For example: + +```yaml +ianvs -f examples/pcb-aoi/singletask_learning_bench/benchmarkingjob.yaml +``` + +The final output might look like: + +|rank |algorithm |f1_score |paradigm |basemodel |learning_rate |momentum |time |url | +|:----:|:-----------------------:|:--------:|:------------------:|:---------:|:-------------:|:--------:|:------------------------|:-------------------------------------------------------------------------------------------------------------------------------| +|1 |fpn_singletask_learning | 0.8396 |singletasklearning | FPN | 0.1 | 0.5 | 2022-07-07 20:33:53 |/ianvs/pcb-aoi/singletask_learning_bench/workspace/benchmarkingjob/fpn_singletask_learning/49eb5ffd-fdf0-11ec-8d5d-fa163eaa99d5 | +|2 |fpn_singletask_learning | 0.8353 |singletasklearning | FPN | 0.1 | 0.95 | 2022-07-07 20:31:08 |/ianvs/pcb-aoi/singletask_learning_bench/workspace/benchmarkingjob/fpn_singletask_learning/49eb5ffc-fdf0-11ec-8d5d-fa163eaa99d5 | + +Refer to [details of 
example]. + +[details of example]: ../guides/quick-start.md + diff --git a/examples/pcb-aoi/README.md b/examples/pcb-aoi/README.md new file mode 100644 index 0000000..e69de29 diff --git a/examples/pcb-aoi/incremental_learning_bench/benchmarkingjob.yaml b/examples/pcb-aoi/incremental_learning_bench/benchmarkingjob.yaml new file mode 100644 index 0000000..d9ff2d5 --- /dev/null +++ b/examples/pcb-aoi/incremental_learning_bench/benchmarkingjob.yaml @@ -0,0 +1,72 @@ +benchmarkingjob: + # job name of bechmarking; string type; + name: "benchmarkingjob" + # the url address of job workspace that will reserve the output of tests; string type; + workspace: "/ianvs/incremental_learning_bench/workspace" + + # the url address of test environment configuration file; string type; + # the file format supports yaml/yml; + testenv: "./examples/pcb-aoi/incremental_learning_bench/testenv/testenv.yaml" + + # the configuration of test object + test_object: + # test type; string type; + # currently the option of value is "algorithms",the others will be added in succession. + type: "algorithms" + # test algorithm configuration files; list type; + algorithms: + # algorithm name; string type; + - name: "fpn_incremental_learning" + # the url address of test algorithm configuration file; string type; + # the file format supports yaml/yml + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml" + + # the configuration of ranking leaderboard + rank: + # rank leaderboard with metric of test case's evaluation and order ; list type; + # the sorting priority is based on the sequence of metrics in the list from front to back; + sort_by: [ { "f1_score": "descend" }, { "samples_transfer_ratio": "ascend" } ] + + # visualization configuration + visualization: + # mode of visualization in the leaderboard; string type; + # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen. 
+ # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown. + mode: "selected_only" + # method of visualization for selected dataitems; string type; + # currently the options of value are as follows: + # 1> "print_table": print selected dataitems; + method: "print_table" + + # selected dataitem configuration + # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", + # so that the selected columns will be shown. + selected_dataitem: + # currently the options of value are as follows: + # 1> "all": select all paradigms in the leaderboard; + # 2> paradigms in the leaderboard, e.g., "singletasklearning" + paradigms: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all modules in the leaderboard; + # 2> modules in the leaderboard, e.g., "basemodel" + modules: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all hyperparameters in the leaderboard; + # 2> hyperparameters in the leaderboard, e.g., "momentum" + hyperparameters: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all metrics in the leaderboard; + # 2> metrics in the leaderboard, e.g., "F1_SCORE" + metrics: [ "f1_score", "samples_transfer_ratio" ] + + # model of save selected and all dataitems in workspace `./rank` ; string type; + # currently the options of value are as follows: + # 1> "selected_and_all": save selected and all dataitems; + # 2> "selected_only": save selected dataitems; + save_mode: "selected_and_all" + + + + + + diff --git a/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/basemodel.py b/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/basemodel.py new file mode 100644 index 0000000..13e2c8c --- /dev/null +++ b/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/basemodel.py @@ -0,0 +1,503 @@ +# Copyright 2022 The KubeEdge Authors. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import absolute_import, division, print_function + +import os +import tempfile +import time +import zipfile +import cv2 +import logging + +import numpy as np +import tensorflow as tf +import tensorflow.contrib.slim as slim +from sedna.common.config import Context +from sedna.common.class_factory import ClassType, ClassFactory +from FPN_TensorFlow.help_utils.help_utils import draw_box_cv +from FPN_TensorFlow.libs.label_name_dict.label_dict import NAME_LABEL_MAP +from FPN_TensorFlow.data.io.read_tfrecord import next_batch_for_tasks, convert_labels +from FPN_TensorFlow.data.io import image_preprocess +from FPN_TensorFlow.help_utils.tools import mkdir, view_bar, get_single_label_dict, single_label_eval +from FPN_TensorFlow.libs.configs import cfgs +from FPN_TensorFlow.libs.box_utils.show_box_in_tensor import draw_box_with_color, draw_boxes_with_categories +from FPN_TensorFlow.libs.fast_rcnn import build_fast_rcnn +from FPN_TensorFlow.libs.networks.network_factory import get_flags_byname, get_network_byname +from FPN_TensorFlow.libs.rpn import build_rpn + +FLAGS = get_flags_byname(cfgs.NET_NAME) + +# avoid the conflict: 1. tf parses flags with sys.argv; 2. test system also parses flags . 
+tf.flags.DEFINE_string("benchmarking_config_file", "", "ignore") + +# close global warning log +# reason: during the running of tensorflow, a large number of warning logs will be printed +# and these will submerge some important logs and increase inference latency. +# After disable the global warning job, that will not affect the running of application. +# if you want to open the global warning log, please comment(e.g: #) the statement. +# todo: 1. disable the local warning log instead of the global warning log. +# e.g.: only to disable tensorflow warning log. + +logging.disable(logging.WARNING) + +__all__ = ["BaseModel"] + +# set backend +os.environ['BACKEND_TYPE'] = 'TENSORFLOW' + + +@ClassFactory.register(ClassType.GENERAL, alias="FPN") +class BaseModel: + + def __init__(self, **kwargs): + """ + initialize logging configuration + """ + + self.has_fast_rcnn_predict = False + + self._init_tf_graph() + + self.temp_dir = tempfile.mkdtemp() + if not os.path.isdir(self.temp_dir): + mkdir(self.temp_dir) + + os.environ["MODEL_NAME"] = "model.zip" + cfgs.LR = kwargs.get("learning_rate", 0.0001) + cfgs.MOMENTUM = kwargs.get("momentum", 0.9) + cfgs.MAX_ITERATION = kwargs.get("max_iteration", 5) + + def train(self, train_data, valid_data=None, **kwargs): + + if train_data is None or train_data.x is None or train_data.y is None: + raise Exception("Train data is None.") + + with tf.Graph().as_default(): + + img_name_batch, train_data, gtboxes_and_label_batch, num_objects_batch, data_num = \ + next_batch_for_tasks( + (train_data.x, train_data.y), + dataset_name=cfgs.DATASET_NAME, + batch_size=cfgs.BATCH_SIZE, + shortside_len=cfgs.SHORT_SIDE_LEN, + is_training=True, + save_name="train" + ) + + with tf.name_scope('draw_gtboxes'): + gtboxes_in_img = draw_box_with_color(train_data, tf.reshape(gtboxes_and_label_batch, [-1, 5])[:, :-1], + text=tf.shape(gtboxes_and_label_batch)[1]) + + # *********************************************************************************************** 
+ # * share net * + # *********************************************************************************************** + _, share_net = get_network_byname(net_name=cfgs.NET_NAME, + inputs=train_data, + num_classes=None, + is_training=True, + output_stride=None, + global_pool=False, + spatial_squeeze=False) + + # *********************************************************************************************** + # * rpn * + # *********************************************************************************************** + rpn = build_rpn.RPN(net_name=cfgs.NET_NAME, + inputs=train_data, + gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0), + is_training=True, + share_head=cfgs.SHARE_HEAD, + share_net=share_net, + stride=cfgs.STRIDE, + anchor_ratios=cfgs.ANCHOR_RATIOS, + anchor_scales=cfgs.ANCHOR_SCALES, + scale_factors=cfgs.SCALE_FACTORS, + base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST, # P2, P3, P4, P5, P6 + level=cfgs.LEVEL, + top_k_nms=cfgs.RPN_TOP_K_NMS, + rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD, + max_proposals_num=cfgs.MAX_PROPOSAL_NUM, + rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD, + # iou>=0.7 is positive box, iou< 0.3 is negative + rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD, + rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE, + rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE, + remove_outside_anchors=False, # whether remove anchors outside + rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME]) + + rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals() # rpn_score shape: [300, ] + + rpn_location_loss, rpn_classification_loss = rpn.rpn_losses() + rpn_total_loss = rpn_classification_loss + rpn_location_loss + + with tf.name_scope('draw_proposals'): + # score > 0.5 is object + rpn_object_boxes_indices = tf.reshape(tf.where(tf.greater(rpn_proposals_scores, 0.5)), [-1]) + rpn_object_boxes = tf.gather(rpn_proposals_boxes, rpn_object_boxes_indices) + + rpn_proposals_objcet_boxes_in_img = draw_box_with_color(train_data, 
rpn_object_boxes, + text=tf.shape(rpn_object_boxes)[0]) + rpn_proposals_boxes_in_img = draw_box_with_color(train_data, rpn_proposals_boxes, + text=tf.shape(rpn_proposals_boxes)[0]) + # *********************************************************************************************** + # * Fast RCNN * + # *********************************************************************************************** + + fast_rcnn = build_fast_rcnn.FastRCNN(img_batch=train_data, + feature_pyramid=rpn.feature_pyramid, + rpn_proposals_boxes=rpn_proposals_boxes, + rpn_proposals_scores=rpn_proposals_scores, + img_shape=tf.shape(train_data), + roi_size=cfgs.ROI_SIZE, + roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE, + scale_factors=cfgs.SCALE_FACTORS, + gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0), + fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD, + fast_rcnn_maximum_boxes_per_img=100, + fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS, + show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD, + # show detections which score >= 0.6 + num_classes=cfgs.CLASS_NUM, + fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE, + fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE, + # iou>0.5 is positive, iou<0.5 is negative + fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD, + use_dropout=False, + weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME], + is_training=True, + level=cfgs.LEVEL) + + fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \ + fast_rcnn.fast_rcnn_predict() + fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss() + fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss + + with tf.name_scope('draw_boxes_with_categories'): + fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(img_batch=train_data, + boxes=fast_rcnn_decode_boxes, + labels=detection_category, + scores=fast_rcnn_score) + + # train + added_loss = 
rpn_total_loss + fast_rcnn_total_loss + total_loss = tf.losses.get_total_loss() + + global_step = tf.train.get_or_create_global_step() + + lr = tf.train.piecewise_constant(global_step, + boundaries=[np.int64(20000), np.int64(40000)], + values=[cfgs.LR, cfgs.LR / 10, cfgs.LR / 100]) + tf.summary.scalar('lr', lr) + optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM) + + train_op = slim.learning.create_train_op(total_loss, optimizer, global_step) # rpn_total_loss, + # train_op = optimizer.minimize(second_classification_loss, global_step) + + # *********************************************************************************************** + # * Summary * + # *********************************************************************************************** + # ground truth and predict + tf.summary.image('img/gtboxes', gtboxes_in_img) + tf.summary.image('img/faster_rcnn_predict', fast_rcnn_predict_boxes_in_imgs) + # rpn loss and image + tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss) + tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss) + tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss) + + tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss', fast_rcnn_location_loss) + tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss', fast_rcnn_classification_loss) + tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss) + + tf.summary.scalar('loss/added_loss', added_loss) + tf.summary.scalar('loss/total_loss', total_loss) + + tf.summary.image('rpn/rpn_all_boxes', rpn_proposals_boxes_in_img) + tf.summary.image('rpn/rpn_object_boxes', rpn_proposals_objcet_boxes_in_img) + # learning_rate + tf.summary.scalar('learning_rate', lr) + + summary_op = tf.summary.merge_all() + init_op = tf.group( + tf.global_variables_initializer(), + tf.local_variables_initializer() + ) + + restorer = self._get_restorer() + saver = tf.train.Saver(max_to_keep=3) + self.checkpoint_path = 
self.load(Context.get_parameters("base_model_url")) + + config = tf.ConfigProto() + config.gpu_options.allow_growth = False + with tf.Session(config=config) as sess: + sess.run(init_op) + if self.checkpoint_path: + restorer.restore(sess, self.checkpoint_path) + print('restore model') + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(sess, coord) + + summary_path = os.path.join(self.temp_dir, 'output/{}'.format(cfgs.DATASET_NAME), + FLAGS.summary_path, cfgs.VERSION) + + mkdir(summary_path) + summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph) + + for step in range(cfgs.MAX_ITERATION): + training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())) + start = time.time() + + _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \ + _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \ + _fast_rcnn_total_loss, _added_loss, _total_loss, _ = \ + sess.run([global_step, img_name_batch, rpn_location_loss, rpn_classification_loss, + rpn_total_loss, fast_rcnn_location_loss, fast_rcnn_classification_loss, + fast_rcnn_total_loss, added_loss, total_loss, train_op]) + + end = time.time() + + if step % 50 == 0: + print("""{}: step{} image_name:{} + rpn_loc_loss:{:.4f} | rpn_cla_loss:{:.4f} | rpn_total_loss:{:.4f} + fast_rcnn_loc_loss:{:.4f} | fast_rcnn_cla_loss:{:.4f} | fast_rcnn_total_loss:{:.4f} + added_loss:{:.4f} | total_loss:{:.4f} | pre_cost_time:{:.4f}s""" + .format(training_time, _global_step, str(_img_name_batch[0]), _rpn_location_loss, + _rpn_classification_loss, _rpn_total_loss, _fast_rcnn_location_loss, + _fast_rcnn_classification_loss, _fast_rcnn_total_loss, _added_loss, _total_loss, + (end - start))) + + if step % 500 == 0: + summary_str = sess.run(summary_op) + summary_writer.add_summary(summary_str, _global_step) + summary_writer.flush() + + if step > 0 and step == cfgs.MAX_ITERATION - 1: + self.checkpoint_path = os.path.join(self.temp_dir, '{}_'.format( + 
cfgs.DATASET_NAME) + str(_global_step) + "_" + str(time.time()) + '_model.ckpt') + saver.save(sess, self.checkpoint_path) + print('Weights have been saved to {}.'.format(self.checkpoint_path)) + + coord.request_stop() + coord.join(threads) + + return self.checkpoint_path + + def save(self, model_path): + if not model_path: + raise Exception("model path is None.") + + model_dir, model_name = os.path.split(self.checkpoint_path) + models = [model for model in os.listdir(model_dir) if model_name in model] + + if os.path.splitext(model_path)[-1] != ".zip": + model_path = os.path.join(model_path, "model.zip") + + if not os.path.isdir(os.path.dirname(model_path)): + os.makedirs(os.path.dirname(model_path)) + + with zipfile.ZipFile(model_path, "w") as f: + for model_file in models: + model_file_path = os.path.join(model_dir, model_file) + f.write(model_file_path, model_file, compress_type=zipfile.ZIP_DEFLATED) + + return model_path + + def predict(self, data, input_shape=None, **kwargs): + if data is None: + raise Exception("Predict data is None") + + inference_output_dir = os.getenv("RESULT_SAVED_URL") + + with self.tf_graph.as_default(): + if not self.has_fast_rcnn_predict: + self._fast_rcnn_predict() + self.has_fast_rcnn_predict = True + + restorer = self._get_restorer() + + config = tf.ConfigProto() + init_op = tf.group( + tf.global_variables_initializer(), + tf.local_variables_initializer() + ) + + with tf.Session(config=config) as sess: + sess.run(init_op) + + restorer.restore(sess, self.checkpoint_path) + + coord = tf.train.Coordinator() + threads = tf.train.start_queue_runners(sess, coord) + + imgs = [cv2.imread(img) for img in data] + img_names = [os.path.basename(img_path) for img_path in data] + + predict_dict = {} + + for i, img in enumerate(imgs): + start = time.time() + + _img_batch, _fast_rcnn_decode_boxes, _fast_rcnn_score, _detection_category = \ + sess.run( + [self.img_batch, self.fast_rcnn_decode_boxes, self.fast_rcnn_score, + self.detection_category], + 
feed_dict={self.img_plac: img}) + end = time.time() + + # predict box dict + predict_dict[str(img_names[i])] = [] + + for label in NAME_LABEL_MAP.keys(): + if label == 'back_ground': + continue + else: + temp_dict = {} + temp_dict['name'] = label + + ind = np.where(_detection_category == NAME_LABEL_MAP[label])[0] + temp_boxes = _fast_rcnn_decode_boxes[ind] + temp_score = np.reshape(_fast_rcnn_score[ind], [-1, 1]) + temp_dict['bbox'] = np.array(np.concatenate( + [temp_boxes, temp_score], axis=1), np.float64) + predict_dict[str(img_names[i])].append(temp_dict) + + img_np = np.squeeze(_img_batch, axis=0) + + img_np = draw_box_cv(img_np, + boxes=_fast_rcnn_decode_boxes, + labels=_detection_category, + scores=_fast_rcnn_score) + + if inference_output_dir: + mkdir(inference_output_dir) + cv2.imwrite(inference_output_dir + '/{}_fpn.jpg'.format(img_names[i]), img_np) + view_bar('{} cost {}s'.format(img_names[i], (end - start)), i + 1, len(imgs)) + print(f"\nInference results have been saved to directory:{inference_output_dir}.") + + coord.request_stop() + coord.join(threads) + + return predict_dict + + def load(self, model_url=None): + if model_url: + model_dir = os.path.split(model_url)[0] + with zipfile.ZipFile(model_url, "r") as f: + f.extractall(path=model_dir) + ckpt_name = os.path.basename(f.namelist()[0]) + index = ckpt_name.find("ckpt") + ckpt_name = ckpt_name[:index + 4] + self.checkpoint_path = os.path.join(model_dir, ckpt_name) + + else: + raise Exception(f"model url is None") + + return self.checkpoint_path + + def evaluate(self, data, model_path, **kwargs): + if data is None or data.x is None or data.y is None: + raise Exception("Prediction data is None") + + self.load(model_path) + predict_dict = self.predict(data.x) + metric_name, metric_func = kwargs.get("metric") + if callable(metric_func): + return {"f1_score": metric_func(data.y, predict_dict)} + else: + raise Exception(f"not found model metric func(name={metric_name}) in model eval phase") + + def 
_get_restorer(self): + model_variables = slim.get_model_variables() + restore_variables = [var for var in model_variables if not var.name.startswith( + 'Fast_Rcnn')] + [tf.train.get_or_create_global_step()] + return tf.train.Saver(restore_variables) + + def _init_tf_graph(self): + self.tf_graph = tf.Graph() + with self.tf_graph.as_default(): + self.img_plac = tf.placeholder(shape=[None, None, 3], dtype=tf.uint8) + + self.img_tensor = tf.cast(self.img_plac, tf.float32) - tf.constant([103.939, 116.779, 123.68]) + self.img_batch = image_preprocess.short_side_resize_for_inference_data(self.img_tensor, + target_shortside_len=cfgs.SHORT_SIDE_LEN, + is_resize=True) + + def _fast_rcnn_predict(self): + with self.tf_graph.as_default(): + # *********************************************************************************************** + # * share net * + # *********************************************************************************************** + _, share_net = get_network_byname(net_name=cfgs.NET_NAME, + inputs=self.img_batch, + num_classes=None, + is_training=True, + output_stride=None, + global_pool=False, + spatial_squeeze=False) + # *********************************************************************************************** + # * RPN * + # *********************************************************************************************** + rpn = build_rpn.RPN(net_name=cfgs.NET_NAME, + inputs=self.img_batch, + gtboxes_and_label=None, + is_training=False, + share_head=cfgs.SHARE_HEAD, + share_net=share_net, + stride=cfgs.STRIDE, + anchor_ratios=cfgs.ANCHOR_RATIOS, + anchor_scales=cfgs.ANCHOR_SCALES, + scale_factors=cfgs.SCALE_FACTORS, + base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST, # P2, P3, P4, P5, P6 + level=cfgs.LEVEL, + top_k_nms=cfgs.RPN_TOP_K_NMS, + rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD, + max_proposals_num=cfgs.MAX_PROPOSAL_NUM, + rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD, + 
rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD, + rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE, + rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE, + remove_outside_anchors=False, # whether remove anchors outside + rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME]) + + # rpn predict proposals + rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals() # rpn_score shape: [300, ] + + # *********************************************************************************************** + # * Fast RCNN * + # *********************************************************************************************** + fast_rcnn = build_fast_rcnn.FastRCNN(img_batch=self.img_batch, + feature_pyramid=rpn.feature_pyramid, + rpn_proposals_boxes=rpn_proposals_boxes, + rpn_proposals_scores=rpn_proposals_scores, + img_shape=tf.shape(self.img_batch), + roi_size=cfgs.ROI_SIZE, + scale_factors=cfgs.SCALE_FACTORS, + roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE, + gtboxes_and_label=None, + fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD, + fast_rcnn_maximum_boxes_per_img=100, + fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS, + show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD, + # show detections which score >= 0.6 + num_classes=cfgs.CLASS_NUM, + fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE, + fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE, + fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD, + use_dropout=False, + weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME], + is_training=False, + level=cfgs.LEVEL) + + self.fast_rcnn_decode_boxes, self.fast_rcnn_score, self.num_of_objects, self.detection_category = \ + fast_rcnn.fast_rcnn_predict() diff --git a/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml b/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml new file mode 100644 index 0000000..c60c71f --- /dev/null +++ 
b/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml @@ -0,0 +1,58 @@ +algorithm: + # paradigm type; string type; + # currently the options of value are as follows: + # 1> "singletasklearning" + # 2> "incrementallearning" + paradigm_type: "incrementallearning" + incremental_learning_data_setting: + # ratio of training dataset; float type; + # the default value is 0.8. + train_ratio: 0.8 + # the method of splitting dataset; string type; optional; + # currently the options of value are as follows: + # 1> "default": the dataset is evenly divided based train_ratio; + splitting_method: "default" + # the url address of initial model for model pre-training; string url; + initial_model_url: "/ianvs/initial_model/model.zip" + + # algorithm module configuration in the paradigm; list type; + modules: + # type of algorithm module; string type; + # currently the options of value are as follows: + # 1> "basemodel": contains important interfaces such as train、 eval、 predict and more; required module; + - type: "basemodel" + # name of python module; string type; + # example: basemodel.py has BaseModel module that the alias is "FPN" for this benchmarking; + name: "FPN" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/basemodel.py" + + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + - momentum: + # values of the hyperparameter; list type; + # types of the value are string/int/float/boolean/list/dictionary + values: + - 0.95 + - 0.5 + - learning_rate: + values: + - 0.1 + # 2> "hard_example_mining": check hard example when predict ; optional module; + - type: "hard_example_mining" + # name of python module; string type; + name: "IBT" + # the url address of python module; string type; + url: "./examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/hard_example_mining.py" + # hyperparameters 
configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + # threshold of image; value is [0, 1] + - threshold_img: + values: + - 0.9 + # predict box of image; value is [0, 1] + - threshold_box: + values: + - 0.9 \ No newline at end of file diff --git a/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/hard_example_mining.py b/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/hard_example_mining.py new file mode 100644 index 0000000..1b45eb7 --- /dev/null +++ b/examples/pcb-aoi/incremental_learning_bench/testalgorithms/fpn/hard_example_mining.py @@ -0,0 +1,110 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Hard Example Mining Algorithms""" + +import abc + +from sedna.common.class_factory import ClassFactory, ClassType + +__all__ = ('IBTFilter') + + +class BaseFilter(metaclass=abc.ABCMeta): + """The base class to define unified interface.""" + + def __call__(self, infer_result=None): + """ + predict function, judge the sample is hard or not. + + Parameters + ---------- + infer_result : array_like + prediction result + + Returns + ------- + is_hard_sample : bool + `True` means hard sample, `False` means not. 
+ """ + raise NotImplementedError + + @classmethod + def data_check(cls, data): + """Check the data in [0,1].""" + return 0 <= float(data) <= 1 + + +@ClassFactory.register(ClassType.HEM, alias="IBT") +class IBTFilter(BaseFilter, abc.ABC): + """ + **Object detection** Hard samples discovery methods named `IBT` + + Parameters + ---------- + threshold_img: float + hard coefficient threshold score to filter img, default to 0.5. + threshold_box: float + threshold_box to calculate hard coefficient, formula is hard + coefficient = number(prediction_boxes less than threshold_box) / + number(prediction_boxes) + """ + + def __init__(self, threshold_img=0.5, threshold_box=0.5, **kwargs): + self.threshold_box = float(threshold_box) + self.threshold_img = float(threshold_img) + + def __call__(self, infer_result=None) -> bool: + """Judge the img is hard sample or not. + + Parameters + ---------- + infer_result: array_like + prediction boxes list, such as [bbox1, bbox2, bbox3,....], + where bbox = [xmin, ymin, xmax, ymax, score, label] + score should be in [0,1], who will be ignored if its value not + in [0,1]. + + Returns + ------- + is hard sample: bool + `True` means hard sample, `False` means not. 
+ """ + + def _convert_to_bboxes(): + bboxes = [] + for vs in infer_result.values(): + for v in vs: + bbox = v.get("bbox").tolist() + if bbox: + bboxes.extend(bbox) + return bboxes + + infer_result = _convert_to_bboxes() + + if not (infer_result + and all(map(lambda x: len(x) > 4, infer_result))): + # if invalid input, return False + return False + + data_check_list = [bbox[4] for bbox in infer_result + if self.data_check(bbox[4])] + if len(data_check_list) != len(infer_result): + return False + + confidence_score_list = [ + float(box_score) for box_score in data_check_list + if float(box_score) <= self.threshold_box] + return (len(confidence_score_list) / len(infer_result) + >= (1 - self.threshold_img)) diff --git a/examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py b/examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py new file mode 100644 index 0000000..6c74947 --- /dev/null +++ b/examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py @@ -0,0 +1,54 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from FPN_TensorFlow.libs.label_name_dict.label_dict import NAME_LABEL_MAP +from FPN_TensorFlow.data.io.read_tfrecord import convert_labels +from FPN_TensorFlow.help_utils.tools import get_single_label_dict, single_label_eval +from sedna.common.class_factory import ClassType, ClassFactory + +__all__ = ["f1_score"] + + +@ClassFactory.register(ClassType.GENERAL, alias="f1_score") +def f1_score(y_true, y_pred): + predict_dict = {} + + for k, v in y_pred.items(): + k = f"b'{k}'" + if not predict_dict.get(k): + predict_dict[k] = v + + gtboxes_dict = convert_labels(y_true) + + f1_score_list = [] + + for label in NAME_LABEL_MAP.keys(): + if label == 'back_ground': + continue + + rboxes, gboxes = get_single_label_dict(predict_dict, gtboxes_dict, label) + rec, prec, ap, box_num = single_label_eval(rboxes, gboxes, 0.3, False) + recall = 0 if rec.shape[0] == 0 else rec[-1] + precision = 0 if prec.shape[0] == 0 else prec[-1] + f1_score = 0 if not (recall + precision) else (2 * precision * recall / (recall + precision)) + + f1_score_list.append(f1_score) + + f1_score_avg = 0 + if f1_score_list: + f1_score_avg = round(float(sum(f1_score_list)) / len(f1_score_list), 4) + + print(f"f1_score_avg: {f1_score_avg}") + + return f1_score_avg diff --git a/examples/pcb-aoi/incremental_learning_bench/testenv/testenv.yaml b/examples/pcb-aoi/incremental_learning_bench/testenv/testenv.yaml new file mode 100644 index 0000000..a353f8e --- /dev/null +++ b/examples/pcb-aoi/incremental_learning_bench/testenv/testenv.yaml @@ -0,0 +1,34 @@ +testenv: + # dataset configuration + dataset: + # the url address of train dataset index; string type; + train_url: "/ianvs/dataset/train_data/index.txt" + # the url address of test dataset index; string type; + test_url: "/ianvs/dataset/test_data/index.txt" + + # model eval configuration of incremental learning; + model_eval: + # metric used for model evaluation + model_metric: + # metric name; string type; + name: "f1_score" + # the url address of python file 
+ url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + + # condition of triggering inference model to update + # threshold of the condition; types are float/int + threshold: 0.01 + # operator of the condition; string type; + # values are ">=", ">", "<=", "<" and "="; + operator: ">=" + + # metrics configuration for test case's evaluation; list type; + metrics: + # metric name; string type; + - name: "f1_score" + # the url address of python file + url: "./examples/pcb-aoi/incremental_learning_bench/testenv/f1_score.py" + - name: "samples_transfer_ratio" + + # incremental rounds setting of incremental learning; int type; default value is 2; + incremental_rounds: 2 \ No newline at end of file diff --git a/examples/pcb-aoi/singletask_learning_bench/benchmarkingjob.yaml b/examples/pcb-aoi/singletask_learning_bench/benchmarkingjob.yaml new file mode 100644 index 0000000..30dd2b0 --- /dev/null +++ b/examples/pcb-aoi/singletask_learning_bench/benchmarkingjob.yaml @@ -0,0 +1,72 @@ +benchmarkingjob: + # job name of bechmarking; string type; + name: "benchmarkingjob" + # the url address of job workspace that will reserve the output of tests; string type; + workspace: "/ianvs/singletask_learning_bench/workspace" + + # the url address of test environment configuration file; string type; + # the file format supports yaml/yml; + testenv: "./examples/pcb-aoi/singletask_learning_bench/testenv/testenv.yaml" + + # the configuration of test object + test_object: + # test type; string type; + # currently the option of value is "algorithms",the others will be added in succession. 
+ type: "algorithms" + # test algorithm configuration files; list type; + algorithms: + # algorithm name; string type; + - name: "fpn_singletask_learning" + # the url address of test algorithm configuration file; string type; + # the file format supports yaml/yml; + url: "./examples/pcb-aoi/singletask_learning_bench/testalgorithms/fpn/fpn_algorithm.yaml" + + # the configuration of ranking leaderboard + rank: + # rank leaderboard with metric of test case's evaluation and order ; list type; + # the sorting priority is based on the sequence of metrics in the list from front to back; + sort_by: [ { "f1_score": "descend" } ] + + # visualization configuration + visualization: + # mode of visualization in the leaderboard; string type; + # There are quite a few possible dataitems in the leaderboard. Not all of them can be shown simultaneously on the screen. + # In the leaderboard, we provide the "selected_only" mode for the user to configure what is shown or is not shown. + mode: "selected_only" + # method of visualization for selected dataitems; string type; + # currently the options of value are as follows: + # 1> "print_table": print selected dataitems; + method: "print_table" + + # selected dataitem configuration + # The user can add his/her interested dataitems in terms of "paradigms", "modules", "hyperparameters" and "metrics", + # so that the selected columns will be shown. 
+ selected_dataitem: + # currently the options of value are as follows: + # 1> "all": select all paradigms in the leaderboard; + # 2> paradigms in the leaderboard, e.g., "singletasklearning" + paradigms: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all modules in the leaderboard; + # 2> modules in the leaderboard, e.g., "basemodel" + modules: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all hyperparameters in the leaderboard; + # 2> hyperparameters in the leaderboard, e.g., "momentum" + hyperparameters: [ "all" ] + # currently the options of value are as follows: + # 1> "all": select all metrics in the leaderboard; + # 2> metrics in the leaderboard, e.g., "f1_score" + metrics: [ "f1_score" ] + + # model of save selected and all dataitems in workspace; string type; + # currently the options of value are as follows: + # 1> "selected_and_all": save selected and all dataitems; + # 2> "selected_only": save selected dataitems; + save_mode: "selected_and_all" + + + + + + diff --git a/examples/pcb-aoi/singletask_learning_bench/testalgorithms/fpn/basemodel.py b/examples/pcb-aoi/singletask_learning_bench/testalgorithms/fpn/basemodel.py new file mode 100644 index 0000000..13e2c8c --- /dev/null +++ b/examples/pcb-aoi/singletask_learning_bench/testalgorithms/fpn/basemodel.py @@ -0,0 +1,503 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from __future__ import absolute_import, division, print_function + +import os +import tempfile +import time +import zipfile +import cv2 +import logging + +import numpy as np +import tensorflow as tf +import tensorflow.contrib.slim as slim +from sedna.common.config import Context +from sedna.common.class_factory import ClassType, ClassFactory +from FPN_TensorFlow.help_utils.help_utils import draw_box_cv +from FPN_TensorFlow.libs.label_name_dict.label_dict import NAME_LABEL_MAP +from FPN_TensorFlow.data.io.read_tfrecord import next_batch_for_tasks, convert_labels +from FPN_TensorFlow.data.io import image_preprocess +from FPN_TensorFlow.help_utils.tools import mkdir, view_bar, get_single_label_dict, single_label_eval +from FPN_TensorFlow.libs.configs import cfgs +from FPN_TensorFlow.libs.box_utils.show_box_in_tensor import draw_box_with_color, draw_boxes_with_categories +from FPN_TensorFlow.libs.fast_rcnn import build_fast_rcnn +from FPN_TensorFlow.libs.networks.network_factory import get_flags_byname, get_network_byname +from FPN_TensorFlow.libs.rpn import build_rpn + +FLAGS = get_flags_byname(cfgs.NET_NAME) + +# avoid the conflict: 1. tf parses flags with sys.argv; 2. test system also parses flags. +tf.flags.DEFINE_string("benchmarking_config_file", "", "ignore") + +# close global warning log +# reason: during the running of tensorflow, a large number of warning logs will be printed +# and these will submerge some important logs and increase inference latency. +# Disabling the global warning log does not affect the running of the application. +# if you want to re-enable the global warning log, please comment out (e.g. with #) the statement below. +# todo: 1. disable the local warning log instead of the global warning log. +# e.g.: only disable the tensorflow warning log. 
logging.disable(logging.WARNING)

__all__ = ["BaseModel"]

# set backend
os.environ['BACKEND_TYPE'] = 'TENSORFLOW'


@ClassFactory.register(ClassType.GENERAL, alias="FPN")
class BaseModel:
    """FPN-based detector wrapped as an ianvs/sedna base model.

    Builds a TF1 graph in three stages (shared backbone, RPN,
    Fast R-CNN head) and exposes the estimator-style interface the
    benchmark paradigm expects: train / save / load / predict / evaluate.
    """

    def __init__(self, **kwargs):
        """Prepare the inference graph and apply hyperparameters.

        :param kwargs: hyperparameters from the benchmark configuration;
            supported keys: "learning_rate", "momentum", "max_iteration".
        """
        # the inference graph is built lazily on first predict()
        self.has_fast_rcnn_predict = False

        self._init_tf_graph()

        # scratch directory for checkpoints/summaries; mkdtemp() already
        # creates the directory, so the old isdir()/mkdir() check was dead
        self.temp_dir = tempfile.mkdtemp()

        os.environ["MODEL_NAME"] = "model.zip"
        cfgs.LR = kwargs.get("learning_rate", 0.0001)
        cfgs.MOMENTUM = kwargs.get("momentum", 0.9)
        cfgs.MAX_ITERATION = kwargs.get("max_iteration", 5)

    def train(self, train_data, valid_data=None, **kwargs):
        """Train the FPN model and return the final checkpoint path.

        :param train_data: dataset object with .x (image refs) and .y (labels)
        :param valid_data: unused; kept for interface compatibility
        :return: path of the checkpoint written at the last iteration
        :raises Exception: if train_data or its fields are None
        """
        if train_data is None or train_data.x is None or train_data.y is None:
            raise Exception("Train data is None.")

        with tf.Graph().as_default():
            # NOTE: `train_data` is rebound here to the batched image tensor
            img_name_batch, train_data, gtboxes_and_label_batch, num_objects_batch, data_num = \
                next_batch_for_tasks(
                    (train_data.x, train_data.y),
                    dataset_name=cfgs.DATASET_NAME,
                    batch_size=cfgs.BATCH_SIZE,
                    shortside_len=cfgs.SHORT_SIDE_LEN,
                    is_training=True,
                    save_name="train"
                )

            with tf.name_scope('draw_gtboxes'):
                gtboxes_in_img = draw_box_with_color(
                    train_data,
                    tf.reshape(gtboxes_and_label_batch, [-1, 5])[:, :-1],
                    text=tf.shape(gtboxes_and_label_batch)[1])

            # ---------------------------------------------------------------
            # share net (backbone)
            # ---------------------------------------------------------------
            _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                              inputs=train_data,
                                              num_classes=None,
                                              is_training=True,
                                              output_stride=None,
                                              global_pool=False,
                                              spatial_squeeze=False)

            # ---------------------------------------------------------------
            # rpn
            # ---------------------------------------------------------------
            rpn = build_rpn.RPN(net_name=cfgs.NET_NAME,
                                inputs=train_data,
                                gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
                                is_training=True,
                                share_head=cfgs.SHARE_HEAD,
                                share_net=share_net,
                                stride=cfgs.STRIDE,
                                anchor_ratios=cfgs.ANCHOR_RATIOS,
                                anchor_scales=cfgs.ANCHOR_SCALES,
                                scale_factors=cfgs.SCALE_FACTORS,
                                base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
                                level=cfgs.LEVEL,
                                top_k_nms=cfgs.RPN_TOP_K_NMS,
                                rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
                                max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
                                rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
                                # iou>=0.7 is positive box, iou< 0.3 is negative
                                rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,
                                rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
                                rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
                                remove_outside_anchors=False,  # whether remove anchors outside
                                rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME])

            rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()  # rpn_score shape: [300, ]

            rpn_location_loss, rpn_classification_loss = rpn.rpn_losses()
            rpn_total_loss = rpn_classification_loss + rpn_location_loss

            with tf.name_scope('draw_proposals'):
                # score > 0.5 is object
                rpn_object_boxes_indices = tf.reshape(
                    tf.where(tf.greater(rpn_proposals_scores, 0.5)), [-1])
                rpn_object_boxes = tf.gather(rpn_proposals_boxes, rpn_object_boxes_indices)

                rpn_proposals_objcet_boxes_in_img = draw_box_with_color(
                    train_data, rpn_object_boxes,
                    text=tf.shape(rpn_object_boxes)[0])
                rpn_proposals_boxes_in_img = draw_box_with_color(
                    train_data, rpn_proposals_boxes,
                    text=tf.shape(rpn_proposals_boxes)[0])

            # ---------------------------------------------------------------
            # Fast RCNN head
            # ---------------------------------------------------------------
            fast_rcnn = build_fast_rcnn.FastRCNN(
                img_batch=train_data,
                feature_pyramid=rpn.feature_pyramid,
                rpn_proposals_boxes=rpn_proposals_boxes,
                rpn_proposals_scores=rpn_proposals_scores,
                img_shape=tf.shape(train_data),
                roi_size=cfgs.ROI_SIZE,
                roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
                scale_factors=cfgs.SCALE_FACTORS,
                gtboxes_and_label=tf.squeeze(gtboxes_and_label_batch, 0),
                fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                fast_rcnn_maximum_boxes_per_img=100,
                fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,
                # show detections which score >= 0.6
                num_classes=cfgs.CLASS_NUM,
                fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
                fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
                # iou>0.5 is positive, iou<0.5 is negative
                fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,
                use_dropout=False,
                weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                is_training=True,
                level=cfgs.LEVEL)

            fast_rcnn_decode_boxes, fast_rcnn_score, num_of_objects, detection_category = \
                fast_rcnn.fast_rcnn_predict()
            fast_rcnn_location_loss, fast_rcnn_classification_loss = fast_rcnn.fast_rcnn_loss()
            fast_rcnn_total_loss = fast_rcnn_location_loss + fast_rcnn_classification_loss

            with tf.name_scope('draw_boxes_with_categories'):
                fast_rcnn_predict_boxes_in_imgs = draw_boxes_with_categories(
                    img_batch=train_data,
                    boxes=fast_rcnn_decode_boxes,
                    labels=detection_category,
                    scores=fast_rcnn_score)

            # train
            added_loss = rpn_total_loss + fast_rcnn_total_loss
            total_loss = tf.losses.get_total_loss()

            global_step = tf.train.get_or_create_global_step()

            # step-wise learning-rate schedule: LR, LR/10, LR/100
            lr = tf.train.piecewise_constant(global_step,
                                             boundaries=[np.int64(20000), np.int64(40000)],
                                             values=[cfgs.LR, cfgs.LR / 10, cfgs.LR / 100])
            tf.summary.scalar('lr', lr)
            optimizer = tf.train.MomentumOptimizer(lr, momentum=cfgs.MOMENTUM)

            train_op = slim.learning.create_train_op(total_loss, optimizer, global_step)  # rpn_total_loss,
            # train_op = optimizer.minimize(second_classification_loss, global_step)

            # ---------------------------------------------------------------
            # Summary
            # ---------------------------------------------------------------
            # ground truth and predict
            tf.summary.image('img/gtboxes', gtboxes_in_img)
            tf.summary.image('img/faster_rcnn_predict', fast_rcnn_predict_boxes_in_imgs)
            # rpn loss and image
            tf.summary.scalar('rpn/rpn_location_loss', rpn_location_loss)
            tf.summary.scalar('rpn/rpn_classification_loss', rpn_classification_loss)
            tf.summary.scalar('rpn/rpn_total_loss', rpn_total_loss)

            tf.summary.scalar('fast_rcnn/fast_rcnn_location_loss', fast_rcnn_location_loss)
            tf.summary.scalar('fast_rcnn/fast_rcnn_classification_loss', fast_rcnn_classification_loss)
            tf.summary.scalar('fast_rcnn/fast_rcnn_total_loss', fast_rcnn_total_loss)

            tf.summary.scalar('loss/added_loss', added_loss)
            tf.summary.scalar('loss/total_loss', total_loss)

            tf.summary.image('rpn/rpn_all_boxes', rpn_proposals_boxes_in_img)
            tf.summary.image('rpn/rpn_object_boxes', rpn_proposals_objcet_boxes_in_img)
            # learning_rate
            tf.summary.scalar('learning_rate', lr)

            summary_op = tf.summary.merge_all()
            init_op = tf.group(
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            )

            restorer = self._get_restorer()
            saver = tf.train.Saver(max_to_keep=3)
            self.checkpoint_path = self.load(Context.get_parameters("base_model_url"))

            config = tf.ConfigProto()
            config.gpu_options.allow_growth = False
            with tf.Session(config=config) as sess:
                sess.run(init_op)
                if self.checkpoint_path:
                    restorer.restore(sess, self.checkpoint_path)
                    print('restore model')
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess, coord)

                summary_path = os.path.join(self.temp_dir,
                                            'output/{}'.format(cfgs.DATASET_NAME),
                                            FLAGS.summary_path, cfgs.VERSION)
                mkdir(summary_path)
                summary_writer = tf.summary.FileWriter(summary_path, graph=sess.graph)

                for step in range(cfgs.MAX_ITERATION):
                    training_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                    start = time.time()

                    _global_step, _img_name_batch, _rpn_location_loss, _rpn_classification_loss, \
                        _rpn_total_loss, _fast_rcnn_location_loss, _fast_rcnn_classification_loss, \
                        _fast_rcnn_total_loss, _added_loss, _total_loss, _ = \
                        sess.run([global_step, img_name_batch, rpn_location_loss, rpn_classification_loss,
                                  rpn_total_loss, fast_rcnn_location_loss, fast_rcnn_classification_loss,
                                  fast_rcnn_total_loss, added_loss, total_loss, train_op])

                    end = time.time()

                    if step % 50 == 0:
                        print("""{}: step{} image_name:{}
                                 rpn_loc_loss:{:.4f} | rpn_cla_loss:{:.4f} | rpn_total_loss:{:.4f}
                                 fast_rcnn_loc_loss:{:.4f} | fast_rcnn_cla_loss:{:.4f} | fast_rcnn_total_loss:{:.4f}
                                 added_loss:{:.4f} | total_loss:{:.4f} | pre_cost_time:{:.4f}s"""
                              .format(training_time, _global_step, str(_img_name_batch[0]), _rpn_location_loss,
                                      _rpn_classification_loss, _rpn_total_loss, _fast_rcnn_location_loss,
                                      _fast_rcnn_classification_loss, _fast_rcnn_total_loss, _added_loss,
                                      _total_loss, (end - start)))

                    if step % 500 == 0:
                        summary_str = sess.run(summary_op)
                        summary_writer.add_summary(summary_str, _global_step)
                        summary_writer.flush()

                    # save a checkpoint only at the last iteration
                    if step > 0 and step == cfgs.MAX_ITERATION - 1:
                        self.checkpoint_path = os.path.join(self.temp_dir, '{}_'.format(
                            cfgs.DATASET_NAME) + str(_global_step) + "_" + str(time.time()) + '_model.ckpt')
                        saver.save(sess, self.checkpoint_path)
                        print('Weights have been saved to {}.'.format(self.checkpoint_path))

                coord.request_stop()
                coord.join(threads)

        return self.checkpoint_path

    def save(self, model_path):
        """Zip every file belonging to the latest checkpoint into model_path.

        :param model_path: target zip path or target directory
        :return: the path of the written zip archive
        :raises Exception: if model_path is falsy
        """
        if not model_path:
            raise Exception("model path is None.")

        model_dir, model_name = os.path.split(self.checkpoint_path)
        # a TF checkpoint is several sibling files sharing the ckpt prefix
        models = [model for model in os.listdir(model_dir) if model_name in model]

        if os.path.splitext(model_path)[-1] != ".zip":
            model_path = os.path.join(model_path, "model.zip")

        if not os.path.isdir(os.path.dirname(model_path)):
            os.makedirs(os.path.dirname(model_path))

        with zipfile.ZipFile(model_path, "w") as f:
            for model_file in models:
                model_file_path = os.path.join(model_dir, model_file)
                f.write(model_file_path, model_file, compress_type=zipfile.ZIP_DEFLATED)

        return model_path

    def predict(self, data, input_shape=None, **kwargs):
        """Run inference on a list of image paths.

        :param data: iterable of image file paths
        :param input_shape: unused; kept for interface compatibility
        :return: dict mapping image name -> list of per-label
            {"name": label, "bbox": ndarray[N, 5]} entries
        :raises Exception: if data is None
        """
        if data is None:
            raise Exception("Predict data is None")

        inference_output_dir = os.getenv("RESULT_SAVED_URL")

        with self.tf_graph.as_default():
            if not self.has_fast_rcnn_predict:
                self._fast_rcnn_predict()
                self.has_fast_rcnn_predict = True

            restorer = self._get_restorer()

            config = tf.ConfigProto()
            init_op = tf.group(
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            )

            with tf.Session(config=config) as sess:
                sess.run(init_op)

                restorer.restore(sess, self.checkpoint_path)

                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess, coord)

                imgs = [cv2.imread(img) for img in data]
                img_names = [os.path.basename(img_path) for img_path in data]

                predict_dict = {}

                for i, img in enumerate(imgs):
                    start = time.time()

                    _img_batch, _fast_rcnn_decode_boxes, _fast_rcnn_score, _detection_category = \
                        sess.run(
                            [self.img_batch, self.fast_rcnn_decode_boxes, self.fast_rcnn_score,
                             self.detection_category],
                            feed_dict={self.img_plac: img})
                    end = time.time()

                    # predict box dict
                    predict_dict[str(img_names[i])] = []

                    for label in NAME_LABEL_MAP.keys():
                        if label == 'back_ground':
                            continue
                        # collect this label's boxes together with their scores
                        temp_dict = {}
                        temp_dict['name'] = label

                        ind = np.where(_detection_category == NAME_LABEL_MAP[label])[0]
                        temp_boxes = _fast_rcnn_decode_boxes[ind]
                        temp_score = np.reshape(_fast_rcnn_score[ind], [-1, 1])
                        temp_dict['bbox'] = np.array(np.concatenate(
                            [temp_boxes, temp_score], axis=1), np.float64)
                        predict_dict[str(img_names[i])].append(temp_dict)

                    img_np = np.squeeze(_img_batch, axis=0)

                    img_np = draw_box_cv(img_np,
                                         boxes=_fast_rcnn_decode_boxes,
                                         labels=_detection_category,
                                         scores=_fast_rcnn_score)

                    if inference_output_dir:
                        mkdir(inference_output_dir)
                        cv2.imwrite(inference_output_dir + '/{}_fpn.jpg'.format(img_names[i]), img_np)
                        view_bar('{} cost {}s'.format(img_names[i], (end - start)), i + 1, len(imgs))

                # NOTE(review): placed once after the loop; the collapsed
                # source does not show the original indentation — confirm
                print(f"\nInference results have been saved to directory:{inference_output_dir}.")

                coord.request_stop()
                coord.join(threads)

        return predict_dict

    def load(self, model_url=None):
        """Extract a zipped checkpoint and remember its ckpt prefix path.

        :param model_url: path to a model zip archive
        :return: the checkpoint prefix path usable by tf.train.Saver
        :raises Exception: if model_url is falsy
        """
        if model_url:
            model_dir = os.path.split(model_url)[0]
            with zipfile.ZipFile(model_url, "r") as f:
                f.extractall(path=model_dir)
                # derive the ckpt prefix (".../xxx.ckpt") from the first member
                ckpt_name = os.path.basename(f.namelist()[0])
                index = ckpt_name.find("ckpt")
                ckpt_name = ckpt_name[:index + 4]
            self.checkpoint_path = os.path.join(model_dir, ckpt_name)
        else:
            # fixed: was a pointless f-string with no placeholder
            raise Exception("model url is None")

        return self.checkpoint_path

    def evaluate(self, data, model_path, **kwargs):
        """Load a model, predict on data.x and score against data.y.

        :param data: dataset with .x (inputs) and .y (ground truth)
        :param model_path: zipped model to load before predicting
        :param kwargs: must contain "metric" as (name, callable)
        :return: {metric_name: metric_value}
        :raises Exception: if data is incomplete or the metric is not callable
        """
        if data is None or data.x is None or data.y is None:
            raise Exception("Prediction data is None")

        self.load(model_path)
        predict_dict = self.predict(data.x)
        metric_name, metric_func = kwargs.get("metric")
        if callable(metric_func):
            # key the result by the metric's declared name instead of a
            # hard-coded "f1_score", so other metrics report correctly
            return {metric_name: metric_func(data.y, predict_dict)}
        raise Exception(f"not found model metric func(name={metric_name}) in model eval phase")

    def _get_restorer(self):
        """Build a Saver restoring all backbone/RPN vars plus global_step.

        Fast R-CNN head variables are excluded so a backbone-only
        checkpoint can be restored.
        """
        model_variables = slim.get_model_variables()
        restore_variables = [var for var in model_variables if not var.name.startswith(
            'Fast_Rcnn')] + [tf.train.get_or_create_global_step()]
        return tf.train.Saver(restore_variables)

    def _init_tf_graph(self):
        """Create the inference graph skeleton: placeholder + preprocessing."""
        self.tf_graph = tf.Graph()
        with self.tf_graph.as_default():
            self.img_plac = tf.placeholder(shape=[None, None, 3], dtype=tf.uint8)

            # subtract the per-channel mean (BGR order) used at training time
            self.img_tensor = tf.cast(self.img_plac, tf.float32) - tf.constant([103.939, 116.779, 123.68])
            self.img_batch = image_preprocess.short_side_resize_for_inference_data(
                self.img_tensor,
                target_shortside_len=cfgs.SHORT_SIDE_LEN,
                is_resize=True)

    def _fast_rcnn_predict(self):
        """Build the inference branch (backbone + RPN + Fast R-CNN head)."""
        with self.tf_graph.as_default():
            # ---------------------------------------------------------------
            # share net (backbone)
            # ---------------------------------------------------------------
            _, share_net = get_network_byname(net_name=cfgs.NET_NAME,
                                              inputs=self.img_batch,
                                              num_classes=None,
                                              is_training=True,
                                              output_stride=None,
                                              global_pool=False,
                                              spatial_squeeze=False)
            # ---------------------------------------------------------------
            # RPN
            # ---------------------------------------------------------------
            rpn = build_rpn.RPN(net_name=cfgs.NET_NAME,
                                inputs=self.img_batch,
                                gtboxes_and_label=None,
                                is_training=False,
                                share_head=cfgs.SHARE_HEAD,
                                share_net=share_net,
                                stride=cfgs.STRIDE,
                                anchor_ratios=cfgs.ANCHOR_RATIOS,
                                anchor_scales=cfgs.ANCHOR_SCALES,
                                scale_factors=cfgs.SCALE_FACTORS,
                                base_anchor_size_list=cfgs.BASE_ANCHOR_SIZE_LIST,  # P2, P3, P4, P5, P6
                                level=cfgs.LEVEL,
                                top_k_nms=cfgs.RPN_TOP_K_NMS,
                                rpn_nms_iou_threshold=cfgs.RPN_NMS_IOU_THRESHOLD,
                                max_proposals_num=cfgs.MAX_PROPOSAL_NUM,
                                rpn_iou_positive_threshold=cfgs.RPN_IOU_POSITIVE_THRESHOLD,
                                rpn_iou_negative_threshold=cfgs.RPN_IOU_NEGATIVE_THRESHOLD,
                                rpn_mini_batch_size=cfgs.RPN_MINIBATCH_SIZE,
                                rpn_positives_ratio=cfgs.RPN_POSITIVE_RATE,
                                remove_outside_anchors=False,  # whether remove anchors outside
                                rpn_weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME])

            # rpn predict proposals
            rpn_proposals_boxes, rpn_proposals_scores = rpn.rpn_proposals()  # rpn_score shape: [300, ]

            # ---------------------------------------------------------------
            # Fast RCNN head
            # ---------------------------------------------------------------
            fast_rcnn = build_fast_rcnn.FastRCNN(
                img_batch=self.img_batch,
                feature_pyramid=rpn.feature_pyramid,
                rpn_proposals_boxes=rpn_proposals_boxes,
                rpn_proposals_scores=rpn_proposals_scores,
                img_shape=tf.shape(self.img_batch),
                roi_size=cfgs.ROI_SIZE,
                scale_factors=cfgs.SCALE_FACTORS,
                roi_pool_kernel_size=cfgs.ROI_POOL_KERNEL_SIZE,
                gtboxes_and_label=None,
                fast_rcnn_nms_iou_threshold=cfgs.FAST_RCNN_NMS_IOU_THRESHOLD,
                fast_rcnn_maximum_boxes_per_img=100,
                fast_rcnn_nms_max_boxes_per_class=cfgs.FAST_RCNN_NMS_MAX_BOXES_PER_CLASS,
                show_detections_score_threshold=cfgs.FINAL_SCORE_THRESHOLD,
                # show detections which score >= 0.6
                num_classes=cfgs.CLASS_NUM,
                fast_rcnn_minibatch_size=cfgs.FAST_RCNN_MINIBATCH_SIZE,
                fast_rcnn_positives_ratio=cfgs.FAST_RCNN_POSITIVE_RATE,
                fast_rcnn_positives_iou_threshold=cfgs.FAST_RCNN_IOU_POSITIVE_THRESHOLD,
                use_dropout=False,
                weight_decay=cfgs.WEIGHT_DECAY[cfgs.NET_NAME],
                is_training=False,
                level=cfgs.LEVEL)

            self.fast_rcnn_decode_boxes, self.fast_rcnn_score, self.num_of_objects, self.detection_category = \
                fast_rcnn.fast_rcnn_predict()
options of value are as follows: + # 1> "basemodel" + - type: "basemodel" + # name of python module; string type; + # example: basemodel.py has BaseModel module that the alias is "FPN" for this benchmarking; + name: "FPN" + # the url address of python module; string type; + url: "./examples/pcb-aoi/singletask_learning_bench/testalgorithms/fpn/basemodel.py" + + # hyperparameters configuration for the python module; list type; + hyperparameters: + # name of the hyperparameter; string type; + - momentum: + # values of the hyperparameter; list type; + # types of the value are string/int/float/boolean/list/dictionary + values: + - 0.95 + - 0.5 + - learning_rate: + values: + - 0.1 \ No newline at end of file diff --git a/examples/pcb-aoi/singletask_learning_bench/testenv/f1_score.py b/examples/pcb-aoi/singletask_learning_bench/testenv/f1_score.py new file mode 100644 index 0000000..82be36e --- /dev/null +++ b/examples/pcb-aoi/singletask_learning_bench/testenv/f1_score.py @@ -0,0 +1,54 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
from FPN_TensorFlow.libs.label_name_dict.label_dict import NAME_LABEL_MAP
from FPN_TensorFlow.data.io.read_tfrecord import convert_labels
from FPN_TensorFlow.help_utils.tools import get_single_label_dict, single_label_eval
from sedna.common.class_factory import ClassType, ClassFactory

__all__ = ["f1_score"]


@ClassFactory.register(ClassType.GENERAL, alias="f1_score")
def f1_score(y_true, y_pred):
    """Return the macro-averaged F1 score over all foreground labels.

    :param y_true: ground-truth annotations, consumable by convert_labels
    :param y_pred: mapping of image name -> per-label prediction entries
    :return: average F1 over labels, rounded to 4 decimals (0 when empty)
    """
    # the ground-truth dict is keyed by the repr of byte strings
    # (e.g. "b'img.jpg'"), so re-key the predictions to match
    predict_dict = {}
    for k, v in y_pred.items():
        k = f"b'{k}'"
        if not predict_dict.get(k):
            predict_dict[k] = v

    gtboxes_dict = convert_labels(y_true)

    f1_score_list = []

    for label in NAME_LABEL_MAP.keys():
        if label == 'back_ground':
            continue

        rboxes, gboxes = get_single_label_dict(predict_dict, gtboxes_dict, label)
        # IoU threshold 0.3, no per-label verbose output
        rec, prec, ap, box_num = single_label_eval(rboxes, gboxes, 0.3, False)
        recall = 0 if rec.shape[0] == 0 else rec[-1]
        precision = 0 if prec.shape[0] == 0 else prec[-1]
        # renamed local (was "f1_score") to stop shadowing this function
        score = 0 if not (recall + precision) else (2 * precision * recall / (recall + precision))

        f1_score_list.append(score)

    f1_score_avg = 0
    if f1_score_list:
        f1_score_avg = round(float(sum(f1_score_list)) / len(f1_score_list), 4)

    print(f"f1_score_avg: {f1_score_avg}")

    return f1_score_avg
"./examples/pcb-aoi/singletask_learning_bench/testenv/f1_score.py" \ No newline at end of file diff --git a/examples/resources/algorithms/FPN_TensorFlow-0.1-py3-none-any.whl b/examples/resources/algorithms/FPN_TensorFlow-0.1-py3-none-any.whl new file mode 100644 index 0000000..8c1d712 Binary files /dev/null and b/examples/resources/algorithms/FPN_TensorFlow-0.1-py3-none-any.whl differ diff --git a/examples/resources/algorithms/FPN_Tensorflow.md b/examples/resources/algorithms/FPN_Tensorflow.md new file mode 100644 index 0000000..e69de29 diff --git a/examples/resources/datasets/pcb-aoi.md b/examples/resources/datasets/pcb-aoi.md new file mode 100644 index 0000000..e69de29 diff --git a/examples/resources/third_party/sedna-0.4.5-py3-none-any.whl b/examples/resources/third_party/sedna-0.4.5-py3-none-any.whl new file mode 100644 index 0000000..54e41a3 Binary files /dev/null and b/examples/resources/third_party/sedna-0.4.5-py3-none-any.whl differ diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9f7cec5 --- /dev/null +++ b/requirements.txt @@ -0,0 +1 @@ +prettytable~=2.5.0 # BSD \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..f28dec6 --- /dev/null +++ b/setup.py @@ -0,0 +1,117 @@ +# Copyright 2022 The KubeEdge Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 

"""Setuptools of Ianvs"""
import sys
import os

from setuptools import setup, find_packages

# explicit check instead of `assert`: assertions are stripped when Python
# runs with -O, which would silently skip the version guard
if sys.version_info < (3, 6):
    raise RuntimeError("Sorry, Python < 3.6 is not supported.")


class InstallPrepare:
    """
    Parsing dependencies and package metadata from repository files.
    """

    def __init__(self):
        self.project = os.path.join(os.path.dirname(__file__), "core")
        self._long_desc = os.path.join(self.project, "..", "README.md")
        self._owner = os.path.join(self.project, "..", "OWNERS")
        self._requirements = os.path.join(self.project, "..", "requirements.txt")

    @property
    def long_desc(self):
        """README.md contents, or "" when the file is absent."""
        if not os.path.isfile(self._long_desc):
            return ""
        with open(self._long_desc, "r", encoding="utf-8") as fh:
            long_desc = fh.read()
        return long_desc

    @property
    def version(self):
        """Package version; currently a fixed default."""
        default_version = "0.1.0"
        return default_version

    @property
    def owners(self):
        """Comma-joined approvers parsed from the OWNERS file.

        Fixed: the previous implementation tested membership before
        updating the section flag, so the first line AFTER the approvers
        block (e.g. a following "reviewers:" header) was also collected.
        Only "approvers:" list items (lines starting with " -") count now.
        """
        default_owner = "ianvs"
        if not os.path.isfile(self._owner):
            return default_owner
        approvers = set()
        in_approvers = False
        with open(self._owner, "r", encoding="utf-8") as fh:
            for line in fh:
                if not line.strip():
                    continue
                if line.startswith("approvers:"):
                    in_approvers = True
                elif in_approvers and line.startswith(" -"):
                    approvers.add(line.strip().split()[-1])
                else:
                    # any other line terminates the approvers section
                    in_approvers = False
        return ",".join(approvers) or default_owner

    @property
    def basic_dependencies(self):
        """Dependencies parsed from requirements.txt."""
        return self._read_requirements(self._requirements)

    @staticmethod
    def _read_requirements(file_path, section="all"):
        """Read requirement specifiers from *file_path*.

        With section="all", every non-comment line is returned; otherwise
        only the lines between the "# <section>" marker and the next
        comment line are returned.
        """
        print(f"Start to install requirements of {section} "
              f"in ianvs from {file_path}")
        if not os.path.isfile(file_path):
            return []
        with open(file_path, "r", encoding="utf-8") as f:
            install_requires = [p.strip() for p in f.readlines() if p.strip()]
        if section == "all":
            return list(filter(lambda x: not x.startswith("#"),
                               install_requires))
        section_start = False
        section_requires = []
        for p in install_requires:
            if section_start:
                if p.startswith("#"):
                    return section_requires
                section_requires.append(p)
            elif p.startswith(f"# {section}"):
                section_start = True
        return section_requires


_infos = InstallPrepare()

setup(
    name='ianvs',
    version=_infos.version,
    description="The ianvs package is designed to help algorithm developers \
                better do algorithm test.",
    packages=find_packages(exclude=["tests", "*.tests",
                                    "*.tests.*", "tests.*"]),
    author=_infos.owners,
    maintainer=_infos.owners,
    maintainer_email="",
    include_package_data=True,
    entry_points={
        "console_scripts": ["ianvs = core.cmd.benchmarking:main"]
    },
    python_requires=">=3.6",
    long_description=_infos.long_desc,
    long_description_content_type="text/markdown",
    license="Apache License 2.0",
    url="https://github.com/kubeedge/ianvs",
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: POSIX :: Linux",
    ],
)