From 521adc1450b3c0be2f95b010ea5fbdb4506ff115 Mon Sep 17 00:00:00 2001 From: nvliyuan <84758614+nvliyuan@users.noreply.github.com> Date: Wed, 18 Aug 2021 15:12:50 +0800 Subject: [PATCH 1/3] add signoff checks (#8) Signed-off-by: Yuan Li Co-authored-by: Yuan Li --- .github/ISSUE_TEMPLATE/bug_report.md | 22 +++++++ .github/workflow/signoff-check.yml | 34 ++++++++++ .github/workflow/signoff-check/Dockerfile | 22 +++++++ .github/workflow/signoff-check/action.yml | 19 ++++++ .github/workflow/signoff-check/signoff-check | 69 ++++++++++++++++++++ 5 files changed, 166 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md create mode 100644 .github/workflow/signoff-check.yml create mode 100644 .github/workflow/signoff-check/Dockerfile create mode 100644 .github/workflow/signoff-check/action.yml create mode 100644 .github/workflow/signoff-check/signoff-check diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 000000000..286283614 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,22 @@ +--- +name: Bug report +about: Create a report to help us improve +title: '' +labels: '' +assignees: GaryShen2008 + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**Steps/Code to reproduce bug** +Please provide a list of steps or a code sample to reproduce the issue. +Avoid posting private or sensitive data. + +**Expected behavior** +A clear and concise description of what you expected to happen. 
+ +**Environment details (please complete the following information)** + - Environment location: [Standalone, YARN, Kubernetes, Cloud(specify cloud provider)] + - Spark configuration settings related to the issue \ No newline at end of file diff --git a/.github/workflow/signoff-check.yml b/.github/workflow/signoff-check.yml new file mode 100644 index 000000000..a673fc4ab --- /dev/null +++ b/.github/workflow/signoff-check.yml @@ -0,0 +1,34 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A workflow to check if PR got sign-off +name: signoff check + +on: + pull_request_target: + types: [opened, synchronize, reopened] + +jobs: + signoff-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: signoff-check job + uses: ./.github/workflows/signoff-check + env: + OWNER: NVIDIA + REPO_NAME: spark-xgboost-examples + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PULL_NUMBER: ${{ github.event.number }} diff --git a/.github/workflow/signoff-check/Dockerfile b/.github/workflow/signoff-check/Dockerfile new file mode 100644 index 000000000..b4ab6cf77 --- /dev/null +++ b/.github/workflow/signoff-check/Dockerfile @@ -0,0 +1,22 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM python:alpine + +WORKDIR / +COPY signoff-check . +RUN pip install PyGithub && chmod +x /signoff-check + +# require envs: OWNER,REPO_NAME,GITHUB_TOKEN,PULL_NUMBER +ENTRYPOINT ["/signoff-check"] \ No newline at end of file diff --git a/.github/workflow/signoff-check/action.yml b/.github/workflow/signoff-check/action.yml new file mode 100644 index 000000000..93fe0b362 --- /dev/null +++ b/.github/workflow/signoff-check/action.yml @@ -0,0 +1,19 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +name: 'signoff check action' +description: 'check if PR got signed off' +runs: + using: 'docker' + image: 'Dockerfile' \ No newline at end of file diff --git a/.github/workflow/signoff-check/signoff-check b/.github/workflow/signoff-check/signoff-check new file mode 100644 index 000000000..54afa22e8 --- /dev/null +++ b/.github/workflow/signoff-check/signoff-check @@ -0,0 +1,69 @@ +#!/usr/bin/env python + +# Copyright (c) 2021, NVIDIA CORPORATION. 
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""A signoff check
+The tool checks if any commit got signoff in a pull request.
+NOTE: this script is for github actions only, you should not use it anywhere else.
+"""
+import os
+import re
+import sys
+from argparse import ArgumentParser
+
+from github import Github
+
+SIGNOFF_REGEX = re.compile('Signed-off-by:')
+
+
+def signoff(token: str, owner: str, repo_name: str, pull_number: int):  # True once any PR commit message matches SIGNOFF_REGEX
+    gh = Github(token, per_page=100, user_agent='signoff-check', verify=True)
+    pr = gh.get_repo(f"{owner}/{repo_name}").get_pull(pull_number)
+    for c in pr.get_commits():
+        if SIGNOFF_REGEX.search(c.commit.message):
+            print('Found signoff.\n')
+            print(f"Commit sha:\n{c.commit.sha}")
+            print(f"Commit message:\n{c.commit.message}")
+            return True  # one signed-off commit is enough; remaining commits are not inspected
+    return False
+
+
+def main(token: str, owner: str, repo_name: str, pull_number: int):  # exits with status 1 if no signoff is found or the lookup raises
+    try:
+        if not signoff(token, owner, repo_name, pull_number):
+            raise Exception('No commits w/ signoff')
+    except Exception as e:  # pylint: disable=broad-except
+        print(e)
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    parser = ArgumentParser(description="signoff check")
+    parser.add_argument("--owner", help="repo owner", default='')
+    parser.add_argument("--repo_name", help="repo name", default='')
+    parser.add_argument("--token", help="github token, will use GITHUB_TOKEN if empty", default='')
+    parser.add_argument("--pull_number", help="pull request number", type=int)
+    args = parser.parse_args()
+
+    GITHUB_TOKEN = args.token if args.token else os.environ.get('GITHUB_TOKEN')  # CLI flag wins, env var is the fallback
+    assert GITHUB_TOKEN, 'env GITHUB_TOKEN should not be empty'
+    OWNER = args.owner if args.owner else os.environ.get('OWNER')
+    assert OWNER, 'env OWNER should not be empty'
+    REPO_NAME = args.repo_name if args.repo_name else os.environ.get('REPO_NAME')
+    assert REPO_NAME, 'env REPO_NAME should not be empty'
+    PULL_NUMBER = args.pull_number if args.pull_number else int(os.environ.get('PULL_NUMBER') or 0)  # 'or 0': unset/empty env reaches the assert instead of int() raising
+    assert PULL_NUMBER, 'env PULL_NUMBER should not be empty'
+
+    main(token=GITHUB_TOKEN, owner=OWNER, repo_name=REPO_NAME, pull_number=PULL_NUMBER)
\ No newline at end of file
From 9ab41665793c8135315847c2fa8b85e5aed97af3 Mon Sep 17 00:00:00 2001
From: nvliyuan <84758614+nvliyuan@users.noreply.github.com>
Date: Wed, 18 Aug 2021 16:09:32 +0800
Subject: [PATCH 2/3] fix error path names (#10)

* fix error path names

Signed-off-by: Yuan Li

* fix error paths

Signed-off-by: Yuan Li

Co-authored-by: Yuan Li
---
 .github/workflow/signoff-check.yml | 2 +-
 .../xgboost-examples/building-sample-apps/python.md | 2 +-
 .../xgboost-examples/building-sample-apps/scala.md | 4 ++--
 ...ok-for-rapids-spark-xgboost-on-databricks-gpu-7.0-ml.ipynb | 2 +-
 .../xgboost-examples/on-prem-cluster/kubernetes.md | 4 ++--
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/.github/workflow/signoff-check.yml b/.github/workflow/signoff-check.yml
index a673fc4ab..40d36786b 100644
--- a/.github/workflow/signoff-check.yml
+++ b/.github/workflow/signoff-check.yml
@@ -29,6 +29,6 @@ jobs:
         uses: ./.github/workflows/signoff-check
         env:
           OWNER: NVIDIA
-          REPO_NAME: spark-xgboost-examples
+          REPO_NAME: spark-rapids-examples
           GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
           PULL_NUMBER: ${{ github.event.number }}
diff --git a/docs/get-started/xgboost-examples/building-sample-apps/python.md b/docs/get-started/xgboost-examples/building-sample-apps/python.md
index 4e6e79948..0563bc612 100644
---
a/docs/get-started/xgboost-examples/building-sample-apps/python.md +++ b/docs/get-started/xgboost-examples/building-sample-apps/python.md @@ -21,4 +21,4 @@ Two files are required by PySpark: + *main.py* - entrypoint for PySpark, you can find it in 'spark-xgboost-examples/examples' folder + entrypoint for PySpark, you can find it in 'spark-rapids-examples/examples' folder diff --git a/docs/get-started/xgboost-examples/building-sample-apps/scala.md b/docs/get-started/xgboost-examples/building-sample-apps/scala.md index eed6de309..6ddf70795 100644 --- a/docs/get-started/xgboost-examples/building-sample-apps/scala.md +++ b/docs/get-started/xgboost-examples/building-sample-apps/scala.md @@ -7,8 +7,8 @@ The examples rely on [XGBoost](https://github.com/nvidia/spark-xgboost). Follow these steps to build the Scala jars: ``` bash -git clone https://github.com/NVIDIA/spark-xgboost-examples.git -cd spark-xgboost-examples/examples +git clone https://github.com/NVIDIA/spark-rapids-examples.git +cd spark-rapids-examples/examples mvn package ``` diff --git a/docs/get-started/xgboost-examples/csp/databricks/init-notebook-for-rapids-spark-xgboost-on-databricks-gpu-7.0-ml.ipynb b/docs/get-started/xgboost-examples/csp/databricks/init-notebook-for-rapids-spark-xgboost-on-databricks-gpu-7.0-ml.ipynb index 1064f1d9c..934627b0a 100644 --- a/docs/get-started/xgboost-examples/csp/databricks/init-notebook-for-rapids-spark-xgboost-on-databricks-gpu-7.0-ml.ipynb +++ b/docs/get-started/xgboost-examples/csp/databricks/init-notebook-for-rapids-spark-xgboost-on-databricks-gpu-7.0-ml.ipynb @@ -132,7 +132,7 @@ "1. Edit your cluster, adding an initialization script from `dbfs:/databricks/init_scripts/init.sh` in the \"Advanced Options\" under \"Init Scripts\" tab\n", "2. Reboot the cluster\n", "3. Go to \"Libraries\" tab under your cluster and install `dbfs:/FileStore/jars/xgboost4j-spark_3.0-1.3.0-0.1.0.jar` in your cluster by selecting the \"DBFS\" option for installing jars\n", - "4. 
Import the mortgage example notebook from `https://github.com/NVIDIA/spark-xgboost-examples/blob/spark-3/examples/notebooks/python/mortgage-gpu.ipynb`\n", + "4. Import the mortgage example notebook from `https://github.com/NVIDIA/spark-rapids-examples/blob/branch-21.06/examples/mortgage/notebooks/python/mortgage-gpu.ipynb`\n", "5. Inside the mortgage example notebook, update the data paths\n", " `train_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-train.csv')`\n", " `trans_data = reader.schema(schema).option('header', True).csv('/data/mortgage/csv/small-trans.csv')`" diff --git a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes.md b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes.md index 9b2e3367e..b668dc1b7 100644 --- a/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes.md +++ b/docs/get-started/xgboost-examples/on-prem-cluster/kubernetes.md @@ -26,7 +26,7 @@ Build a GPU Docker image with Spark resources in it, this Docker image must be a 1. Locate your Spark installations. If you don't have one, you can [download](https://spark.apache.org/downloads.html) from Apache and unzip it. 2. `export SPARK_HOME=` -3. [Download the Dockerfile](https://github.com/rapidsai/spark-xgboost-examples/Dockerfile) into `${SPARK_HOME}` (Here CUDA 10.2 is used as an example in the Dockerfile, you may need to update it for other CUDA versions.) +3. [Download the Dockerfile](/dockerfile/Dockerfile) into `${SPARK_HOME}` (Here CUDA 10.2 is used as an example in the Dockerfile, you may need to update it for other CUDA versions.) 4. __(OPTIONAL)__ install any additional library jars into the `${SPARK_HOME}/jars` directory * Most public cloud file systems are not natively supported -- pulling data and jar files from S3, GCS, etc. require installing additional libraries 5. 
Build and push the docker image @@ -37,7 +37,7 @@ export SPARK_DOCKER_IMAGE= export SPARK_DOCKER_TAG= pushd ${SPARK_HOME} -wget https://github.com/NVIDIA/spark-xgboost-examples/raw/spark-3/Dockerfile +wget https://github.com/NVIDIA/spark-rapids-examples/raw/branch-21.06/dockerfile/Dockerfile # Optionally install additional jars into ${SPARK_HOME}/jars/ From 5f65c3002485d1622c1ef3aeae9a0530d14d7196 Mon Sep 17 00:00:00 2001 From: nvliyuan <84758614+nvliyuan@users.noreply.github.com> Date: Wed, 18 Aug 2021 17:07:32 +0800 Subject: [PATCH 3/3] Update python.md update building docs --- .../xgboost-examples/building-sample-apps/python.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/get-started/xgboost-examples/building-sample-apps/python.md b/docs/get-started/xgboost-examples/building-sample-apps/python.md index 0563bc612..ef1524338 100644 --- a/docs/get-started/xgboost-examples/building-sample-apps/python.md +++ b/docs/get-started/xgboost-examples/building-sample-apps/python.md @@ -5,8 +5,8 @@ Follow these steps to package the Python zip file: ``` bash -git clone https://gitlab-master.nvidia.com/nvspark/spark-examples2.git -cd spark-examples2/scripts/building +git clone https://github.com/NVIDIA/spark-rapids-examples.git +cd spark-rapids-examples/scripts/building sh python_build.sh ```