From c449a02eae166c8bbd37a3764046013b1b5f987c Mon Sep 17 00:00:00 2001 From: Victor Villas Date: Mon, 27 Apr 2020 15:37:28 -0300 Subject: [PATCH 1/3] Add delete flag to s3 sync command --- Makefile | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index 0b79f913..2373f469 100644 --- a/Makefile +++ b/Makefile @@ -25,13 +25,11 @@ pack: 7z a ./functions/package.zip ./functions/*.py -stl s3-%: pack - # aws s3 rm $(bucket)-$*/$(prefix) --recursive - aws s3 sync --exclude '.*' --acl public-read . $(bucket)-$*/$(prefix) + aws s3 sync --delete --exclude '.*' --acl public-read . $(bucket)-$*/$(prefix) targets := $(addprefix s3-,$(regions)) sync: pack $(targets) - # aws s3 rm $(bucket)/$(prefix) --recursive - aws s3 sync --exclude '.*' --acl public-read . $(bucket)/$(prefix) + aws s3 sync --delete --exclude '.*' --acl public-read . $(bucket)/$(prefix) test: pack pytest -vv From a253ee213eadaaf830658ef8b15f2b2a3a0ea2a2 Mon Sep 17 00:00:00 2001 From: Victor Villas Date: Tue, 28 Apr 2020 10:21:59 -0300 Subject: [PATCH 2/3] Add sa-east-1 to ci deployment tests --- README.md | 37 +++++++++++++++---------------------- ci/taskcat.yaml | 3 ++- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/README.md b/README.md index 35272633..5038deda 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,7 @@ tinkered with, allowing it to be used in real production environments with little extra effort. Deploy in a few clicks, personalize in a few fields, configure in a few commands. + ## Overview ![stack diagram](/.github/img/stack-diagram.png) @@ -19,8 +20,7 @@ The stack is composed mainly of three services: the Airflow web server, the Airflow scheduler, and the Airflow worker. Supporting resources include an RDS to host the Airflow metadata database, an SQS to be used as broker backend, S3 buckets for logs and deployment bundles, an EFS to serve as shared directory, -and a custom CloudWatch metric measured by a timed AWS Lambda. 
All other -resources are the usual boilerplate to keep the wind blowing. +and a custom CloudWatch metric measured by a timed AWS Lambda. ### Deployment and File Sharing @@ -53,6 +53,7 @@ the latter is a very advanced scenario and would be best handled by Celery's own scaling mechanism or offloading the computation to another system (like Spark or Kubernetes) and use Airflow only for orchestration. + ## Get It Working ### 0. Prerequisites @@ -70,8 +71,8 @@ branch (defaults to your last used region): [![Launch](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/images/cloudformation-launch-stack-button.png)](https://console.aws.amazon.com/cloudformation/home#/stacks/new?templateURL=https://turbine-quickstart.s3.amazonaws.com/quickstart-turbine-airflow/templates/turbine-master.template) The stack resources take around 15 minutes to create, while the airflow -installation and bootstrap another 3 to 5 minutes. After that you can already -access the Airflow UI and deploy your own Airflow DAGs. +installation another 3 to 5 minutes. After that you can already access the +Airflow UI and deploy your own Airflow DAGs. ### 2. Upstream your files @@ -97,8 +98,6 @@ debug or just inspect the Airflow services and database. The stack is designed to minimize this need, but just in case it also offers decent internal tooling for those scenarios. -### Using Systems Manager Sessions - Instead of the usual SSH procedure, this stack encourages the use of AWS Systems Manager Sessions for increased security and auditing capabilities. You can still use the CLI after a bit more configuration and not having to expose your @@ -125,7 +124,7 @@ coming, or the `--no-pager` to directly dump the text lines, but it offers [much more](https://www.freedesktop.org/software/systemd/man/journalctl.html). 
```bash -$ sudo journalctl -u airflow -n 50 +$ sudo journalctl -u airflow-scheduler -n 50 ``` @@ -144,26 +143,19 @@ $ sudo journalctl -u airflow -n 50 Workers have lifecycle hooks that make sure to wait for Celery to finish its tasks before allowing EC2 to terminate that instance (except maybe for Spot Instances going out of capacity). If you want to kill running tasks, you - will need to SSH into worker instances and stop the airflow service - forcefully. + will need to forcefully stop the airflow systemd services (via AWS Systems + Manager). 3. Is there any documentation around the architectural decisions? - Yes, most of them should be available in the project's GitHub - [Wiki](https://github.com/villasv/aws-airflow-stack/wiki). It doesn't mean - those decisions are final, but reading them beforehand will help formulating - new proposals. + Yes, they should be available in the project's GitHub [Wiki][]. It doesn't + mean those decisions are final, but reading them beforehand will help + formulating new proposals. -## Contributing +[Wiki]: https://github.com/villasv/aws-airflow-stack/wiki ->This project aims to be constantly evolving with up to date tooling and newer ->AWS features, as well as improving its design qualities and maintainability. ->Requests for Enhancement should be abundant and anyone is welcome to pick them ->up. -> ->Stacks can get quite opinionated. If you have a divergent fork, you may open a ->Request for Comments and we will index it. Hopefully this will help to build a ->diverse set of possible deployment models for various production needs. + +## Contributing See the [contribution guidelines](/CONTRIBUTING.md) for details. @@ -174,6 +166,7 @@ Did this project help you? 
Consider buying me a cup of coffee ;-) [![Buy me a coffee!](https://www.buymeacoffee.com/assets/img/custom_images/white_img.png)](https://www.buymeacoffee.com/villasv) + ## Licensing > MIT License diff --git a/ci/taskcat.yaml b/ci/taskcat.yaml index 0612a07c..58576249 100644 --- a/ci/taskcat.yaml +++ b/ci/taskcat.yaml @@ -8,7 +8,8 @@ tests: master: template: templates/turbine-master.template regions: + - sa-east-1 - us-east-1 - - us-east-2 + - us-west-1 parameters: QSS3BucketName: "$[taskcat_autobucket]" From 48461ba9bc22a758cbcecaf363f98e9a50f5f6dc Mon Sep 17 00:00:00 2001 From: Victor Villas Date: Tue, 28 Apr 2020 13:05:10 -0300 Subject: [PATCH 3/3] Add package installation to example project files Fixes #177 --- examples/project/airflow/appspec.yml | 3 ++ examples/project/airflow/dags/my_dag.py | 44 ++++++++++++++++--- examples/project/airflow/requirements.txt | 2 + .../project/airflow/scripts/cdapp_deps.sh | 2 + 4 files changed, 44 insertions(+), 7 deletions(-) create mode 100644 examples/project/airflow/requirements.txt create mode 100644 examples/project/airflow/scripts/cdapp_deps.sh diff --git a/examples/project/airflow/appspec.yml b/examples/project/airflow/appspec.yml index bdfe47b4..b3f5d66b 100644 --- a/examples/project/airflow/appspec.yml +++ b/examples/project/airflow/appspec.yml @@ -10,3 +10,6 @@ hooks: ApplicationStop: - location: scripts/cdapp_stop.sh runas: root + AfterInstall: + - location: scripts/cdapp_deps.sh + runas: root diff --git a/examples/project/airflow/dags/my_dag.py b/examples/project/airflow/dags/my_dag.py index f1e2f0f9..6f510cdd 100644 --- a/examples/project/airflow/dags/my_dag.py +++ b/examples/project/airflow/dags/my_dag.py @@ -1,17 +1,47 @@ -from datetime import datetime +from datetime import datetime, timedelta from airflow import DAG from airflow.operators.bash_operator import BashOperator +from airflow.operators.python_operator import PythonOperator +import silly default_args = { "start_date": datetime(2019, 1, 1), } -dag = 
DAG(dag_id="my_dag", default_args=default_args, schedule_interval="@daily",) +with DAG( + "my_dag", default_args=default_args, schedule_interval=timedelta(days=1) +) as dag: -for i in range(5): - task = BashOperator( - task_id="runme_" + str(i), - bash_command='echo "{{ task_instance_key_str }}" && sleep 5 && echo "done"', - dag=dag, + setup_task = BashOperator( + task_id="setup", + bash_command='echo "setup initiated" && sleep 5 && echo "done"', ) + + def fetch_companies(): + return [silly.company(capitalize=True) for _ in range(5)] + + fetch_companies_task = PythonOperator( + task_id="fetch_companies", python_callable=fetch_companies, + ) + setup_task >> fetch_companies_task + + def generate_reports(**context): + companies = context["task_instance"].xcom_pull(task_ids="fetch_companies") + reports = [ + f"# '{company}' Report\n\n{silly.markdown()}" for company in companies + ] + return reports + + generate_reports_task = PythonOperator( + task_id="generate_reports", + python_callable=generate_reports, + provide_context=True, + ) + fetch_companies_task >> generate_reports_task + + teardown_task = BashOperator( + task_id="teardown", + bash_command='echo "teardown initiated" && sleep 5 && echo "done"', + ) + generate_reports_task >> teardown_task diff --git a/examples/project/airflow/requirements.txt b/examples/project/airflow/requirements.txt new file mode 100644 index 00000000..3cc7010e --- /dev/null +++ b/examples/project/airflow/requirements.txt @@ -0,0 +1,2 @@ +apache-airflow[aws]==1.10.10 +silly diff --git a/examples/project/airflow/scripts/cdapp_deps.sh b/examples/project/airflow/scripts/cdapp_deps.sh new file mode 100644 index 00000000..b94f6e9d --- /dev/null +++ b/examples/project/airflow/scripts/cdapp_deps.sh @@ -0,0 +1,2 @@ +#!/bin/bash -e +pip3 install -r /airflow/requirements.txt