From c95467881706f22654f6370f2df286fe1c072705 Mon Sep 17 00:00:00 2001 From: Jakub Filipczak Date: Wed, 18 Aug 2021 16:27:31 +0200 Subject: [PATCH 1/4] Initial commit for AWS Cloudformation Stacks Exporter --- .../.dockerignore | 1 + .../Dockerfile | 17 ++ .../README.md | 22 ++ .../aws-stack-states.cnf | 24 +++ .../aws_cfs_exporter.py | 200 ++++++++++++++++++ .../version.json | 3 + 6 files changed, 267 insertions(+) create mode 100644 exporters/aws-cloudformation-stacks-exporter/.dockerignore create mode 100644 exporters/aws-cloudformation-stacks-exporter/Dockerfile create mode 100644 exporters/aws-cloudformation-stacks-exporter/README.md create mode 100644 exporters/aws-cloudformation-stacks-exporter/aws-stack-states.cnf create mode 100644 exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py create mode 100644 exporters/aws-cloudformation-stacks-exporter/version.json diff --git a/exporters/aws-cloudformation-stacks-exporter/.dockerignore b/exporters/aws-cloudformation-stacks-exporter/.dockerignore new file mode 100644 index 0000000..42061c0 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/.dockerignore @@ -0,0 +1 @@ +README.md \ No newline at end of file diff --git a/exporters/aws-cloudformation-stacks-exporter/Dockerfile b/exporters/aws-cloudformation-stacks-exporter/Dockerfile new file mode 100644 index 0000000..55c8b2a --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/Dockerfile @@ -0,0 +1,17 @@ +FROM registry.access.redhat.com/ubi8/python-38 + +# Add application sources with correct permissions for OpenShift +USER 0 +# Install the dependencies +RUN pip install --upgrade pip && \ + pip install prometheus-client boto3 python-benedict +ADD aws_cfs_exporter.py . +ADD aws-stack-states.cnf . 
+RUN chown -R 1001:0 ./ +RUN chmod 700 ./aws_cfs_exporter.py +USER 1001 +EXPOSE 8000 + + +# Run the application +# CMD ./aws_cfs_exporter.py diff --git a/exporters/aws-cloudformation-stacks-exporter/README.md b/exporters/aws-cloudformation-stacks-exporter/README.md new file mode 100644 index 0000000..20cf243 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/README.md @@ -0,0 +1,22 @@ +## AWS CloudFormation Stack Exporter ## +*** *** +This is a simple Prometheus Exporter that queries the AWS API for the number of CloudFormation Stacks in each state. + +## Building the exporter Docker image ## +The Docker image should be built from the provided Dockerfile; to build the image, run this command from the repository root directory: + + `export VERSION="0.1.1"; docker build -t aws-cloudformation-stacks-exporter:${VERSION} exporters/aws-cloudformation-stacks-exporter/` + +## Running the exporter and AWS credentials ## +The exporter uses the AWS API directly; the simplest way of injecting API keys is by mounting a prepopulated .aws directory into the container: + + `docker run -p 8000:8000 -v /${HOME}/.aws:/home/exporter/.aws aws-cloudformation-stacks-exporter:0.1.1` + +Other options are: + +* -a APIKEY, --apikey APIKEY : AWS Access Key ID +* -s SECRETKEY, --secretkey SECRETKEY : AWS Secret Access Key +* -r REGION(S), --regions REGION : AWS Region or list of comma separated regions to be used for queries +* -t TIME, --time TIME : Sleep time between fetching the AWS API input +* -d, --debug : Should we be more verbose? 
+* -p PORT, --port PORT : TCP port to be used to expose metrics HTTP endpoint diff --git a/exporters/aws-cloudformation-stacks-exporter/aws-stack-states.cnf b/exporters/aws-cloudformation-stacks-exporter/aws-stack-states.cnf new file mode 100644 index 0000000..a5b7f13 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/aws-stack-states.cnf @@ -0,0 +1,24 @@ +## Configuration file listing all available CloudFormation Stack states +CREATE_COMPLETE +CREATE_IN_PROGRESS +CREATE_FAILED +DELETE_COMPLETE +DELETE_FAILED +DELETE_IN_PROGRESS +REVIEW_IN_PROGRESS +ROLLBACK_COMPLETE +ROLLBACK_FAILED +ROLLBACK_IN_PROGRESS +UPDATE_COMPLETE +UPDATE_COMPLETE_CLEANUP_IN_PROGRESS +UPDATE_FAILED +UPDATE_IN_PROGRESS +UPDATE_ROLLBACK_COMPLETE +UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS +UPDATE_ROLLBACK_FAILED +UPDATE_ROLLBACK_IN_PROGRESS +IMPORT_IN_PROGRESS +IMPORT_COMPLETE +IMPORT_ROLLBACK_IN_PROGRESS +IMPORT_ROLLBACK_FAILED +IMPORT_ROLLBACK_COMPLETE diff --git a/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py b/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py new file mode 100644 index 0000000..7534bb2 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py @@ -0,0 +1,200 @@ +#!/opt/app-root/bin/python + +import subprocess, os +from prometheus_client import start_http_server, Summary, Gauge, Counter +import argparse +import time +import boto3 +import botocore +from benedict import benedict + + +# calculate number of stacks with state breakdown +def getAwsStacks(cSessions,awsStackAvailableSt): + paginator = cSessions["cloudformation"].get_paginator("list_stacks") + paginatorCursor = paginator.paginate(PaginationConfig={"MaxItems": 10000}) + nrStacksPerState = {} + #Initialize temporary structure holding the data + for stacks in awsStackAvailableSt: + nrStacksPerState[stacks] = 0 + for page in paginatorCursor: + for stacks in page["StackSummaries"]: + for stackState in awsStackAvailableSt: + if 
stacks["StackStatus"] == stackState: + nrStacksPerState[stackState] = nrStacksPerState[stackState] +1 + return nrStacksPerState + +def getAccountID(): + awsSession = boto3.client("sts", aws_access_key_id=args.apikey, aws_secret_access_key=args.secretkey) + awsReturns = awsSession.get_caller_identity() + return awsReturns["Account"] + +def getAccountAliasBasedonID(accountID): + return 0 + +## If we want to fetch the usage for all of the regions on given account +## we'll need to fetch a list of regions available on this particular AWS account +def getRegions(): + awsSession = boto3.client("ec2", aws_access_key_id=args.apikey, aws_secret_access_key=args.secretkey, region_name="us-east-1") + awsReturns = awsSession.describe_regions() + if args.debug == True: + print("Regions fetched from active account: " + str(awsReturns)) + regions = [] + for page in awsReturns["Regions"]: + regions.append(page["RegionName"]) + if args.debug == True: + print("Adding " + str(page["RegionName"]) + " to the region list") + return regions + + +if __name__ == "__main__": + + # Fetch&parse args + parser = argparse.ArgumentParser() + parser.add_argument("-a", "--apikey", help=" AWS Access Key ID ") + parser.add_argument("-s", "--secretkey", help=" AWS Sercet Access Key") + parser.add_argument("-r", "--regions", default="All", help="List of AWS Regions to be used for queries") + parser.add_argument( + "-t", "--time", type=int, default=900, help=" Sleep time between fetching the AWS API input" + ) + parser.add_argument("-d", "--debug", help=" Should we be more verbose?", action="store_true") + parser.add_argument( + "-p", "--port", default=8000, help=" TCP port to be used to expose metrics HTTP endpoint" + ) + args = parser.parse_args() + + promMetrics = {} + awsStackAvailableStates= [] + with open('./aws-stack-states.cnf') as confFile: + for line in confFile: + if line != None and "#" not in line: + state = line.strip() + awsStackAvailableStates.append(state) + promMetrics[state] = {} + 
print("Creating metric for " + state + " CF stack state") + apiCFMetricName = "aws_cloudformation_stack_in_" + state + apiCFMetricDesc = "Gauge set on number of CloudFormation Stacks in "+ state +" state" + promMetrics[state] = Gauge( + apiCFMetricName, apiCFMetricDesc, ["region", "accountid"] + ) + + ## Strip regions string from leading and trailing spaces + aRegions = str(args.regions).strip() + + ## Setting up basic variables + awsRegions = {} + awsRegionsList = [] + + ## Split the regions string on whitespace (str.split has no "\s" class; commas next to the spaces are dropped too), or on commas + if aRegions.find(" ") > 0: + awsRegionsList = aRegions.replace(",", " ").split() + for region in awsRegionsList: + awsRegions[region] = {} + elif aRegions.find(",") > 0: + awsRegionsList = aRegions.split(",") + for region in awsRegionsList: + awsRegions[region] = {} + ## If no region was specified, we're defaulting to "All" + elif aRegions == "All": + print("Region parameter was not passed, fetching all available AWS Regions") + awsRegionsList = getRegions() + for region in awsRegionsList: + awsRegions[region] = {} + ## Falling back to a single specified region + else: + if args.debug == True: + print("Following AWS region will be scraped for data: ") + awsRegionsList.append(aRegions) + print(str(awsRegionsList)) + for region in awsRegionsList: + awsRegions[region] = {} + + # Getting AccountId + awsAccountID = getAccountID() + print("Exporter configured to calculate metrics on : " + str(awsAccountID)) + + ## Setting initial sessions, per region + for region in awsRegionsList: + awsRegions[region]["clientSession"] = {} + awsRegions[region]["clientSession"]["cloudformation"] = boto3.client( + "cloudformation", + aws_access_key_id=args.apikey, + aws_secret_access_key=args.secretkey, + region_name=region, + ) + awsRegions[region]["clientSession"]["ec2"] = boto3.client( + "ec2", + aws_access_key_id=args.apikey, + aws_secret_access_key=args.secretkey, + region_name=region, + ) + + ## Setting up Counter metrics to track AWS API call failures + # 
Setting variables + apiCallFailureMetricName = "aws_api_failed_requests_cloudformation" + apiCallFailureMetricDesc = "Counter set on failed AWS API calls" + apiCallSuccessMetricName = "aws_api_success_requests_cloudformation" + apiCallSuccessMetricDesc = "Counter set on succesfull AWS API calls" + # Initializing metrics + apiCallFails = Counter(apiCallFailureMetricName, apiCallFailureMetricDesc) + apiCallSuccess = Counter(apiCallSuccessMetricName, apiCallSuccessMetricDesc) + + # Resetting counters + apiCallFails.inc(0) + apiCallSuccess.inc(0) + + ## Initializing HTTP /metrics endpoint for Prometheus metrics + start_http_server(args.port) + print("Started AWS CloudFormation Stack State Exporter listening on port: " + str(args.port)) + + # Variables controlling the flow on main loop + initialRequestsCounter = 0 + warmUpPeriod = 1 + requestDelay = 0.5 + requestCounterHardStop = 8196 + + #if args.debug == True: + # print("Total of CloudFormation Stacks Metric/Label set to be calculated: " + # +str(len(awsRegionsList) * len(promMetrics["PrometheusMetrics"]))) + + ## Main loop, going through the regions and setting current metrics values for both value and usage + while True: + for region in awsRegionsList: + metricsValue = getAwsStacks(awsRegions[region]["clientSession"],awsStackAvailableStates) + # Looping through metrics definitions: + for state in awsStackAvailableStates: + try: + apiCallSuccess.inc() + apiCFMetricName = "aws_cloudformation_stack_in_" + state + for state in awsStackAvailableStates: + promMetrics[state].labels(region=region, accountid=awsAccountID).set(metricsValue[state]) + except botocore.exceptions.EndpointConnectionError as error: + apiCallFails.inc() + print(str(error)) + except botocore.exceptions.ClientError as error: + apiCallFails.inc() + print(str(error)) + + ## Initial Requests are executed quicker to ensure we got all values in metrics + #initialRequestsCounter = initialRequestsCounter + 1 + # Check if we completed initial run + # If so 
throttle down to delay value specified in command line + + if ( + initialRequestsCounter >= (len(awsRegionsList) * len(promMetrics.keys())) + and initialRequestsCounter != requestCounterHardStop): + + if args.debug == True: + print("Warmup completed after " + str(initialRequestsCounter) + ", throttling down") + requestDelay = args.time + warmUpPeriod = 0 + initialRequestsCounter = requestCounterHardStop + + if warmUpPeriod == 1: + initialRequestsCounter = initialRequestsCounter + 1 + + + ## Hardcoded sleep to ensure we don't choke on AWS API + time.sleep(0.5) + time.sleep(requestDelay) +exit() diff --git a/exporters/aws-cloudformation-stacks-exporter/version.json b/exporters/aws-cloudformation-stacks-exporter/version.json new file mode 100644 index 0000000..1159bb1 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/version.json @@ -0,0 +1,3 @@ +{ + "version": "v0.0.1" +} From 27e1d9369df30bb338ec0dedd8a1b5ce1c925c12 Mon Sep 17 00:00:00 2001 From: Jakub Filipczak Date: Fri, 20 Aug 2021 17:33:27 +0200 Subject: [PATCH 2/4] Fixed the metrics naming scheme to comply with Prometheus guidelines --- .../aws_cfs_exporter.py | 65 +++++++------------ 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py b/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py index 7534bb2..9453020 100644 --- a/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py +++ b/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py @@ -25,7 +25,8 @@ def getAwsStacks(cSessions,awsStackAvailableSt): return nrStacksPerState def getAccountID(): - awsSession = boto3.client("sts", aws_access_key_id=args.apikey, aws_secret_access_key=args.secretkey) + awsSession = boto3.client("sts", aws_access_key_id=args.apikey, + aws_secret_access_key=args.secretkey) awsReturns = awsSession.get_caller_identity() return awsReturns["Account"] @@ -63,20 +64,18 @@ def getRegions(): ) args = 
parser.parse_args() - promMetrics = {} + apiCFMetricName = "aws_cloudformation_stacks_total" + apiCFMetricDesc = "Gauge set on number of CloudFormation Stacks (state,region and accountid in labels)" + #promMetrics[state] + CloudFormationPromMetric = Gauge(apiCFMetricName, apiCFMetricDesc, ["region", "accountid", "state"]) + awsStackAvailableStates= [] with open('./aws-stack-states.cnf') as confFile: for line in confFile: if line != None and "#" not in line: state = line.strip() awsStackAvailableStates.append(state) - promMetrics[state] = {} - print("Creating metric for " + state + " CF stack state") - apiCFMetricName = "aws_cloudformation_stack_in_" + state - apiCFMetricDesc = "Gauge set on number of CloudFormation Stacks in "+ state +" state" - promMetrics[state] = Gauge( - apiCFMetricName, apiCFMetricDesc, ["region", "accountid"] - ) + ## Strip regions string from leading and trailing spaces aRegions = str(args.regions).strip() @@ -153,48 +152,34 @@ def getRegions(): requestDelay = 0.5 requestCounterHardStop = 8196 - #if args.debug == True: - # print("Total of CloudFormation Stacks Metric/Label set to be calculated: " - # +str(len(awsRegionsList) * len(promMetrics["PrometheusMetrics"]))) - ## Main loop, going through the regions and setting current metrics values for both value and usage while True: for region in awsRegionsList: - metricsValue = getAwsStacks(awsRegions[region]["clientSession"],awsStackAvailableStates) - # Looping through metrics definitions: - for state in awsStackAvailableStates: - try: - apiCallSuccess.inc() - apiCFMetricName = "aws_cloudformation_stack_in_" + state - for state in awsStackAvailableStates: - promMetrics[state].labels(region=region, accountid=awsAccountID).set(metricsValue[state]) - except botocore.exceptions.EndpointConnectionError as error: + try: + metricsValue = getAwsStacks(awsRegions[region]["clientSession"],awsStackAvailableStates) + apiCallSuccess.inc() + for state in awsStackAvailableStates: + 
CloudFormationPromMetric.labels(state=state,region=region, accountid=awsAccountID).set(metricsValue[state]) + except botocore.exceptions.EndpointConnectionError as error: apiCallFails.inc() print(str(error)) - except botocore.exceptions.ClientError as error: + except botocore.exceptions.ClientError as error: apiCallFails.inc() print(str(error)) - ## Initial Requests are executed quicker to ensure we got all values in metrics - #initialRequestsCounter = initialRequestsCounter + 1 - # Check if we completed initial run - # If so throttle down to delay value specified in command line - - if ( - initialRequestsCounter >= (len(awsRegionsList) * len(promMetrics.keys())) - and initialRequestsCounter != requestCounterHardStop): + if ( + initialRequestsCounter >= (len(awsRegionsList) * len(awsStackAvailableStates)) + and initialRequestsCounter != requestCounterHardStop): - if args.debug == True: - print("Warmup completed after " + str(initialRequestsCounter) + ", throttling down") - requestDelay = args.time - warmUpPeriod = 0 - initialRequestsCounter = requestCounterHardStop + requestDelay = args.time + warmUpPeriod = 0 + initialRequestsCounter = requestCounterHardStop - if warmUpPeriod == 1: - initialRequestsCounter = initialRequestsCounter + 1 + if warmUpPeriod == 1: + initialRequestsCounter = initialRequestsCounter + 1 ## Hardcoded sleep to ensure we don't choke on AWS API - time.sleep(0.5) - time.sleep(requestDelay) + time.sleep(0.5) + time.sleep(requestDelay) exit() From 1c74c86c218fe747c48fc72fabc2890d33d9b2fc Mon Sep 17 00:00:00 2001 From: Jakub Filipczak Date: Fri, 27 Aug 2021 11:24:35 +0200 Subject: [PATCH 3/4] Fixed port variable typing --- .../aws-cloudformation-stacks-exporter/aws_cfs_exporter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py b/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py index 9453020..e387039 100644 --- 
a/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py +++ b/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py @@ -143,7 +143,7 @@ def getRegions(): apiCallSuccess.inc(0) ## Initializing HTTP /metrics endpoint for Prometheus metrics - start_http_server(args.port) + start_http_server(int(args.port)) print("Started AWS CloudFormation Stack State Exporter listening on port: " + str(args.port)) # Variables controlling the flow on main loop From 369f4977c6f0ec3e8bc4e6902bb8e3a46f9b9a8a Mon Sep 17 00:00:00 2001 From: Jakub Filipczak Date: Mon, 30 Aug 2021 08:19:34 -0400 Subject: [PATCH 4/4] * Updated AWS CloudFormation Stacks exporter code * Updated automation to support the new exporter * Added external mount for Alertmanager to persist silences --- .../Dockerfile | 4 -- playbooks/infra-prometheus/setup-all.yml | 6 +- prometheus/generic/add-target/tasks/main.yml | 40 +++++++++++ .../templates/aws_cfs_exporter.yml.j2 | 4 ++ .../templates/aws_sq_exporter.yml.j2 | 4 ++ .../setup-alertmanager/tasks/docker.yml | 1 + .../README.md | 69 +++++++++++++++++++ .../defaults/main.yml | 13 ++++ .../tasks/docker.yml | 31 +++++++++ .../tasks/main.yml | 6 ++ .../tasks/prereqs.yml | 35 ++++++++++ .../templates/prometheus.yml.j2 | 13 +++- 12 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 prometheus/generic/add-target/templates/aws_cfs_exporter.yml.j2 create mode 100644 prometheus/generic/add-target/templates/aws_sq_exporter.yml.j2 create mode 100644 prometheus/generic/setup-aws-cloudwatch-stacks-exporter/README.md create mode 100644 prometheus/generic/setup-aws-cloudwatch-stacks-exporter/defaults/main.yml create mode 100644 prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/docker.yml create mode 100644 prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/main.yml create mode 100644 prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/prereqs.yml diff --git 
a/exporters/aws-cloudformation-stacks-exporter/Dockerfile b/exporters/aws-cloudformation-stacks-exporter/Dockerfile index 55c8b2a..29b7578 100644 --- a/exporters/aws-cloudformation-stacks-exporter/Dockerfile +++ b/exporters/aws-cloudformation-stacks-exporter/Dockerfile @@ -11,7 +11,3 @@ RUN chown -R 1001:0 ./ RUN chmod 700 ./aws_cfs_exporter.py USER 1001 EXPOSE 8000 - - -# Run the application -# CMD ./aws_cfs_exporter.py diff --git a/playbooks/infra-prometheus/setup-all.yml b/playbooks/infra-prometheus/setup-all.yml index 6fefe9e..974d3de 100644 --- a/playbooks/infra-prometheus/setup-all.yml +++ b/playbooks/infra-prometheus/setup-all.yml @@ -22,6 +22,8 @@ - "{{ playbook_dir }}/../../prometheus/generic/setup-prometheus" - "{{ playbook_dir }}/../../prometheus/generic/setup-alertmanager" - "{{ playbook_dir }}/../../prometheus/generic/update-thresholds" + - "{{ playbook_dir }}/../../grafana/generic/setup-grafana" + #- "{{ playbook_dir }}/../../grafana/generic/configure-grafana-datasource" tags: - prometheus - alertmanager @@ -31,8 +33,6 @@ - name: Setup onboard exporters hosts: monitoring-hosts become: True - vars: - provision_state: "started" roles: - "{{ playbook_dir }}/../../prometheus/generic/setup-ssl-exporter" - "{{ playbook_dir }}/../../prometheus/generic/setup-ilo-exporter" @@ -41,6 +41,8 @@ - "{{ playbook_dir }}/../../prometheus/generic/setup-openstack-exporter" - "{{ playbook_dir }}/../../prometheus/generic/setup-junos-exporter" - "{{ playbook_dir }}/../../prometheus/generic/setup-openstack-exporter" + - "{{ playbook_dir }}/../../prometheus/generic/setup-aws-sq-exporter" + - "{{ playbook_dir }}/../../prometheus/generic/setup-aws-cloudwatch-stacks-exporter" tags: - exporters - onboard-exporters diff --git a/prometheus/generic/add-target/tasks/main.yml b/prometheus/generic/add-target/tasks/main.yml index 4109d8c..dfd139a 100644 --- a/prometheus/generic/add-target/tasks/main.yml +++ b/prometheus/generic/add-target/tasks/main.yml @@ -89,6 +89,24 @@ seuser: 
system_u setype: container_file_t +- name: create aws-sq-exporter_targets directory + file: + path: "/var/prometheus_targets/aws_sq_exporter_targets" + state: directory + mode: '0775' + group: monitoring-editors + seuser: system_u + setype: container_file_t + +- name: create aws_cfs_exporter_targets directory + file: + path: "/var/prometheus_targets/aws_cfs_exporter_targets" + state: directory + mode: '0775' + group: monitoring-editors + seuser: system_u + setype: container_file_t + - name: create federated_prometheus_targets directory file: path: "/var/prometheus_targets/federated_targets" @@ -120,6 +138,28 @@ loop: "{{ groups['prometheus_target_haproxy'] }}" when: "'prometheus_target_haproxy' in groups" +- name: template the aws-sq-exporter_targets + template: + src: aws_sq_exporter.yml.j2 + dest: "/var/prometheus_targets/aws_sq_exporter_targets/aws-sq-exporter_target_{{ item.awsAccount }}.yml" + mode: '0775' + group: monitoring-editors + seuser: system_u + setype: container_file_t + loop: "{{ ansible_sq_exporter }}" + when: "'monitoring-aws-sq-exporter' in groups" + +- name: template the aws-cfs-exporter_targets + template: + src: aws_cfs_exporter.yml.j2 + dest: "/var/prometheus_targets/aws_cfs_exporter_targets/aws-cfs-exporter_target_{{ item.awsAccount }}.yml" + mode: '0775' + group: monitoring-editors + seuser: system_u + setype: container_file_t + loop: "{{ ansible_cfs_exporter }}" + when: "'monitoring-aws-cfs-exporter' in groups" + - name: template the bind_targets template: src: bind_target.yml.j2 diff --git a/prometheus/generic/add-target/templates/aws_cfs_exporter.yml.j2 b/prometheus/generic/add-target/templates/aws_cfs_exporter.yml.j2 new file mode 100644 index 0000000..dfa20d6 --- /dev/null +++ b/prometheus/generic/add-target/templates/aws_cfs_exporter.yml.j2 @@ -0,0 +1,4 @@ +- targets: + - {{ ansible_ssh_host }}:{{ item.port }} + labels: + name: 'AWS CFS Exporter {{ item.awsAccount }}' diff --git 
a/prometheus/generic/add-target/templates/aws_sq_exporter.yml.j2 b/prometheus/generic/add-target/templates/aws_sq_exporter.yml.j2 new file mode 100644 index 0000000..f7fb3a7 --- /dev/null +++ b/prometheus/generic/add-target/templates/aws_sq_exporter.yml.j2 @@ -0,0 +1,4 @@ +- targets: + - {{ ansible_ssh_host }}:{{ item.port }} + labels: + name: 'AWS SQ Exporter {{ item.awsAccount }}' diff --git a/prometheus/generic/setup-alertmanager/tasks/docker.yml b/prometheus/generic/setup-alertmanager/tasks/docker.yml index 5b93c74..5411dd6 100644 --- a/prometheus/generic/setup-alertmanager/tasks/docker.yml +++ b/prometheus/generic/setup-alertmanager/tasks/docker.yml @@ -38,5 +38,6 @@ - "{{ alertmanager_port }}:9093" volumes: - "{{ monitoring_config_dir }}/alertmanager.yml:/etc/alertmanager/alertmanager.yml:Z" + - "/var/alertmanager:/alertmanager:Z" state: "{{ provision_state }}" recreate: yes diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/README.md b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/README.md new file mode 100644 index 0000000..08ace8a --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/README.md @@ -0,0 +1,69 @@ +setup-aws-cloudformation-stacks-exporter +========= + +This role will instantiate an AWS Cloudformation Stacks Exporter container on targeted hosts. Role accepts a list of AWS accounts to monitor, and will spin up one Docker container per account. + +Requirements +------------ + +Docker must be available and running on the targeted hosts. 
+ +Role Variables +-------------- +## Default values of variables: +``` +--- +aws_cfs_exporter_image: 'prom/aws-cloudformation-stacks-exporter' +aws_cfs_exporter_image_version: 'latest' +aws_cfs_exporter_port: '8080' + +provision_state: "started" + +ansible_cfs_exporter: + - awsAccount: "Dummy-Account" + port: 9420 + apikey: 22222 + secretkey: 3333 + regions: "us-east-1,us-east-2" + debug: false +``` +``` +aws_cfs_exporter_image - The AWS CFS Exporter image to deploy. +aws_cfs_exporter_image_version - The image tag to deploy. +aws_cfs_exporter_port - The port to be exposed on the container. +provision_state - Options: [absent, killed, present, reloaded, restarted, **started** (default), stopped] + +ansible_cfs_exporter: - variable holding individual account configuration + - awsAccount: "Dummy-Account" - AWS Account alias + port: 9420 - Port on which this specific container will be exposed for metrics scraping + apikey: 22222 - AWS Account API Key + secretkey: 3333 - AWS Account SecretKey + regions: "ex1,ex2" - Comma-separated list of regions to query for CloudFormation Stack statuses + debug: false - Increase logging verbosity +``` + + +Dependencies +------------ +``` +python >= 2.6 +docker-py >= 0.3.0 +The docker server >= 0.10.0 +``` + +Example Playbook +---------------- +``` +- name: Setup AWS CFS Exporter + hosts: prometheus_master + become: True + vars: + provision_state: "started" + roles: + - prometheus/generic/setup-aws-cloudwatch-stacks-exporter +``` + +License +------- + +BSD diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/defaults/main.yml b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/defaults/main.yml new file mode 100644 index 0000000..b39dc19 --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/defaults/main.yml @@ -0,0 +1,13 @@ +--- +aws_cfs_exporter_image: 'prom/aws-cloudformation-stacks-exporter' +aws_cfs_exporter_image_version: 'latest' +aws_cfs_exporter_port: '8080' + +provision_state: 
"started" + +ansible_cfs_exporter: + - awsAccount: "Dummy-Account" + port: 9420 + apikey: 22222 + secretkey: 3333 + regions: "us-east-1" diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/docker.yml b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/docker.yml new file mode 100644 index 0000000..475933a --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/docker.yml @@ -0,0 +1,31 @@ +--- + +- name: Enable firewalld + service: + name: firewalld + enabled: yes + state: started + +- name: Open Firewall for Prometheus + firewalld: + port: "{{ item.port }}/tcp" + permanent: yes + state: enabled + immediate: yes + loop: "{{ ansible_cfs_exporter }}" + +- name: Run AWS SQ Exporter Docker container + docker_container: + name: "aws-cfs-exporter-{{ item.awsAccount }}" + image: "{{ aws_cfs_exporter_image }}:{{ aws_cfs_exporter_image_version }}" + restart_policy: unless-stopped + network_mode: host + state: "{{ provision_state }}" + command: | + /opt/app-root/src/aws_cfs_exporter.py + --apikey "{{ item.apikey}}" + --secretkey "{{ item.secretkey }}" + --regions "{{ item.regions}}" + --port "{{ item.port }}" + restart: yes + loop: "{{ ansible_cfs_exporter }}" diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/main.yml b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/main.yml new file mode 100644 index 0000000..5affcdf --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/main.yml @@ -0,0 +1,6 @@ +--- +- name: Run prereqs + import_tasks: prereqs.yml + +- name: Run the docker images + import_tasks: docker.yml diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/prereqs.yml b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/prereqs.yml new file mode 100644 index 0000000..213f376 --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/prereqs.yml @@ -0,0 +1,35 @@ +--- +- name: "install EPEL 
GPG key - if specified" + rpm_key: + key: "{{ monitoring_host_epel_gpg_download_url }}" + state: present + when: + - monitoring_host_epel_gpg_download_url is defined + - monitoring_host_epel_gpg_download_url|trim != '' + - monitoring_host_epel_disable_gpg_check|lower == 'no' + +- name: "install epel-release" + yum: + name: "{{ monitoring_host_epel_download_url }}" + state: present + disable_gpg_check: "{{ monitoring_host_epel_disable_gpg_check | default('no') }}" + +- name: Ensure epel-release is installed + yum: + name: "{{ item }}" + state: present + with_items: + - epel-release + +- name: Ensure pip is installed + yum: + name: "{{ item }}" + state: present + with_items: + - python-pip + +- name: Install required python libraries + pip: + name: "docker-py" + state: present + diff --git a/prometheus/generic/setup-prometheus/templates/prometheus.yml.j2 b/prometheus/generic/setup-prometheus/templates/prometheus.yml.j2 index 6eb3067..986a8d4 100644 --- a/prometheus/generic/setup-prometheus/templates/prometheus.yml.j2 +++ b/prometheus/generic/setup-prometheus/templates/prometheus.yml.j2 @@ -31,6 +31,18 @@ scrape_configs: - files: - /etc/prometheus/targets/node_targets/*.yml + - job_name: 'aws_sq_exporter' + scrape_interval: 60s + file_sd_configs: + - files: + - /etc/prometheus/targets/aws_sq_exporter_targets/*.yml + + - job_name: 'aws_cloudformation_stacks_exporter' + scrape_interval: 60s + file_sd_configs: + - files: + - /etc/prometheus/targets/aws_cfs_exporter_targets/*.yml + {% if (groups['monitoring-hosts'] |length ) > 1 %} - job_name: 'federate-sanity-check' scrape_interval: 15s @@ -59,7 +71,6 @@ scrape_configs: - files: - /etc/prometheus/targets/federated_targets/*.yml - - job_name: 'haproxy_exporter' scrape_interval: 5s file_sd_configs: