diff --git a/exporters/aws-cloudformation-stacks-exporter/.dockerignore b/exporters/aws-cloudformation-stacks-exporter/.dockerignore new file mode 100644 index 0000000..42061c0 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/.dockerignore @@ -0,0 +1 @@ +README.md \ No newline at end of file diff --git a/exporters/aws-cloudformation-stacks-exporter/Dockerfile b/exporters/aws-cloudformation-stacks-exporter/Dockerfile new file mode 100644 index 0000000..29b7578 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/Dockerfile @@ -0,0 +1,13 @@ +FROM registry.access.redhat.com/ubi8/python-38 + +# Add application sources with correct permissions for OpenShift +USER 0 +# Install the dependencies +RUN pip install --upgrade pip && \ + pip install prometheus-client boto3 python-benedict +ADD aws_cfs_exporter.py . +ADD aws-stack-states.cnf . +RUN chown -R 1001:0 ./ +RUN chmod 700 ./aws_cfs_exporter.py +USER 1001 +EXPOSE 8000 diff --git a/exporters/aws-cloudformation-stacks-exporter/README.md b/exporters/aws-cloudformation-stacks-exporter/README.md new file mode 100644 index 0000000..20cf243 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/README.md @@ -0,0 +1,22 @@ +## AWS CloudFormation Stack Exporter ## +*** *** +This is a simple Prometheus Exporter that queries the AWS API for the number of CloudFormation Stacks in each state. 
+ +## Building the exporter Docker image ## +Docker image should be based on the provided Dockerfile; to build the image, run this command from the repository root directory: + + `export VERSION="0.1.1"; docker build -t aws-cloudformation-stacks-exporter:${VERSION} exporters/aws-cloudformation-stacks-exporter/` + +## Running the exporter and AWS credentials ## +Exporter uses the AWS API directly; the simplest way of injecting API keys is by mounting a prepopulated .aws directory into the container: + + `docker run -p 8000:8000 -v /${HOME}/.aws:/home/exporter/.aws aws-cloudformation-stacks-exporter:0.1.1` + +Other options are: + +* -a APIKEY, --apikey APIKEY : AWS Access Key ID +* -s SECRETKEY, --secretkey SECRETKEY : AWS Secret Access Key +* -r REGION(S), --regions REGION : AWS Region or list of comma separated regions to be used for queries +* -t TIME, --time TIME : Sleep time between fetching the AWS API input +* -d, --debug : Should we be more verbose? +* -p PORT, --port PORT : TCP port to be used to expose metrics HTTP endpoint diff --git a/exporters/aws-cloudformation-stacks-exporter/aws-stack-states.cnf b/exporters/aws-cloudformation-stacks-exporter/aws-stack-states.cnf new file mode 100644 index 0000000..a5b7f13 --- /dev/null +++ b/exporters/aws-cloudformation-stacks-exporter/aws-stack-states.cnf @@ -0,0 +1,24 @@ +## Configuration file listing all available CloudFormation Stack states +CREATE_COMPLETE +CREATE_IN_PROGRESS +CREATE_FAILED +DELETE_COMPLETE +DELETE_FAILED +DELETE_IN_PROGRESS +REVIEW_IN_PROGRESS +ROLLBACK_COMPLETE +ROLLBACK_FAILED +ROLLBACK_IN_PROGRESS +UPDATE_COMPLETE +UPDATE_COMPLETE_CLEANUP_IN_PROGRESS +UPDATE_FAILED +UPDATE_IN_PROGRESS +UPDATE_ROLLBACK_COMPLETE +UPDATE_ROLLBACK_COMPLETE_CLEANUP_IN_PROGRESS +UPDATE_ROLLBACK_FAILED +UPDATE_ROLLBACK_IN_PROGRESS +IMPORT_IN_PROGRESS +IMPORT_COMPLETE +IMPORT_ROLLBACK_IN_PROGRESS +IMPORT_ROLLBACK_FAILED +IMPORT_ROLLBACK_COMPLETE diff --git a/exporters/aws-cloudformation-stacks-exporter/aws_cfs_exporter.py 
#!/opt/app-root/bin/python
"""Prometheus exporter for the number of AWS CloudFormation stacks per state.

For every configured region the exporter lists the CloudFormation stacks,
counts them per stack status and publishes the counts as the gauge
``aws_cloudformation_stacks_total`` labelled with region, account id and
state.  Two counters track successful and failed scrape passes.
"""

import argparse
import os
import subprocess
import time

import boto3
import botocore
from benedict import benedict
from prometheus_client import start_http_server, Summary, Gauge, Counter


def getAwsStacks(cSessions, awsStackAvailableSt):
    """Count the stacks visible to one regional client, broken down by state.

    Args:
        cSessions: mapping that holds a boto3 CloudFormation client under the
            key ``"cloudformation"``.
        awsStackAvailableSt: iterable of stack status names to report.

    Returns:
        dict mapping every requested status to the number of listed stacks in
        that status.  Statuses with no stacks are present with value 0;
        stacks whose status was not requested are ignored.
    """
    nrStacksPerState = dict.fromkeys(awsStackAvailableSt, 0)
    paginator = cSessions["cloudformation"].get_paginator("list_stacks")
    # MaxItems caps the listing, so at most 10000 stacks are counted.
    for page in paginator.paginate(PaginationConfig={"MaxItems": 10000}):
        for stack in page["StackSummaries"]:
            status = stack["StackStatus"]
            # Direct dict lookup instead of scanning every known state
            # for every stack.
            if status in nrStacksPerState:
                nrStacksPerState[status] += 1
    return nrStacksPerState


def getAccountID():
    """Return the AWS account id of the configured credentials (via STS).

    Reads the module-level ``args`` populated in the ``__main__`` section.
    """
    awsSession = boto3.client(
        "sts",
        aws_access_key_id=args.apikey,
        aws_secret_access_key=args.secretkey,
    )
    return awsSession.get_caller_identity()["Account"]


def getAccountAliasBasedonID(accountID):
    """Placeholder for an account-alias lookup; currently always returns 0."""
    return 0


def getRegions():
    """Return the names of the regions reported by EC2 ``describe_regions``.

    Used when no explicit region list was passed on the command line.
    Reads the module-level ``args`` populated in the ``__main__`` section.
    """
    awsSession = boto3.client(
        "ec2",
        aws_access_key_id=args.apikey,
        aws_secret_access_key=args.secretkey,
        region_name="us-east-1",
    )
    awsReturns = awsSession.describe_regions()
    if args.debug:
        print("Regions fetched from active account: " + str(awsReturns))
    regions = []
    for entry in awsReturns["Regions"]:
        regions.append(entry["RegionName"])
        if args.debug:
            print("Adding " + str(entry["RegionName"]) + " to the region list")
    return regions


def _loadStackStates(path):
    """Read the stack states to export from *path*, one state per line.

    Lines containing ``#`` are treated as comments.  Blank lines are skipped
    so they cannot turn into an empty ``state`` label, and duplicates are
    dropped while the file order is kept.
    """
    states = []
    with open(path) as confFile:
        for line in confFile:
            state = line.strip()
            if not state or "#" in state or state in states:
                continue
            states.append(state)
    return states


def _parseRegionList(rawRegions):
    """Split a comma and/or whitespace separated region string into names.

    Empty items are discarded and duplicates are removed, keeping the order
    in which the regions were given.
    """
    names = rawRegions.replace(",", " ").split()
    return list(dict.fromkeys(names))


if __name__ == "__main__":

    # Fetch&parse args
    parser = argparse.ArgumentParser()
    parser.add_argument("-a", "--apikey", help=" AWS Access Key ID ")
    parser.add_argument("-s", "--secretkey", help=" AWS Sercet Access Key")
    parser.add_argument(
        "-r", "--regions", default="All", help="List of AWS Regions to be used for queries"
    )
    parser.add_argument(
        "-t", "--time", type=int, default=900, help=" Sleep time between fetching the AWS API input"
    )
    parser.add_argument("-d", "--debug", help=" Should we be more verbose?", action="store_true")
    parser.add_argument(
        "-p",
        "--port",
        type=int,
        default=8000,
        help=" TCP port to be used to expose metrics HTTP endpoint",
    )
    args = parser.parse_args()

    apiCFMetricName = "aws_cloudformation_stacks_total"
    apiCFMetricDesc = (
        "Gauge set on number of CloudFormation Stacks (state,region and accountid in labels)"
    )
    CloudFormationPromMetric = Gauge(
        apiCFMetricName, apiCFMetricDesc, ["region", "accountid", "state"]
    )

    awsStackAvailableStates = _loadStackStates("./aws-stack-states.cnf")

    ## Work out which regions to scrape
    aRegions = str(args.regions).strip()
    if aRegions == "All":
        ## If no region was specified, we're defaulting to "All"
        print("Region parameter was not passed, fetching all available AWS Regions")
        awsRegionsList = getRegions()
    else:
        awsRegionsList = _parseRegionList(aRegions)
        if not awsRegionsList:
            parser.error("--regions must name at least one AWS region")
        if args.debug:
            print("Following AWS region will be scraped for data: ")
            print(str(awsRegionsList))
    awsRegions = {region: {} for region in awsRegionsList}

    # Getting AccountId
    awsAccountID = getAccountID()
    print("Exporter configured to calculate metrics on : " + str(awsAccountID))

    ## Setting initial sessions, per region
    for region in awsRegionsList:
        awsRegions[region]["clientSession"] = {
            service: boto3.client(
                service,
                aws_access_key_id=args.apikey,
                aws_secret_access_key=args.secretkey,
                region_name=region,
            )
            for service in ("cloudformation", "ec2")
        }

    ## Setting up Counter metrics to track AWS API call failures
    apiCallFailureMetricName = "aws_api_failed_requests_cloudformation"
    apiCallFailureMetricDesc = "Counter set on failed AWS API calls"
    apiCallSuccessMetricName = "aws_api_success_requests_cloudformation"
    apiCallSuccessMetricDesc = "Counter set on succesfull AWS API calls"
    apiCallFails = Counter(apiCallFailureMetricName, apiCallFailureMetricDesc)
    apiCallSuccess = Counter(apiCallSuccessMetricName, apiCallSuccessMetricDesc)

    # Resetting counters
    apiCallFails.inc(0)
    apiCallSuccess.inc(0)

    ## Initializing HTTP /metrics endpoint for Prometheus metrics
    start_http_server(args.port)
    print("Started AWS CloudFormation Stack State Exporter listening on port: " + str(args.port))

    # Warm-up: passes are separated by a short delay until this many region
    # scrapes were done, afterwards by the --time interval.
    warmUpScrapes = len(awsRegionsList) * len(awsStackAvailableStates)
    scrapesDone = 0
    warmingUp = True
    requestDelay = 0.5

    ## Main loop, going through the regions and setting current metric values
    while True:
        for region in awsRegionsList:
            try:
                metricsValue = getAwsStacks(
                    awsRegions[region]["clientSession"], awsStackAvailableStates
                )
                apiCallSuccess.inc()
                for state in awsStackAvailableStates:
                    CloudFormationPromMetric.labels(
                        state=state, region=region, accountid=awsAccountID
                    ).set(metricsValue[state])
            except (
                botocore.exceptions.BotoCoreError,
                botocore.exceptions.ClientError,
            ) as error:
                # BotoCoreError includes EndpointConnectionError; catching the
                # base class keeps the exporter alive when a scrape fails with
                # another botocore error.
                apiCallFails.inc()
                print(str(error))

            if warmingUp:
                if scrapesDone >= warmUpScrapes:
                    requestDelay = args.time
                    warmingUp = False
                else:
                    scrapesDone += 1

            ## Hardcoded sleep to ensure we don't choke on AWS API
            time.sleep(0.5)
        time.sleep(requestDelay)
playbook_dir }}/../../prometheus/generic/setup-ilo-exporter" @@ -41,6 +41,8 @@ - "{{ playbook_dir }}/../../prometheus/generic/setup-openstack-exporter" - "{{ playbook_dir }}/../../prometheus/generic/setup-junos-exporter" - "{{ playbook_dir }}/../../prometheus/generic/setup-openstack-exporter" + - "{{ playbook_dir }}/../../prometheus/generic/setup-aws-sq-exporter" + - "{{ playbook_dir }}/../../prometheus/generic/setup-aws-cloudwatch-stacks-exporter" tags: - exporters - onboard-exporters diff --git a/prometheus/generic/add-target/tasks/main.yml b/prometheus/generic/add-target/tasks/main.yml index 4109d8c..dfd139a 100644 --- a/prometheus/generic/add-target/tasks/main.yml +++ b/prometheus/generic/add-target/tasks/main.yml @@ -89,6 +89,24 @@ seuser: system_u setype: container_file_t +- name: create aws-sq-exporter_targets directory + file: + path: "/var/prometheus_targets/aws_sq_exporter_targets" + state: directory + mode: '0775' + group: monitoring-editors + seuser: system_u + setype: container_file_t + +- name: create aws_cfs_exporter_targets directory + file: + path: "/var/prometheus_targets/aws_cfs_exporter_targets" + state: directory + mode: '0775' + group: monitoring-editors + seuser: system_u + setype: container_file_t + - name: create federated_prometheus_targets directory file: path: "/var/prometheus_targets/federated_targets" @@ -120,6 +138,28 @@ loop: "{{ groups['prometheus_target_haproxy'] }}" when: "'prometheus_target_haproxy' in groups" +- name: template the aws-sq-exporter_targets + template: + src: aws_sq_exporter.yml.j2 + dest: "/var/prometheus_targets/aws_sq_exporter_targets/aws-sq-exporter_target_{{ item.awsAccount }}.yml" + mode: '0775' + group: monitoring-editors + seuser: system_u + setype: container_file_t + loop: "{{ ansible_sq_exporter }}" + when: "'monitoring-aws-sq-exporter' in groups" + +- name: template the aws-cfs-exporter_targets + template: + src: aws_cfs_exporter.yml.j2 + dest: 
"/var/prometheus_targets/aws_cfs_exporter_targets/aws-cfs-exporter_target_{{ item.awsAccount }}.yml" + mode: '0775' + group: monitoring-editors + seuser: system_u + setype: container_file_t + loop: "{{ ansible_cfs_exporter }}" + when: "'monitoring-aws-cfs-exporter' in groups" + - name: template the bind_targets template: src: bind_target.yml.j2 diff --git a/prometheus/generic/add-target/templates/aws_cfs_exporter.yml.j2 b/prometheus/generic/add-target/templates/aws_cfs_exporter.yml.j2 new file mode 100644 index 0000000..dfa20d6 --- /dev/null +++ b/prometheus/generic/add-target/templates/aws_cfs_exporter.yml.j2 @@ -0,0 +1,4 @@ +- targets: + - {{ ansible_ssh_host }}:{{ item.port }} + labels: + name: 'AWS CFS Exporter {{ item.awsAccount }}' diff --git a/prometheus/generic/add-target/templates/aws_sq_exporter.yml.j2 b/prometheus/generic/add-target/templates/aws_sq_exporter.yml.j2 new file mode 100644 index 0000000..f7fb3a7 --- /dev/null +++ b/prometheus/generic/add-target/templates/aws_sq_exporter.yml.j2 @@ -0,0 +1,4 @@ +- targets: + - {{ ansible_ssh_host }}:{{ item.port }} + labels: + name: 'AWS SQ Exporter {{ item.awsAccount }}' diff --git a/prometheus/generic/setup-alertmanager/tasks/docker.yml b/prometheus/generic/setup-alertmanager/tasks/docker.yml index 5b93c74..5411dd6 100644 --- a/prometheus/generic/setup-alertmanager/tasks/docker.yml +++ b/prometheus/generic/setup-alertmanager/tasks/docker.yml @@ -38,5 +38,6 @@ - "{{ alertmanager_port }}:9093" volumes: - "{{ monitoring_config_dir }}/alertmanager.yml:/etc/alertmanager/alertmanager.yml:Z" + - "/var/alertmanager:/alertmanager:Z" state: "{{ provision_state }}" recreate: yes diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/README.md b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/README.md new file mode 100644 index 0000000..08ace8a --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/README.md @@ -0,0 +1,69 @@ +setup-aws-cloudformation-stacks-exporter 
+========= + +This role will instantiate an AWS CloudFormation Stacks Exporter container on targeted hosts. Role accepts a list of AWS accounts to monitor, and will spin up one Docker container per account. + +Requirements +------------ + +Docker must be available and running on the targeted hosts. + +Role Variables +-------------- +## Default values of variables: +``` +--- +aws_cfs_exporter_image: 'prom/aws-cloudformation-stacks-exporter' +aws_cfs_exporter_image_version: 'latest' +aws_cfs_exporter_port: '8080' + +provision_state: "started" + +ansible_cfs_exporter: + - awsAccount: "Dummy-Account" + port: 9420 + apikey: 22222 + secretkey: 3333 + regions: "us-east-1,us-east-2" + debug: false +``` +``` +aws_cfs_exporter_image - The AWS CFS Exporter image to deploy. +aws_cfs_exporter_image_version - The image tag to deploy. +aws_cfs_exporter_port - The port to be exposed on container. +provision_state - Options: [absent, killed, present, reloaded, restarted, **started** (default), stopped] + +ansible_cfs_exporter: - variable holding individual account configuration + - awsAccount: "Dummy-Account" - AWS Account alias + port: 9420 - Port on which this specific container will be exposed for metrics scraping + apikey: 22222 - AWS Account API Key + secretkey: 3333 - AWS Account SecretKey + regions: "ex1,ex2" - Comma-separated list of regions to query for CloudFormation stack statuses + debug: false - Increase logging verbosity +``` + + +Dependencies +------------ +``` +python >= 2.6 +docker-py >= 0.3.0 +The docker server >= 0.10.0 +``` + +Example Playbook +---------------- +``` +- name: Setup AWS CFS Exporter + hosts: prometheus_master + become: True + vars: + provision_state: "started" + roles: + - prometheus/generic/setup-aws-cloudwatch-stacks-exporter +``` + +License +------- + +BSD diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/defaults/main.yml b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/defaults/main.yml new file mode 100644 
index 0000000..b39dc19 --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/defaults/main.yml @@ -0,0 +1,13 @@ +--- +aws_cfs_exporter_image: 'prom/aws-cloudformation-stacks-exporter' +aws_cfs_exporter_image_version: 'latest' +aws_cfs_exporter_port: '8080' + +provision_state: "started" + +ansible_cfs_exporter: + - awsAccount: "Dummy-Account" + port: 9420 + apikey: 22222 + secretkey: 3333 + regions: "us-east-1" diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/docker.yml b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/docker.yml new file mode 100644 index 0000000..475933a --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/docker.yml @@ -0,0 +1,31 @@ +--- + +- name: Enable firewalld + service: + name: firewalld + enabled: yes + state: started + +- name: Open Firewall for Prometheus + firewalld: + port: "{{ item.port }}/tcp" + permanent: yes + state: enabled + immediate: yes + loop: "{{ ansible_cfs_exporter }}" + +- name: Run AWS SQ Exporter Docker container + docker_container: + name: "aws-cfs-exporter-{{ item.awsAccount }}" + image: "{{ aws_cfs_exporter_image }}:{{ aws_cfs_exporter_image_version }}" + restart_policy: unless-stopped + network_mode: host + state: "{{ provision_state }}" + command: | + /opt/app-root/src/aws_cfs_exporter.py + --apikey "{{ item.apikey}}" + --secretkey "{{ item.secretkey }}" + --regions "{{ item.regions}}" + --port "{{ item.port }}" + restart: yes + loop: "{{ ansible_cfs_exporter }}" diff --git a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/main.yml b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/main.yml new file mode 100644 index 0000000..5affcdf --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/main.yml @@ -0,0 +1,6 @@ +--- +- name: Run prereqs + import_tasks: prereqs.yml + +- name: Run the docker images + import_tasks: docker.yml diff --git 
a/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/prereqs.yml b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/prereqs.yml new file mode 100644 index 0000000..213f376 --- /dev/null +++ b/prometheus/generic/setup-aws-cloudwatch-stacks-exporter/tasks/prereqs.yml @@ -0,0 +1,35 @@ +--- +- name: "install EPEL GPG key - if specified" + rpm_key: + key: "{{ monitoring_host_epel_gpg_download_url }}" + state: present + when: + - monitoring_host_epel_gpg_download_url is defined + - monitoring_host_epel_gpg_download_url|trim != '' + - monitoring_host_epel_disable_gpg_check|lower == 'no' + +- name: "install epel-release" + yum: + name: "{{ monitoring_host_epel_download_url }}" + state: present + disable_gpg_check: "{{ monitoring_host_epel_disable_gpg_check | default('no') }}" + +- name: Ensure epel-release is installed + yum: + name: "{{ item }}" + state: present + with_items: + - epel-release + +- name: Ensure pip is installed + yum: + name: "{{ item }}" + state: present + with_items: + - python-pip + +- name: Install required python libraries + pip: + name: "docker-py" + state: present + diff --git a/prometheus/generic/setup-prometheus/templates/prometheus.yml.j2 b/prometheus/generic/setup-prometheus/templates/prometheus.yml.j2 index 6eb3067..986a8d4 100644 --- a/prometheus/generic/setup-prometheus/templates/prometheus.yml.j2 +++ b/prometheus/generic/setup-prometheus/templates/prometheus.yml.j2 @@ -31,6 +31,18 @@ scrape_configs: - files: - /etc/prometheus/targets/node_targets/*.yml + - job_name: 'aws_sq_exporter' + scrape_interval: 60s + file_sd_configs: + - files: + - /etc/prometheus/targets/aws_sq_exporter_targets/*.yml + + - job_name: 'aws_cloudformation_stacks_exporter' + scrape_interval: 60s + file_sd_configs: + - files: + - /etc/prometheus/targets/aws_cfs_exporter_targets/*.yml + {% if (groups['monitoring-hosts'] |length ) > 1 %} - job_name: 'federate-sanity-check' scrape_interval: 15s @@ -59,7 +71,6 @@ scrape_configs: - files: - 
/etc/prometheus/targets/federated_targets/*.yml - - job_name: 'haproxy_exporter' scrape_interval: 5s file_sd_configs: