From 118d68e1cd3b68db5111e86773881b7839c42144 Mon Sep 17 00:00:00 2001 From: Jakub Filipczak Date: Fri, 8 Apr 2022 13:03:13 +0200 Subject: [PATCH 1/2] AWS SQ exporter code and Dockerfile --- exporters/aws-sq-exporter/.dockerignore | 1 + exporters/aws-sq-exporter/Dockerfile | 15 ++ exporters/aws-sq-exporter/README.md | 53 +++++ exporters/aws-sq-exporter/metrics.yaml | 20 ++ exporters/aws-sq-exporter/sq_exporter.py | 241 +++++++++++++++++++++++ exporters/aws-sq-exporter/version.json | 3 + 6 files changed, 333 insertions(+) create mode 100644 exporters/aws-sq-exporter/.dockerignore create mode 100644 exporters/aws-sq-exporter/Dockerfile create mode 100644 exporters/aws-sq-exporter/README.md create mode 100644 exporters/aws-sq-exporter/metrics.yaml create mode 100755 exporters/aws-sq-exporter/sq_exporter.py create mode 100644 exporters/aws-sq-exporter/version.json diff --git a/exporters/aws-sq-exporter/.dockerignore b/exporters/aws-sq-exporter/.dockerignore new file mode 100644 index 0000000..42061c0 --- /dev/null +++ b/exporters/aws-sq-exporter/.dockerignore @@ -0,0 +1 @@ +README.md \ No newline at end of file diff --git a/exporters/aws-sq-exporter/Dockerfile b/exporters/aws-sq-exporter/Dockerfile new file mode 100644 index 0000000..a4b8c35 --- /dev/null +++ b/exporters/aws-sq-exporter/Dockerfile @@ -0,0 +1,15 @@ +FROM registry.access.redhat.com/ubi8/python-38 + +# Add application sources with correct permissions for OpenShift +USER 0 +ADD sq_exporter.py . +ADD metrics.yaml . 
+RUN chown -R 1001:0 ./ +USER 1001 +EXPOSE 8000 +# Install the dependencies +RUN pip install --upgrade pip && \ + pip install prometheus-client boto3 python-benedict + +# Run the application +CMD sq_exporter.py diff --git a/exporters/aws-sq-exporter/README.md b/exporters/aws-sq-exporter/README.md new file mode 100644 index 0000000..e9b136d --- /dev/null +++ b/exporters/aws-sq-exporter/README.md @@ -0,0 +1,53 @@ +## AWS Service Quotas Exporter ## +*** *** +This is a simple Prometheus Exporter that querries AWS API for quota values of specific configuration items and calculates actual usage of those quotas. + +### AWS SQs ### +*** *** +Currently there's support for only two SQs: +* L-0263D0A3 - number of Elastic IPs defined for the region +* L-F678F1CE - number of VCPs defined for the region + +## Building the exporter Docker image ## +Docker image should be based on provided Dockerfile, to build the image run that command from repository root directory: + + `export VERSION="0.1.1"; docker build -t aws-sq-exporter:${VERSION} exporters/aws-sq-exporter/` + +## Running the exporter and AWS credentials ## +Exporter uses AWS API directly, simplest way of injecting API keys is by mounting prepopulated .aws into the container: + + `docker run -p 8000:8000 -v /${HOME}/.aws:/home/exporter/.aws aws-sq-exporter:0.1.1` + +Other options are: + +* -a APIKEY, --apikey APIKEY : AWS Access Key ID +* -s SECRETKEY, --secretkey SECRETKEY : AWS Sercet Access Key +* -r REGION(S), --regions REGION : AWS Region or list of comma separated regions to be used for queries +* -t TIME, --time TIME : Sleep time between fetching the AWS API input +* -d, --debug : Should we be more verbose? 
+* -p PORT, --port PORT : TCP port to be used to expose metrics HTTP endpoint + +## Metric file format ## +Metric definitions should follow the example format: + +```yaml +--- +- metricNameUsage: "aws_vpc_per_region_quota_usage" + usageDesc: "Number of VPCs in use" + metricNameQuota: "aws_vpc_per_region_quota_value" + quotaDesc: "Administrative Quota set on VPCs per Region" + serviceCode: "vpc" + quotaCode: "L-F678F1CE" + usageRetrieval: "describe_vpcs" + usageFilter: "Vpcs" + paginate: True +``` +* metricNameUsage - a name for Prometheus metric showing actual usage +* usageDesc - description that will be added to Prometheus usage metric +* metricNameQuota - a name for Prometheus metric showing the quota value +* quotaDesc - description that will be added to Prometheus quota value metrics +* serviceCode - serviceCode that's assigned to the metric (see AWS CLI manual) +* quotaCode - unique quotaCode (see AWS CLI manual) +* usageRetrieval - name of method which presents the information used to count the actual usage values +* usageFiter - name of dictionary that AWS API returns for usageRetrieval query +* paginate - reserved for future development diff --git a/exporters/aws-sq-exporter/metrics.yaml b/exporters/aws-sq-exporter/metrics.yaml new file mode 100644 index 0000000..d30cf11 --- /dev/null +++ b/exporters/aws-sq-exporter/metrics.yaml @@ -0,0 +1,20 @@ +--- +- metricNameUsage: "aws_eip_quota_usage" + usageDesc: "Number of Elastic IPs in use" + metricNameQuota: "aws_eip_quota_value" + quotaDesc: "Administrative Quota set on EIP" + serviceCode: "ec2" + quotaCode: "L-0263D0A3" + usageRetrieval: "describe_addresses" + usageFilter: "Addresses" + paginate: False + +- metricNameUsage: "aws_vpc_per_region_quota_usage" + usageDesc: "Number of VPCs in use" + metricNameQuota: "aws_vpc_per_region_quota_value" + quotaDesc: "Administrative Quota set on VPCs per Region" + serviceCode: "vpc" + quotaCode: "L-F678F1CE" + usageRetrieval: "describe_vpcs" + usageFilter: "Vpcs" + 
paginate: True diff --git a/exporters/aws-sq-exporter/sq_exporter.py b/exporters/aws-sq-exporter/sq_exporter.py new file mode 100755 index 0000000..9845300 --- /dev/null +++ b/exporters/aws-sq-exporter/sq_exporter.py @@ -0,0 +1,241 @@ +#!/opt/app-root/bin/python + +import subprocess, os +from prometheus_client import start_http_server, Summary, Gauge, Counter +import argparse +import time +import boto3 +import botocore +from benedict import benedict + +# Generic function to fetch administrative quota values +def getQuotaValue(quotaCode, serviceCode, cSessions): + paginator = cSessions["service-quotas"].get_paginator("list_service_quotas") + pCursor = paginator.paginate(ServiceCode=serviceCode, PaginationConfig={"MaxItems": 1000, "PageSize": 10}) + currentValue = 0 + currentQ = 0 + for page in pCursor: + for quotas in page["Quotas"]: + if quotas["QuotaCode"] == quotaCode: + currentQ = str(quotas["Value"]) + currentValue = currentValue + 1 + return currentQ + + +# fetch actual usage of specific service, works for EIP and Vpcs +def getUsage(cSessions, usageRetrieval, usageFilter): + awsCall = getattr(cSessions["ec2"], usageRetrieval) + awsReturns = awsCall() + return len(awsReturns[usageFilter]) + + +def getAccountID(): + awsSession = boto3.client("sts", aws_access_key_id=args.apikey, aws_secret_access_key=args.secretkey) + awsReturns = awsSession.get_caller_identity() + return awsReturns["Account"] + + +## If we want to fetch the usage for all of the regions on given account +## we'll need to fetch a list of regions available on this particular AWS account +def getRegions(): + awsSession = boto3.client("ec2", aws_access_key_id=args.apikey, aws_secret_access_key=args.secretkey, region_name="us-east-1") + awsReturns = awsSession.describe_regions() + if args.debug == True: + print("Regions fetched from active account: " + str(awsReturns)) + regions = [] + for page in awsReturns["Regions"]: + regions.append(page["RegionName"]) + if args.debug == True: + print("Adding " + 
str(page["RegionName"]) + " to the region list") + return regions + + +if __name__ == "__main__": + # Fetch&parse args + parser = argparse.ArgumentParser() + parser.add_argument("-a", "--apikey", help=" AWS Access Key ID ") + parser.add_argument("-s", "--secretkey", help=" AWS Sercet Access Key") + parser.add_argument("-r", "--regions", default="All", help="List of AWS Regions to be used for queries") + parser.add_argument( + "-t", "--time", type=int, default=900, help=" Sleep time between fetching the AWS API input" + ) + parser.add_argument("-d", "--debug", help=" Should we be more verbose?", action="store_true") + parser.add_argument( + "-p", "--port", default=8000, help=" TCP port to be used to expose metrics HTTP endpoint" + ) + parser.add_argument("-m", "--metricsfile", default="./metrics.yaml", help=" Metrics definition file") + args = parser.parse_args() + + ## Strip regions string from leading and trailing spaces + aRegions = str(args.regions).strip() + + ## Setting up basic variables + awsRegions = {} + awsRegionsList = [] + + ## slice the string if we find comma or space between regions names + if aRegions.find(" ") > 0: + awsRegionsList = aRegions.split("\s") + for region in awsRegionsList: + awsRegions[region] = {} + elif aRegions.find(",") > 0: + awsRegionsList = aRegions.split(",") + for region in awsRegionsList: + awsRegions[region] = {} + ## If no region was specified, we're defaulting to "All" + elif aRegions == "All": + print("Region parameter was not passed, fetching all available AWS Regions") + awsRegionsList = getRegions() + for region in awsRegionsList: + awsRegions[region] = {} + ## Falling back to a single specified region + else: + if args.debug == True: + print("Following AWS region will be scraped for data: ") + awsRegionsList.append(aRegions) + print(str(awsRegionsList)) + for region in awsRegionsList: + awsRegions[region] = {} + + print("Loading metrics definition file located at " + str(args.metricsfile)) + + # Getting AccountId + 
awsAccountID = getAccountID() + print("Exporter configured to calculate metrics on : " + str(awsAccountID)) + + ## Setting initial sessions, per region + for region in awsRegionsList: + awsRegions[region]["clientSession"] = {} + awsRegions[region]["clientSession"]["ec2"] = boto3.client( + "ec2", + aws_access_key_id=args.apikey, + aws_secret_access_key=args.secretkey, + region_name=region, + ) + awsRegions[region]["clientSession"]["service-quotas"] = boto3.client( + "service-quotas", + aws_access_key_id=args.apikey, + aws_secret_access_key=args.secretkey, + region_name=region, + ) + + # Loading up metrics configuration + promMetrics = benedict(args.metricsfile, format="yaml") + if args.debug == True: + print("Metric configuration: ") + print(str(promMetrics)) + + # Initializing Prometheus Gauge metrics + for metric in promMetrics["values"]: + if args.debug == True: + print("Creating metric for " + metric["quotaCode"] + " quota code") + metric["mObjectUsage"] = Gauge( + metric["metricNameUsage"], metric["usageDesc"], ["region", "accountid"] + ) + metric["mObjectQuota"] = Gauge( + metric["metricNameQuota"], metric["quotaDesc"], ["region", "accountid"] + ) + + ## Setting up Counter metrics to track AWS API call failures + # Setting variables + apiCallFailureMetricObjectID = "apiCallFailure" + apiCallFailureMetricName = "aws_api_failed_requests" + apiCallFailureMetricDesc = "Counter set on failed AWS API calls" + apiCallSuccessMetricObjectID = "apiCallSuccess" + apiCallSuccessMetricName = "aws_api_success_requests" + apiCallSuccessMetricDesc = "Counter set on succesfull AWS API calls" + # Initializing metrics + apiCallFails = Counter(apiCallFailureMetricName, apiCallFailureMetricDesc) + apiCallSuccess = Counter(apiCallSuccessMetricName, apiCallSuccessMetricDesc) + + # Resetting counters + apiCallFails.inc(0) + apiCallSuccess.inc(0) + + ## Initializing HTTP /metrics endpoint for Prometheus metrics + start_http_server(args.port) + print("Started AWS Service Quota Exporter 
listening on port: " + str(args.port)) + + # Variables controlling the flow on main loop + initialRequestsCounter = 0 + warmUpPeriod = 1 + requestDelay = 0.5 + requestCounterHardStop = 8196 + + if args.debug == True: + print("Total of ServiceQuotas Metric/Label set to be calculated: " + +str(len(awsRegionsList) * len(promMetrics["values"]))) + + ## Main loop, going through the regions and setting current metrics values for both value and usage + while True: + for region in awsRegionsList: + # Looping through metrics definitions: + for metric in promMetrics["values"]: + try: + quotaValue = getQuotaValue( + metric["quotaCode"], + metric["serviceCode"], + awsRegions[region]["clientSession"], + ) + apiCallSuccess.inc() + metric["mObjectQuota"].labels(region=region, accountid=awsAccountID).set(quotaValue) + except botocore.exceptions.EndpointConnectionError as error: + apiCallFails.inc() + print(str(error)) + except botocore.exceptions.ClientError as error: + apiCallFails.inc() + print(str(error)) + try: + usage = getUsage( + awsRegions[region]["clientSession"], + metric["usageRetrieval"], + metric["usageFilter"], + ) + apiCallSuccess.inc() + metric["mObjectUsage"].labels(region=region, accountid=awsAccountID).set(usage) + except botocore.exceptions.EndpointConnectionError as error: + apiCallFails.inc() + print(str(error)) + except botocore.exceptions.ClientError as error: + apiCallFails.inc() + print(str(error)) + + ## Initial Requests are executed quicker to ensure we got all values in metrics + #initialRequestsCounter = initialRequestsCounter + 1 + # Check if we completed initial run + # If so throttle down to delay value specified in command line + + if ( + initialRequestsCounter >= (len(awsRegionsList) * len(promMetrics["values"])) + and initialRequestsCounter != requestCounterHardStop): + + if args.debug == True: + print("Warmup completed after " + str(initialRequestsCounter) + ", throttling down") + requestDelay = args.time + warmUpPeriod = 0 + 
initialRequestsCounter = requestCounterHardStop + + if warmUpPeriod == 1: + initialRequestsCounter = initialRequestsCounter + 1 + + if args.debug == True: + print( + "Last obtained AWS Quota Value for " + + str(metric["mObjectQuota"]) + + " on " + + str(region) + + " is:" + ) + print(str(quotaValue)) + print( + "Last obtained AWS resource usage for " + + str(metric["mObjectUsage"]) + + " on " + + str(region) + + " is:" + ) + print(str(usage)) + ## Hardcoded sleep to ensure we don't choke on AWS API + time.sleep(0.5) + time.sleep(requestDelay) +exit() diff --git a/exporters/aws-sq-exporter/version.json b/exporters/aws-sq-exporter/version.json new file mode 100644 index 0000000..1159bb1 --- /dev/null +++ b/exporters/aws-sq-exporter/version.json @@ -0,0 +1,3 @@ +{ + "version": "v0.0.1" +} From a6e7a8ac2efa072a841f475ef64f98fff9a849c5 Mon Sep 17 00:00:00 2001 From: Jakub Filipczak Date: Tue, 19 Apr 2022 13:01:41 +0200 Subject: [PATCH 2/2] Add README --- exporters/aws-sq-exporter/README.md | 106 ++++++++++++++++++---------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/exporters/aws-sq-exporter/README.md b/exporters/aws-sq-exporter/README.md index e9b136d..8f39557 100644 --- a/exporters/aws-sq-exporter/README.md +++ b/exporters/aws-sq-exporter/README.md @@ -1,38 +1,38 @@ -## AWS Service Quotas Exporter ## -*** *** -This is a simple Prometheus Exporter that querries AWS API for quota values of specific configuration items and calculates actual usage of those quotas. +# AWS Service Quotas Exporter -### AWS SQs ### -*** *** -Currently there's support for only two SQs: -* L-0263D0A3 - number of Elastic IPs defined for the region -* L-F678F1CE - number of VCPs defined for the region +Exporter used for querying specified AWS account for quotas and quatas usage, and transforming that data into Prometheus metrics. 
-## Building the exporter Docker image ## -Docker image should be based on provided Dockerfile, to build the image run that command from repository root directory: - `export VERSION="0.1.1"; docker build -t aws-sq-exporter:${VERSION} exporters/aws-sq-exporter/` +### Parameters -## Running the exporter and AWS credentials ## -Exporter uses AWS API directly, simplest way of injecting API keys is by mounting prepopulated .aws into the container: - - `docker run -p 8000:8000 -v /${HOME}/.aws:/home/exporter/.aws aws-sq-exporter:0.1.1` - -Other options are: +``` +-a, --apikey : AWS Access Key ID +-s, --secretkey : AWS Secret Access Key +-r, --regions : List of AWS Regions to be used for queries (if none provided, all regions will be queried) +-t, --time : Sleep time between fetching the AWS API input (default is 900s) +-d, --debug : Set the exporter debug mode on +-p, --port : TCP port to be used to expose metrics HTTP endpoint (default is 8000) +-m, --metricsfile : a path to metrics definition file, should the default set not be enough +``` -* -a APIKEY, --apikey APIKEY : AWS Access Key ID -* -s SECRETKEY, --secretkey SECRETKEY : AWS Sercet Access Key -* -r REGION(S), --regions REGION : AWS Region or list of comma separated regions to be used for queries -* -t TIME, --time TIME : Sleep time between fetching the AWS API input -* -d, --debug : Should we be more verbose? 
-* -p PORT, --port PORT : TCP port to be used to expose metrics HTTP endpoint -## Metric file format ## -Metric definitions should follow the example format: +### Metrics definition file +By default, we're exporting metrics for the number of Elastic IPs and VPCs per region, but if a user wants to introduce other items, there's support for a custom definition file -```yaml +Default file: +``` --- -- metricNameUsage: "aws_vpc_per_region_quota_usage" +- metricNameUsage: "aws_eip_quota_usage" + usageDesc: "Number of Elastic IPs in use" + metricNameQuota: "aws_eip_quota_value" + quotaDesc: "Administrative Quota set on EIP" + serviceCode: "ec2" + quotaCode: "L-0263D0A3" + usageRetrieval: "describe_addresses" + usageFilter: "Addresses" + paginate: False + +- metricNameUsage: "aws_vpc_per_region_quota_usage" usageDesc: "Number of VPCs in use" metricNameQuota: "aws_vpc_per_region_quota_value" quotaDesc: "Administrative Quota set on VPCs per Region" @@ -42,12 +42,46 @@ Metric definitions should follow the example format: usageFilter: "Vpcs" paginate: True ``` -* metricNameUsage - a name for Prometheus metric showing actual usage -* usageDesc - description that will be added to Prometheus usage metric -* metricNameQuota - a name for Prometheus metric showing the quota value -* quotaDesc - description that will be added to Prometheus quota value metrics -* serviceCode - serviceCode that's assigned to the metric (see AWS CLI manual) -* quotaCode - unique quotaCode (see AWS CLI manual) -* usageRetrieval - name of method which presents the information used to count the actual usage values -* usageFiter - name of dictionary that AWS API returns for usageRetrieval query -* paginate - reserved for future development + +Parameters explained: +``` +metricNameUsage : name for the usage metric +usageDesc : Description for the usage metric that will be presented on export +metricNameQuota : name for quota metric +quotaDesc : Description for the quota metric that will be presented on export 
+serviceCode : Service code that will be used on AWS API call +quotaCode : Quota Code that will be used to fetch quota value +usageRetrieval : A part of AWS API call that checks for specific item you want to fetch +usageFilter : a filter that's used to actually count specific items for usage +paginate : Should AWS API call support pagination +``` + + +### Running the exporter + +You can run the exporter directly in your console by running the script with parameters specified in the section above + + +### Docker + +To build the image: +``` +docker build --rm -t aws-sq-exporter . +``` + +To run the container +``` +docker run -p 8000:8000 aws-sq-exporter:latest +``` + +You can then call the web server on the defined endpoint, `/metrics` by default. +``` +curl 'http://127.0.0.1:8000/metrics' +``` + +Passing argument to the docker run command +``` +docker run -p 8000:8000 aws-sq-exporter:latest --port 8000 --apikey ABC --secretkey XYZ +``` + +