diff --git a/exporters/aws-sq-exporter/.dockerignore b/exporters/aws-sq-exporter/.dockerignore
new file mode 100644
index 0000000..42061c0
--- /dev/null
+++ b/exporters/aws-sq-exporter/.dockerignore
@@ -0,0 +1 @@
+README.md
\ No newline at end of file
diff --git a/exporters/aws-sq-exporter/Dockerfile b/exporters/aws-sq-exporter/Dockerfile
new file mode 100644
index 0000000..a4b8c35
--- /dev/null
+++ b/exporters/aws-sq-exporter/Dockerfile
@@ -0,0 +1,15 @@
+FROM registry.access.redhat.com/ubi8/python-38
+
+# Add application sources with correct permissions for OpenShift
+USER 0
+ADD sq_exporter.py .
+ADD metrics.yaml .
+RUN chown -R 1001:0 ./
+USER 1001
+EXPOSE 8000
+# Install the dependencies
+RUN pip install --upgrade pip && \
+    pip install prometheus-client boto3 python-benedict
+
+# Run the application.
+# Exec-form ENTRYPOINT (not shell-form CMD): the README documents passing
+# exporter flags directly to `docker run`, which only works with ENTRYPOINT
+# (arguments to `docker run` replace a CMD entirely). The previous
+# `CMD sq_exporter.py` also failed at runtime because the script is not on
+# $PATH inside the shell that shell-form CMD spawns. Exec form additionally
+# lets SIGTERM reach the Python process for clean container shutdown.
+ENTRYPOINT ["python", "sq_exporter.py"]
diff --git a/exporters/aws-sq-exporter/README.md b/exporters/aws-sq-exporter/README.md
new file mode 100644
index 0000000..8f39557
--- /dev/null
+++ b/exporters/aws-sq-exporter/README.md
@@ -0,0 +1,87 @@
+# AWS Service Quotas Exporter
+
+Exporter used for querying the specified AWS account for quotas and quota usage, and transforming that data into Prometheus metrics. 
+
+
+### Parameters
+
+```
+-a, --apikey : AWS Access Key ID
+-s, --secretkey : AWS Secret Access Key
+-r, --regions : List of AWS Regions to be used for queries (if not provided, all regions will be queried)
+-t, --time : Sleep time between fetching the AWS API input (default is 900s)
+-d, --debug : Set the exporter debug mode on
+-p, --port : TCP port to be used to expose the metrics HTTP endpoint (default is 8000)
+-m, --metricsfile : Path to a metrics definition file, in case the default set is not enough
+```
+
+
+### Metrics definition file
+By default, we're exporting metrics for the number of Elastic IPs and VPCs per region, but if users want to introduce other items, there is support for a custom definition file
+
+Default file:
+```
+---
+- metricNameUsage: "aws_eip_quota_usage"
+  usageDesc: "Number of Elastic IPs in use"
+  metricNameQuota: "aws_eip_quota_value"
+  quotaDesc: "Administrative Quota set on EIP"
+  serviceCode: "ec2"
+  quotaCode: "L-0263D0A3"
+  usageRetrieval: "describe_addresses"
+  usageFilter: "Addresses"
+  paginate: False
+
+- metricNameUsage: "aws_vpc_per_region_quota_usage"
+  usageDesc: "Number of VPCs in use"
+  metricNameQuota: "aws_vpc_per_region_quota_value"
+  quotaDesc: "Administrative Quota set on VPCs per Region"
+  serviceCode: "vpc"
+  quotaCode: "L-F678F1CE"
+  usageRetrieval: "describe_vpcs"
+  usageFilter: "Vpcs"
+  paginate: True
+```
+
+Parameters explained:
+```
+metricNameUsage : Name for the usage metric
+usageDesc : Description for the usage metric that will be presented on export
+metricNameQuota : Name for the quota metric
+quotaDesc : Description for the quota metric that will be presented on export
+serviceCode : Service code that will be used on the AWS API call
+quotaCode : Quota code that will be used to fetch the quota value
+usageRetrieval : The part of the AWS API call that checks for the specific item you want to fetch
+usageFilter : A filter that's used to actually count specific items for usage
+paginate : Whether the AWS API call should use pagination 
+```
+
+
+### Running the exporter
+
+You can run the exporter directly in your console by running the script with the parameters specified in the section above
+
+
+### Docker
+
+To build the image:
+```
+docker build --rm -t aws-sq-exporter .
+```
+
+To run the container
+```
+docker run -p 8000:8000 aws-sq-exporter:latest
+```
+
+You can then call the web server on the defined endpoint, `/metrics` by default.
+```
+curl 'http://127.0.0.1:8000/metrics'
+```
+
+Passing arguments to the docker run command
+```
+docker run -p 8000:8000 aws-sq-exporter:latest --port 8000 --apikey ABC --secretkey XYZ
+```
+
+
diff --git a/exporters/aws-sq-exporter/metrics.yaml b/exporters/aws-sq-exporter/metrics.yaml
new file mode 100644
index 0000000..d30cf11
--- /dev/null
+++ b/exporters/aws-sq-exporter/metrics.yaml
@@ -0,0 +1,20 @@
+---
+# NOTE: usageDesc/quotaDesc for the EIP entry were previously swapped
+# (the usage gauge carried the quota description and vice versa); they now
+# match the pattern of the VPC entry below.
+- metricNameUsage: "aws_eip_quota_usage"
+  usageDesc: "Number of Elastic IPs in use"
+  metricNameQuota: "aws_eip_quota_value"
+  quotaDesc: "Administrative Quota set on EIP"
+  serviceCode: "ec2"
+  quotaCode: "L-0263D0A3"
+  usageRetrieval: "describe_addresses"
+  usageFilter: "Addresses"
+  paginate: False
+
+- metricNameUsage: "aws_vpc_per_region_quota_usage"
+  usageDesc: "Number of VPCs in use"
+  metricNameQuota: "aws_vpc_per_region_quota_value"
+  quotaDesc: "Administrative Quota set on VPCs per Region"
+  serviceCode: "vpc"
+  quotaCode: "L-F678F1CE"
+  usageRetrieval: "describe_vpcs"
+  usageFilter: "Vpcs"
+  paginate: True
diff --git a/exporters/aws-sq-exporter/sq_exporter.py b/exporters/aws-sq-exporter/sq_exporter.py
new file mode 100755
index 0000000..9845300
--- /dev/null
+++ b/exporters/aws-sq-exporter/sq_exporter.py
@@ -0,0 +1,241 @@
+#!/opt/app-root/bin/python
+
+import subprocess, os
+from prometheus_client import start_http_server, Summary, Gauge, Counter
+import argparse
+import time
+import boto3
+import botocore
+from benedict import benedict
+
+# Generic function to fetch administrative quota values
+def getQuotaValue(quotaCode, serviceCode, 
cSessions):
+    # Walk the paginated list_service_quotas output on the region-bound
+    # "service-quotas" client session and return the value for quotaCode
+    # (as a string, which Gauge.set() coerces). Returns 0 when the quota
+    # code is not present for this service in this region.
+    paginator = cSessions["service-quotas"].get_paginator("list_service_quotas")
+    pCursor = paginator.paginate(ServiceCode=serviceCode, PaginationConfig={"MaxItems": 1000, "PageSize": 10})
+    for page in pCursor:
+        for quotas in page["Quotas"]:
+            if quotas["QuotaCode"] == quotaCode:
+                # Found it - return immediately instead of walking the
+                # remaining pages (the old dead `currentValue` counter
+                # has been dropped).
+                return str(quotas["Value"])
+    return 0
+
+
+# Fetch actual usage of a specific service; works for EIP and Vpcs.
+# NOTE: all supported usage calls live on the EC2 client (the VPC APIs are
+# part of the EC2 service), hence the hard-coded "ec2" session key.
+def getUsage(cSessions, usageRetrieval, usageFilter):
+    client = cSessions["ec2"]
+    if client.can_paginate(usageRetrieval):
+        # Page through the results so accounts with more items than a
+        # single API page still report the full count (e.g. describe_vpcs);
+        # previously pagination was never applied and counts were capped
+        # at one page despite the `paginate` flag in metrics.yaml.
+        total = 0
+        for page in client.get_paginator(usageRetrieval).paginate():
+            total += len(page[usageFilter])
+        return total
+    awsCall = getattr(client, usageRetrieval)
+    awsReturns = awsCall()
+    return len(awsReturns[usageFilter])
+
+
+# Resolve the AWS account id for the configured credentials via STS.
+def getAccountID():
+    awsSession = boto3.client("sts", aws_access_key_id=args.apikey, aws_secret_access_key=args.secretkey)
+    awsReturns = awsSession.get_caller_identity()
+    return awsReturns["Account"]
+
+
+## If we want to fetch the usage for all of the regions on given account
+## we'll need to fetch a list of regions available on this particular AWS account
+def getRegions():
+    awsSession = boto3.client("ec2", aws_access_key_id=args.apikey, aws_secret_access_key=args.secretkey, region_name="us-east-1")
+    awsReturns = awsSession.describe_regions()
+    if args.debug:
+        print("Regions fetched from active account: " + str(awsReturns))
+    regions = []
+    for page in awsReturns["Regions"]:
+        regions.append(page["RegionName"])
+        if args.debug:
+            print("Adding " + str(page["RegionName"]) + " to the region list")
+    return regions
+
+
+if __name__ == "__main__":
+    # Fetch&parse args
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-a", "--apikey", help=" AWS Access Key ID ")
+    parser.add_argument("-s", "--secretkey", help=" AWS Secret Access Key")
+    parser.add_argument("-r", "--regions", default="All", help="List of AWS Regions to be used for queries")
+    parser.add_argument(
+        "-t", "--time", type=int, default=900, help=" Sleep time between fetching the AWS API input"
+    )
+    parser.add_argument("-d", 
"--debug", help=" Should we be more verbose?", action="store_true")
+    # type=int: start_http_server() needs an integer; without it, a port
+    # passed on the command line arrived as a string and crashed the server.
+    parser.add_argument(
+        "-p", "--port", type=int, default=8000, help=" TCP port to be used to expose metrics HTTP endpoint"
+    )
+    parser.add_argument("-m", "--metricsfile", default="./metrics.yaml", help=" Metrics definition file")
+    args = parser.parse_args()
+
+    ## Strip regions string from leading and trailing spaces
+    aRegions = str(args.regions).strip()
+
+    ## Setting up basic variables
+    awsRegions = {}
+    awsRegionsList = []
+
+    ## slice the string if we find comma or space between regions names
+    if aRegions.find(" ") > 0:
+        # str.split() with no argument splits on any run of whitespace.
+        # The previous split("\s") split on the literal two characters
+        # backslash+s, so space-separated region lists were never split.
+        awsRegionsList = aRegions.split()
+        for region in awsRegionsList:
+            awsRegions[region] = {}
+    elif aRegions.find(",") > 0:
+        awsRegionsList = aRegions.split(",")
+        for region in awsRegionsList:
+            awsRegions[region] = {}
+    ## If no region was specified, we're defaulting to "All"
+    elif aRegions == "All":
+        print("Region parameter was not passed, fetching all available AWS Regions")
+        awsRegionsList = getRegions()
+        for region in awsRegionsList:
+            awsRegions[region] = {}
+    ## Falling back to a single specified region
+    else:
+        if args.debug:
+            print("Following AWS region will be scraped for data: ")
+        awsRegionsList.append(aRegions)
+        print(str(awsRegionsList))
+        for region in awsRegionsList:
+            awsRegions[region] = {}
+
+    print("Loading metrics definition file located at " + str(args.metricsfile))
+
+    # Getting AccountId
+    awsAccountID = getAccountID()
+    print("Exporter configured to calculate metrics on : " + str(awsAccountID))
+
+    ## Setting initial sessions, per region
+    for region in awsRegionsList:
+        awsRegions[region]["clientSession"] = {}
+        awsRegions[region]["clientSession"]["ec2"] = boto3.client(
+            "ec2",
+            aws_access_key_id=args.apikey,
+            aws_secret_access_key=args.secretkey,
+            region_name=region,
+        )
+        awsRegions[region]["clientSession"]["service-quotas"] = boto3.client(
+            "service-quotas",
+            aws_access_key_id=args.apikey,
aws_secret_access_key=args.secretkey,
+            region_name=region,
+        )
+
+    # Loading up metrics configuration
+    promMetrics = benedict(args.metricsfile, format="yaml")
+    if args.debug:
+        print("Metric configuration: ")
+        print(str(promMetrics))
+
+    # Initializing Prometheus Gauge metrics, one usage/quota pair per
+    # entry in the metrics definition file, labelled by region and account.
+    for metric in promMetrics["values"]:
+        if args.debug:
+            print("Creating metric for " + metric["quotaCode"] + " quota code")
+        metric["mObjectUsage"] = Gauge(
+            metric["metricNameUsage"], metric["usageDesc"], ["region", "accountid"]
+        )
+        metric["mObjectQuota"] = Gauge(
+            metric["metricNameQuota"], metric["quotaDesc"], ["region", "accountid"]
+        )
+
+    ## Setting up Counter metrics to track AWS API call failures
+    # Setting variables
+    apiCallFailureMetricObjectID = "apiCallFailure"
+    apiCallFailureMetricName = "aws_api_failed_requests"
+    apiCallFailureMetricDesc = "Counter set on failed AWS API calls"
+    apiCallSuccessMetricObjectID = "apiCallSuccess"
+    apiCallSuccessMetricName = "aws_api_success_requests"
+    apiCallSuccessMetricDesc = "Counter set on successful AWS API calls"
+    # Initializing metrics
+    apiCallFails = Counter(apiCallFailureMetricName, apiCallFailureMetricDesc)
+    apiCallSuccess = Counter(apiCallSuccessMetricName, apiCallSuccessMetricDesc)
+
+    # Resetting counters (inc(0) publishes both series with an initial 0)
+    apiCallFails.inc(0)
+    apiCallSuccess.inc(0)
+
+    ## Initializing HTTP /metrics endpoint for Prometheus metrics.
+    # int() guards against a string port value (e.g. from an environment
+    # where --port was not parsed as an integer).
+    start_http_server(int(args.port))
+    print("Started AWS Service Quota Exporter listening on port: " + str(args.port))
+
+    # Variables controlling the flow on main loop
+    initialRequestsCounter = 0
+    warmUpPeriod = 1
+    requestDelay = 0.5
+    requestCounterHardStop = 8196
+
+    if args.debug:
+        print("Total of ServiceQuotas Metric/Label set to be calculated: " +
+              str(len(awsRegionsList) * len(promMetrics["values"])))
+
+    ## Main loop, going through the regions and setting current metrics values for both value and usage
+    while True:
+        for region in awsRegionsList:
+            # Looping through metrics 
definitions:
+            for metric in promMetrics["values"]:
+                # Pre-seed so the debug block at the bottom cannot raise a
+                # NameError when the very first API call of a cycle fails.
+                quotaValue = None
+                usage = None
+                try:
+                    quotaValue = getQuotaValue(
+                        metric["quotaCode"],
+                        metric["serviceCode"],
+                        awsRegions[region]["clientSession"],
+                    )
+                    apiCallSuccess.inc()
+                    metric["mObjectQuota"].labels(region=region, accountid=awsAccountID).set(quotaValue)
+                # Both failure modes get identical handling: count it,
+                # log it, keep scraping the remaining metrics.
+                except (botocore.exceptions.EndpointConnectionError, botocore.exceptions.ClientError) as error:
+                    apiCallFails.inc()
+                    print(str(error))
+                try:
+                    usage = getUsage(
+                        awsRegions[region]["clientSession"],
+                        metric["usageRetrieval"],
+                        metric["usageFilter"],
+                    )
+                    apiCallSuccess.inc()
+                    metric["mObjectUsage"].labels(region=region, accountid=awsAccountID).set(usage)
+                except (botocore.exceptions.EndpointConnectionError, botocore.exceptions.ClientError) as error:
+                    apiCallFails.inc()
+                    print(str(error))
+
+                ## Initial Requests are executed quicker to ensure we got all values in metrics
+                # Check if we completed initial run
+                # If so throttle down to delay value specified in command line
+                if (
+                    initialRequestsCounter >= (len(awsRegionsList) * len(promMetrics["values"]))
+                    and initialRequestsCounter != requestCounterHardStop):
+
+                    if args.debug:
+                        print("Warmup completed after " + str(initialRequestsCounter) + ", throttling down")
+                    requestDelay = args.time
+                    warmUpPeriod = 0
+                    # Park the counter on the sentinel so this branch fires once
+                    initialRequestsCounter = requestCounterHardStop
+
+                if warmUpPeriod == 1:
+                    initialRequestsCounter = initialRequestsCounter + 1
+
+                if args.debug:
+                    print(
+                        "Last obtained AWS Quota Value for "
+                        + str(metric["mObjectQuota"])
+                        + " on "
+                        + str(region)
+                        + " is:"
+                    )
+                    print(str(quotaValue))
+                    print(
+                        "Last obtained AWS resource usage for "
+                        + str(metric["mObjectUsage"])
+                        + " on "
+                        + str(region)
+                        + " is:"
+                    )
+                    print(str(usage))
+                ## Hardcoded sleep to ensure we don't choke on AWS API
+                time.sleep(0.5)
time.sleep(requestDelay)
diff --git a/exporters/aws-sq-exporter/version.json b/exporters/aws-sq-exporter/version.json
new file mode 100644
index 0000000..1159bb1
--- /dev/null
+++ b/exporters/aws-sq-exporter/version.json
@@ -0,0 +1,3 @@
+{
+  "version": "v0.0.1"
+}