From bb06d9b0f78d42ffee5b5d804719d31d0ea4f330 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Mon, 13 Oct 2025 15:00:19 -0500 Subject: [PATCH 01/22] feat: initial, untested implementation --- serverless.example.yml | 4 + src/lambdas/api/app.js | 89 +++++++++++ src/lib/api.js | 77 +++++++-- src/lib/asset-proxy.js | 344 +++++++++++++++++++++++++++++++++++++++++ src/lib/ingest.js | 7 +- 5 files changed, 506 insertions(+), 15 deletions(-) create mode 100644 src/lib/asset-proxy.js diff --git a/serverless.example.yml b/serverless.example.yml index ffa96de1..6960c3f2 100644 --- a/serverless.example.yml +++ b/serverless.example.yml @@ -34,6 +34,10 @@ provider: STAC_API_URL: "https://some-stac-server.example.com" CORS_ORIGIN: "https://ui.example.com" CORS_CREDENTIALS: true + # Asset Proxy Configuration + # ASSET_PROXY_BUCKET_OPTION: "NONE" # Options: NONE, ALL, ALL_BUCKETS_IN_ACCOUNT, LIST + # ASSET_PROXY_BUCKET_LIST: "bucket1,bucket2,bucket3" # Required only when ASSET_PROXY_BUCKET_OPTION is LIST + # ASSET_PROXY_URL_EXPIRY: 300 # Pre-signed URL expiry in seconds (default: 300) iam: role: statements: diff --git a/src/lambdas/api/app.js b/src/lambdas/api/app.js index 489b1c4e..cd15207e 100644 --- a/src/lambdas/api/app.js +++ b/src/lambdas/api/app.js @@ -11,6 +11,14 @@ import { NotFoundError, ValidationError } from '../../lib/errors.js' import { readFile } from '../../lib/fs.js' import addEndpoint from './middleware/add-endpoint.js' import logger from '../../lib/logger.js' +import { + getCachedProxyConfig, + parseS3Url, + shouldProxyAssets, + generatePresignedUrl, + determineS3Region, + getProxyConfig, +} from '../../lib/asset-proxy.js' /** * @typedef {import('express').Request} Request @@ -19,6 +27,9 @@ import logger from '../../lib/logger.js' * @typedef {import('express').ErrorRequestHandler} ErrorRequestHandler */ +// Initialize asset proxy configuration at startup +await getProxyConfig() + const txnEnabled = process.env['ENABLE_TRANSACTIONS_EXTENSION'] === 'true' export 
const app = express() @@ -452,6 +463,81 @@ app.get('/collections/:collectionId/items/:itemId/thumbnail', async (req, res, n } }) +/** + * Redirects a request for a proxied asset to a presigned S3 URL + * @param {Request} req - Express request + * @param {Response} res - Express response + * @param {NextFunction} next - Express next function + * @returns {Promise} Resolves when done + */ +const redirectProxiedAssetRequest = async (req, res, next) => { + logger.debug('Asset proxy request', { params: req.params }) + try { + const proxyConfig = getCachedProxyConfig() + if (!proxyConfig.enabled) { + return next(createError(403)) + } + + const { collectionId, itemId, assetKey } = req.params + const itemOrCollection = itemId // itemId is only defined for item assets + ? await api.getItem(database, collectionId, itemId, req.endpoint, req.query, req.headers) + : await api.getCollection(database, collectionId, req.endpoint, req.query, req.headers) + if (itemOrCollection instanceof NotFoundError) { + return next(createError(404)) + } + if (itemOrCollection instanceof Error) { + return next(createError(500)) + } + + // @ts-ignore - assetKey guaranteed by Express route + const asset = itemOrCollection.assets?.[assetKey] || null + if (!asset) { + return next(createError(404)) + } + + const alternateHref = asset.alternate?.s3?.href || null + if (!alternateHref) { + return next(createError(404)) + } + + const s3Info = parseS3Url(alternateHref) + if (!s3Info) { + return next(createError(500, 'Asset S3 href is invalid')) + } + + if (!shouldProxyAssets(s3Info.bucket, proxyConfig)) { + return next(createError(403)) + } + + let region = null + if (s3Info.region) { + region = s3Info.region + } else { + region = determineS3Region(asset, itemOrCollection) + } + + const presignedUrl = await generatePresignedUrl( + s3Info.bucket, + s3Info.key, + region, + proxyConfig.urlExpiry + ) + + return res.redirect(presignedUrl) + } catch (error) { + return next(error) + } +} + 
+app.get('/collections/:collectionId/items/:itemId/assets/:assetKey', + async (req, res, next) => { + await redirectProxiedAssetRequest(req, res, next) + }) + +app.get('/collections/:collectionId/assets/:assetKey', async (req, res, next) => { + await redirectProxiedAssetRequest(req, res, next) +}) + // catch 404 and forward to error handler app.use((_req, _res, next) => { next(createError(404)) @@ -469,6 +555,9 @@ app.use( case 400: res.json({ code: 'BadRequest', description: err.message }) break + case 403: + res.json({ code: 'Forbidden', description: 'Forbidden' }) + break case 404: res.json({ code: 'NotFound', description: 'Not Found' }) break diff --git a/src/lib/api.js b/src/lib/api.js index 398769ec..9c059286 100644 --- a/src/lib/api.js +++ b/src/lib/api.js @@ -6,6 +6,10 @@ import { NotFoundError, ValidationError } from './errors.js' import { isIndexNotFoundError } from './database.js' import logger from './logger.js' import { bboxToPolygon } from './geo-utils.js' +import { + getCachedProxyConfig, + proxyAssets, +} from './asset-proxy.js' // max number of collections to retrieve const COLLECTION_LIMIT = process.env['STAC_SERVER_COLLECTION_LIMIT'] || 100 @@ -51,6 +55,8 @@ const ALL_AGGREGATION_NAMES = DEFAULT_AGGREGATIONS.map((x) => x.name).concat( ] ) +const ALTERNATE_ASSETS_EXTENSION = 'https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json' + export const extractIntersects = function (params) { let intersectsGeometry const { intersects } = params @@ -555,6 +561,45 @@ export const addItemLinks = function (results, endpoint) { return results } +// Impure - mutates results +export const proxyStacObjectAssets = function (results, endpoint) { + const proxyConfig = getCachedProxyConfig() + if (!proxyConfig.enabled) { + return results + } + + results.forEach((result) => { + if (!result.assets || typeof result.assets !== 'object') { + return + } + + const itemId = result.collection ? result.id : null + const collectionId = result.collection ? 
result.collection : result.id + + const { assets, wasProxied } = proxyAssets( + result.assets, + endpoint, + collectionId, + itemId, + proxyConfig + ) + + if (wasProxied) { + result.assets = assets + + if (!result.stac_extensions) { + result.stac_extensions = [] + } + + if (!result.stac_extensions.includes(ALTERNATE_ASSETS_EXTENSION)) { + result.stac_extensions.push(ALTERNATE_ASSETS_EXTENSION) + } + } + }) + + return results +} + const wrapResponseInFeatureCollection = function (features, links, numberMatched, numberReturned, limit) { const fc = { @@ -758,8 +803,10 @@ const searchItems = async function ( }) } - const items = addItemLinks(responseItems, endpoint) - return wrapResponseInFeatureCollection(items, links, numberMatched, numberReturned, limit) + addItemLinks(responseItems, endpoint) + proxyStacObjectAssets(responseItems, endpoint) + + return wrapResponseInFeatureCollection(responseItems, links, numberMatched, numberReturned, limit) } const agg = function (esAggs, name, dataType) { @@ -1270,7 +1317,9 @@ const getCollections = async function (backend, endpoint, parameters, headers) { deleteUnusedFields(collection) } - const linkedCollections = addCollectionLinks(collections, endpoint) + addCollectionLinks(collections, endpoint) + proxyStacObjectAssets(collections, endpoint) + const resp = { collections, links: [ @@ -1294,8 +1343,8 @@ const getCollections = async function (backend, endpoint, parameters, headers) { resp['context'] = { page: 1, limit: COLLECTION_LIMIT, - matched: linkedCollections && linkedCollections.length, - returned: linkedCollections && linkedCollections.length + matched: collections && collections.length, + returned: collections && collections.length } } return resp @@ -1312,12 +1361,10 @@ const getCollection = async function (backend, collectionId, endpoint, parameter } deleteUnusedFields(result) + addCollectionLinks([result], endpoint) + proxyStacObjectAssets([result], endpoint) - const col = addCollectionLinks([result], endpoint) - if 
(col.length > 0) { - return col[0] - } - return new Error('Collection retrieval failed') + return result } const createCollection = async function (backend, collection) { @@ -1343,7 +1390,10 @@ const getItem = async function (backend, collectionId, itemId, endpoint, params, const { results } = await backend.search(itemQuery, 1) - const [it] = addItemLinks(results, endpoint) + addItemLinks(results, endpoint) + proxyStacObjectAssets(results, endpoint) + + const [it] = results if (it) { return it } @@ -1355,7 +1405,9 @@ const partialUpdateItem = async function (backend, const response = await backend.partialUpdateItem(collectionId, itemId, parameters) logger.debug('Partial Update Item: %j', response) if (response) { - return addItemLinks([response.body.get._source], endpoint)[0] + const items = addItemLinks([response.body.get._source], endpoint) + proxyStacObjectAssets(items, endpoint) + return items[0] } return new Error(`Error partially updating item ${itemId}`) } @@ -1479,4 +1531,5 @@ export default { getCollectionQueryables, getGlobalAggregations, getCollectionAggregations, + proxyStacObjectAssets, } diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js new file mode 100644 index 00000000..93273c41 --- /dev/null +++ b/src/lib/asset-proxy.js @@ -0,0 +1,344 @@ +import { S3Client, GetObjectCommand, ListBucketsCommand } from '@aws-sdk/client-s3' +import { getSignedUrl } from '@aws-sdk/s3-request-presigner' +import logger from './logger.js' + +export const BucketOption = Object.freeze({ + NONE: 'NONE', + ALL: 'ALL', + ALL_BUCKETS_IN_ACCOUNT: 'ALL_BUCKETS_IN_ACCOUNT', + LIST: 'LIST' +}) + +// Cached configuration - initialized once at startup +let cachedProxyConfig = null + +// Cached S3 clients by region to avoid creating new clients on each request +const s3ClientCache = new Map() + +/** + * Get or create an S3Client for a specific region + * @param {string} region - AWS region + * @returns {S3Client} Cached or new S3 client + */ +const getS3Client = (region) 
=> { + if (s3ClientCache.has(region)) { + return s3ClientCache.get(region) + } + + const client = new S3Client({ region }) + s3ClientCache.set(region, client) + return client +} + +/** + * Fetch all bucket names in the AWS account + * This is called once during configuration initialization if mode is ALL_BUCKETS_IN_ACCOUNT + * @returns {Promise>} Set of bucket names + */ +const fetchAllBucketsInAccount = async () => { + try { + const region = process.env['AWS_REGION'] || 'us-west-2' + const client = getS3Client(region) + const command = new ListBucketsCommand({}) + const response = await client.send(command) + + const bucketNames = response.Buckets?.map((b) => b.Name) + ?.filter((name) => typeof name === 'string') || [] + const buckets = new Set(bucketNames) + logger.info(`Fetched ${buckets.size} buckets from AWS account for asset proxy`) + return buckets + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error) + logger.error('Failed to fetch buckets from AWS account', { error: errorMessage }) + throw new Error(`Failed to fetch buckets for asset proxy: ${errorMessage}`) + } +} + +/** + * Load and cache proxy configuration from environment variables + * This function is called once at app startup and the result is cached. 
+ * @returns {Promise} Configuration object + */ +export const getProxyConfig = async () => { + // Return cached config if already loaded + if (cachedProxyConfig) { + return cachedProxyConfig + } + + const bucketOption = process.env['ASSET_PROXY_BUCKET_OPTION'] || BucketOption.NONE + const bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] || '' + const urlExpiry = parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) + + if (bucketOption === BucketOption.NONE) { + cachedProxyConfig = { + enabled: false, + mode: BucketOption.NONE, + buckets: new Set(), + urlExpiry + } + } else if (bucketOption === BucketOption.ALL) { + cachedProxyConfig = { + enabled: true, + mode: BucketOption.ALL, + buckets: new Set(), + urlExpiry + } + } else if (bucketOption === BucketOption.ALL_BUCKETS_IN_ACCOUNT) { + const buckets = await fetchAllBucketsInAccount() + cachedProxyConfig = { + enabled: true, + mode: BucketOption.ALL_BUCKETS_IN_ACCOUNT, + buckets, + urlExpiry + } + } else if (bucketOption === BucketOption.LIST) { + const buckets = bucketList.split(',').map((b) => b.trim()).filter((b) => b) + cachedProxyConfig = { + enabled: true, + mode: BucketOption.LIST, + buckets: new Set(buckets), + urlExpiry + } + } else { + const validOptions = Object.values(BucketOption).join(', ') + throw new Error( + `Invalid ASSET_PROXY_BUCKET_OPTION: ${bucketOption}. Must be one of: ${validOptions}` + ) + } + + logger.debug('Asset proxy configuration loaded', { + mode: cachedProxyConfig.mode, + enabled: cachedProxyConfig.enabled, + bucketCount: cachedProxyConfig.buckets.size, + urlExpiry: cachedProxyConfig.urlExpiry + }) + + return cachedProxyConfig +} + +/** + * Get the cached proxy configuration synchronously + * @returns {Object} Cached configuration object + */ +export const getCachedProxyConfig = () => { + if (!cachedProxyConfig) { + throw new Error('Asset proxy config not initialized. 
Call getProxyConfig() at startup.') + } + return cachedProxyConfig +} + +/** + * Parse S3 URL (URI or HTTPS) into components + * Supports: + * - s3://bucket/key + * - https://bucket.s3.amazonaws.com/key + * - https://bucket.s3.region.amazonaws.com/key + * - https://s3.amazonaws.com/bucket/key + * - https://s3.region.amazonaws.com/bucket/key + * + * @param {string} url - S3 URL to parse + * @returns {Object|null} {bucket, key, region} or null if not a valid S3 URL + */ +export const parseS3Url = (url) => { + if (!url || typeof url !== 'string') { + return null + } + + // S3 URI format: s3://bucket/key + if (url.startsWith('s3://')) { + const withoutProtocol = url.substring(5) + const firstSlash = withoutProtocol.indexOf('/') + + if (firstSlash === -1) { + return null // No key provided + } + + const bucket = withoutProtocol.substring(0, firstSlash) + const key = withoutProtocol.substring(firstSlash + 1) + + if (!bucket || !key) { + return null + } + + return { bucket, key, region: null } + } + + // HTTPS URL formats + if (url.startsWith('https://')) { + try { + const urlObj = new URL(url) + const hostname = urlObj.hostname + const pathname = urlObj.pathname + + // Virtual-hosted style: bucket.s3.region.amazonaws.com or bucket.s3.amazonaws.com + const virtualHostMatch = hostname.match(/^([^.]+)\.s3(?:\.([^.]+))?\.amazonaws\.com$/) + if (virtualHostMatch) { + const bucket = virtualHostMatch[1] + const region = virtualHostMatch[2] || null + const key = pathname.startsWith('/') ? 
pathname.substring(1) : pathname + + if (!key) { + return null + } + + return { bucket, key, region } + } + + // Path style: s3.region.amazonaws.com/bucket/key or s3.amazonaws.com/bucket/key + const pathStyleMatch = hostname.match(/^s3(?:\.([^.]+))?\.amazonaws\.com$/) + if (pathStyleMatch) { + const region = pathStyleMatch[1] || null + const pathParts = pathname.split('/').filter((p) => p) + + if (pathParts.length < 2) { + return null // Need at least bucket and key + } + + const bucket = pathParts[0] + const key = pathParts.slice(1).join('/') + + return { bucket, key, region } + } + } catch (_error) { + // Invalid URL + return null + } + } + + return null +} + +/** + * Determine if a asset hrefs should be proxied + * @param {string} bucket - asset S3 bucket + * @param {Object} proxyConfig - Proxy configuration + * @returns {boolean} True if should be proxied + */ +export const shouldProxyAssets = (bucket, proxyConfig) => { + if (!proxyConfig.enabled) { + return false + } + + if (proxyConfig.mode === BucketOption.ALL) { + return true + } + + // For LIST and ALL_BUCKETS_IN_ACCOUNT modes + return proxyConfig.buckets.has(bucket) +} + +/** + * Generate a pre-signed URL for S3 object access + * Uses cached S3 clients per region for better performance. 
+ * + * @param {string} bucket - S3 bucket name + * @param {string} key - S3 object key + * @param {string} region - AWS region + * @param {number} expirySeconds - URL expiry time in seconds + * @returns {Promise} Pre-signed URL + */ +export const generatePresignedUrl = async (bucket, key, region, expirySeconds) => { + const client = getS3Client(region) + + const command = new GetObjectCommand({ + Bucket: bucket, + Key: key, + RequestPayer: 'requester' + }) + + const presignedUrl = await getSignedUrl(client, command, { + expiresIn: expirySeconds + }) + + logger.debug('Generated pre-signed URL for asset', { + bucket, + key, + region, + expirySeconds, + }) + + return presignedUrl +} + +/** + * Proxy asset hrefs and add original href as alternate + * @param {Object} assets - Assets object + * @param {string} endpoint - API endpoint base URL + * @param {string} collectionId - Collection ID + * @param {string|null} itemId - Item ID (null for collection assets) + * @param {Object} proxyConfig - Proxy configuration + * @returns {Object} {assets: Proxied assets object, wasProxied: boolean} + */ +export const proxyAssets = (assets, endpoint, collectionId, itemId, proxyConfig) => { + const ProxiedAssets = {} + let wasProxied = false + + for (const [assetKey, asset] of Object.entries(assets)) { + if (!asset || !asset.href) { + ProxiedAssets[assetKey] = asset + // eslint-disable-next-line no-continue + continue + } + + const s3Info = parseS3Url(asset.href) + if (!s3Info || !shouldProxyAssets(s3Info.bucket, proxyConfig)) { + ProxiedAssets[assetKey] = asset + // eslint-disable-next-line no-continue + continue + } + + wasProxied = true + + const proxyHref = itemId + ? 
`${endpoint}/collections/${collectionId}/items/${itemId}/assets/${assetKey}` + : `${endpoint}/collections/${collectionId}/assets/${assetKey}` + + ProxiedAssets[assetKey] = { + ...asset, + href: proxyHref, + alternate: { + ...(asset.alternate || {}), + s3: { + href: asset.href + } + } + } + } + + return { assets: ProxiedAssets, wasProxied } +} + +/** + * Determine S3 region STAC Storage Extension, if it exists + * @param {Object} asset - Asset object + * @param {Object} itemOrCollection - Item or Collection object + * @returns {string} AWS region + */ +export const determineS3Region = (asset, itemOrCollection) => { + // Storage Extension v1 + const v1Region = asset['storage:region'] || itemOrCollection.properties?.['storage:region'] + if (v1Region) { + return v1Region + } + + // Storage Extension v2 + const storageSchemes = itemOrCollection.properties?.['storage:schemes'] + || itemOrCollection['storage:schemes'] + const v2Region = storageSchemes?.[asset['storage:refs']]?.region + if (v2Region) { + return v2Region + } + + // Default to environment or us-west-2 + return process.env['AWS_REGION'] || 'us-west-2' +} + +export default { + getProxyConfig, + getCachedProxyConfig, + parseS3Url, + shouldProxyAssets, + generatePresignedUrl, + proxyAssets, + determineS3Region, +} diff --git a/src/lib/ingest.js b/src/lib/ingest.js index 70e1d69e..487af4af 100644 --- a/src/lib/ingest.js +++ b/src/lib/ingest.js @@ -1,5 +1,5 @@ import { getItemCreated } from './database.js' -import { addItemLinks, addCollectionLinks } from './api.js' +import { addItemLinks, addCollectionLinks, proxyStacObjectAssets } from './api.js' import { dbClient, createIndex } from './database-client.js' import logger from './logger.js' import { publishRecordToSns } from './sns.js' @@ -164,7 +164,7 @@ export async function processMessages(msgs) { /* eslint-enable no-await-in-loop */ // Impure - mutates record -function updateLinksWithinRecord(record) { +function updateLinksAndHrefsWithinRecord(record) { const 
endpoint = process.env['STAC_API_URL'] if (!endpoint) { logger.info('STAC_API_URL not set, not updating links within ingested record') @@ -179,6 +179,7 @@ function updateLinksWithinRecord(record) { } else if (isCollection(record)) { addCollectionLinks([record], endpoint) } + proxyStacObjectAssets([record], endpoint) return record } @@ -186,7 +187,7 @@ export async function publishResultsToSns(results, topicArn) { await Promise.allSettled(results.map(async (result) => { if (isStacEntity(result.record)) { if (result.record && !result.error) { - updateLinksWithinRecord(result.record) + updateLinksAndHrefsWithinRecord(result.record) } await publishRecordToSns(topicArn, result.record, result.error) } From 83cdb908074bc51b8ffc8e2bc155c4acc886d682 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Tue, 14 Oct 2025 06:48:14 -0500 Subject: [PATCH 02/22] tests: WIP, not all running --- src/lambdas/api/app.js | 895 ++++++++++++++------------- src/lambdas/api/index.js | 10 +- src/lambdas/api/local.ts | 3 +- src/lib/asset-proxy.js | 72 ++- src/lib/database-client.js | 2 +- tests/helpers/api.js | 3 +- tests/system/test-api-asset-proxy.js | 147 +++++ tests/system/test-api-item-get.js | 24 + tests/system/test-api-search-post.js | 27 + tests/system/test-ingest.js | 35 ++ tests/unit/test-asset-proxy.js | 315 ++++++++++ 11 files changed, 1050 insertions(+), 483 deletions(-) create mode 100644 tests/system/test-api-asset-proxy.js create mode 100644 tests/unit/test-asset-proxy.js diff --git a/src/lambdas/api/app.js b/src/lambdas/api/app.js index cd15207e..40a3ca3a 100644 --- a/src/lambdas/api/app.js +++ b/src/lambdas/api/app.js @@ -17,7 +17,7 @@ import { shouldProxyAssets, generatePresignedUrl, determineS3Region, - getProxyConfig, + initProxyConfig, } from '../../lib/asset-proxy.js' /** @@ -27,243 +27,201 @@ import { * @typedef {import('express').ErrorRequestHandler} ErrorRequestHandler */ -// Initialize asset proxy configuration at startup -await getProxyConfig() - -const txnEnabled = 
process.env['ENABLE_TRANSACTIONS_EXTENSION'] === 'true' - -export const app = express() - -if (process.env['REQUEST_LOGGING_ENABLED'] !== 'false') { - app.use( - [ - // Setting `immediate: true` allows us to log at request start - // in case the lambda times out it's helpful to have the request ID - // Using console out will allow us to capture the request ID from lambda - morgan('Request Start - :method :url', - { immediate: true, stream: { write: (message) => console.info(`${message}`) } }), - // Logs at the end of the request - // Using console out will allow us to capture the request ID from lambda - morgan(process.env['REQUEST_LOGGING_FORMAT'] || 'tiny', - { stream: { write: (message) => console.info(message) } }) - ] - ) -} - -app.use(cors({ - origin: process.env['CORS_ORIGIN'] || '*', - credentials: process.env['CORS_CREDENTIALS'] === 'true', - methods: process.env['CORS_METHODS'] || 'GET,HEAD,PUT,PATCH,POST,DELETE', // default - allowedHeaders: process.env['CORS_HEADERS'] || '', -})) - -app.use(express.json({ limit: '1mb' })) +export const createApp = async () => { + await initProxyConfig() + + const txnEnabled = process.env['ENABLE_TRANSACTIONS_EXTENSION'] === 'true' + + const app = express() + + if (process.env['REQUEST_LOGGING_ENABLED'] !== 'false') { + app.use( + [ + // Setting `immediate: true` allows us to log at request start + // in case the lambda times out it's helpful to have the request ID + // Using console out will allow us to capture the request ID from lambda + morgan('Request Start - :method :url', + { immediate: true, stream: { write: (message) => console.info(`${message}`) } }), + // Logs at the end of the request + // Using console out will allow us to capture the request ID from lambda + morgan(process.env['REQUEST_LOGGING_FORMAT'] || 'tiny', + { stream: { write: (message) => console.info(message) } }) + ] + ) + } -if (process.env['ENABLE_RESPONSE_COMPRESSION'] !== 'false') { - app.use(compression()) -} + app.use(cors({ + origin: 
process.env['CORS_ORIGIN'] || '*', + credentials: process.env['CORS_CREDENTIALS'] === 'true', + methods: process.env['CORS_METHODS'] || 'GET,HEAD,PUT,PATCH,POST,DELETE', // default + allowedHeaders: process.env['CORS_HEADERS'] || '', + })) -app.use(addEndpoint) + app.use(express.json({ limit: '1mb' })) -app.get('/', async (req, res, next) => { - try { - const response = await api.getCatalog(txnEnabled, req.endpoint) - if (response instanceof Error) next(createError(500, response.message)) - else res.json(response) - } catch (error) { - next(error) + if (process.env['ENABLE_RESPONSE_COMPRESSION'] !== 'false') { + app.use(compression()) } -}) -app.get('/healthcheck', async (_req, res, next) => { - try { - res.json(await api.healthCheck(database)) - } catch (error) { - next(error) - } -}) + app.use(addEndpoint) + + app.get('/', async (req, res, next) => { + try { + const response = await api.getCatalog(txnEnabled, req.endpoint) + if (response instanceof Error) next(createError(500, response.message)) + else res.json(response) + } catch (error) { + next(error) + } + }) -const pathName = process.env['LAMBDA_TASK_ROOT'] - ? process.env['LAMBDA_TASK_ROOT'] : path.dirname(fileURLToPath(import.meta.url)) + app.get('/healthcheck', async (_req, res, next) => { + try { + res.json(await api.healthCheck(database)) + } catch (error) { + next(error) + } + }) -app.get('/api', async (_req, res, next) => { - try { - res.type('application/vnd.oai.openapi') - res.download(path.resolve(pathName, 'openapi.yaml')) - } catch (error) { - next(error) - } -}) - -app.get('/api.html', async (_req, res, next) => { - try { - res.type('text/html') - res.send(await readFile(path.resolve(pathName, 'redoc.html'), 'utf8')) - } catch (error) { - next(error) - } -}) + const pathName = process.env['LAMBDA_TASK_ROOT'] + ? 
process.env['LAMBDA_TASK_ROOT'] : path.dirname(fileURLToPath(import.meta.url)) -app.get('/conformance', async (_req, res, next) => { - try { - res.json(await api.getConformance(txnEnabled)) - } catch (error) { - next(error) - } -}) - -app.get('/queryables', async (req, res, next) => { - try { - res.type('application/schema+json') - res.json(await api.getGlobalQueryables(req.endpoint)) - } catch (error) { - next(error) - } -}) - -app.get('/search', async (req, res, next) => { - try { - res.type('application/geo+json') - res.json(await api.searchItems(database, 'GET', null, req.endpoint, req.query, req.headers)) - } catch (error) { - if (error instanceof ValidationError) { - next(createError(400, error.message)) - } else { + app.get('/api', async (_req, res, next) => { + try { + res.type('application/vnd.oai.openapi') + res.download(path.resolve(pathName, 'openapi.yaml')) + } catch (error) { next(error) } - } -}) - -app.post('/search', async (req, res, next) => { - try { - res.type('application/geo+json') - res.json(await api.searchItems(database, 'POST', null, req.endpoint, req.body, req.headers)) - } catch (error) { - if (error instanceof ValidationError) { - next(createError(400, error.message)) - } else { + }) + + app.get('/api.html', async (_req, res, next) => { + try { + res.type('text/html') + res.send(await readFile(path.resolve(pathName, 'redoc.html'), 'utf8')) + } catch (error) { next(error) } - } -}) - -app.get('/aggregate', async (req, res, next) => { - try { - res.json(await api.aggregate(database, 'GET', null, req.endpoint, req.query, req.headers)) - } catch (error) { - if (error instanceof ValidationError) { - next(createError(400, error.message)) - } else { + }) + + app.get('/conformance', async (_req, res, next) => { + try { + res.json(await api.getConformance(txnEnabled)) + } catch (error) { next(error) } - } -}) + }) -app.get('/aggregations', async (req, res, next) => { - try { - res.json(await api.getGlobalAggregations(req.endpoint)) - } catch 
(error) { - next(error) - } -}) - -app.get('/collections', async (req, res, next) => { - try { - const response = await api.getCollections(database, req.endpoint, req.query, req.headers) - if (response instanceof Error) next(createError(500, response.message)) - else res.json(response) - } catch (error) { - next(error) - } -}) + app.get('/queryables', async (req, res, next) => { + try { + res.type('application/schema+json') + res.json(await api.getGlobalQueryables(req.endpoint)) + } catch (error) { + next(error) + } + }) -app.post('/collections', async (req, res, next) => { - if (txnEnabled) { - const collectionId = req.body.collection + app.get('/search', async (req, res, next) => { try { - await api.createCollection(database, req.body) - res.location(`${req.endpoint}/collections/${collectionId}`) - res.sendStatus(201) + res.type('application/geo+json') + res.json(await api.searchItems(database, 'GET', null, req.endpoint, req.query, req.headers)) } catch (error) { - if (error instanceof Error - && error.name === 'ResponseError' - && error.message.includes('version_conflict_engine_exception')) { - res.sendStatus(409) + if (error instanceof ValidationError) { + next(createError(400, error.message)) } else { next(error) } } - } else { - next(createError(404)) - } -}) + }) -app.get('/collections/:collectionId', async (req, res, next) => { - const { collectionId } = req.params - try { - const response = await api.getCollection( - database, collectionId, req.endpoint, req.query, req.headers - ) - if (response instanceof Error) next(createError(404)) - else res.json(response) - } catch (error) { - next(error) - } -}) + app.post('/search', async (req, res, next) => { + try { + res.type('application/geo+json') + res.json(await api.searchItems(database, 'POST', null, req.endpoint, req.body, req.headers)) + } catch (error) { + if (error instanceof ValidationError) { + next(createError(400, error.message)) + } else { + next(error) + } + } + }) 
-app.get('/collections/:collectionId/queryables', async (req, res, next) => { - const { collectionId } = req.params - try { - const queryables = await api.getCollectionQueryables( - database, collectionId, req.endpoint, req.query, req.headers - ) + app.get('/aggregate', async (req, res, next) => { + try { + res.json(await api.aggregate(database, 'GET', null, req.endpoint, req.query, req.headers)) + } catch (error) { + if (error instanceof ValidationError) { + next(createError(400, error.message)) + } else { + next(error) + } + } + }) - if (queryables instanceof Error) next(createError(404)) - else { - res.type('application/schema+json') - res.json(queryables) + app.get('/aggregations', async (req, res, next) => { + try { + res.json(await api.getGlobalAggregations(req.endpoint)) + } catch (error) { + next(error) } - } catch (error) { - if (error instanceof ValidationError) { - next(createError(400, error.message)) - } else { + }) + + app.get('/collections', async (req, res, next) => { + try { + const response = await api.getCollections(database, req.endpoint, req.query, req.headers) + if (response instanceof Error) next(createError(500, response.message)) + else res.json(response) + } catch (error) { next(error) } - } -}) + }) -app.get('/collections/:collectionId/aggregations', async (req, res, next) => { - const { collectionId } = req.params - try { - const aggs = await api.getCollectionAggregations( - database, collectionId, req.endpoint, req.query, req.headers - ) - if (aggs instanceof Error) next(createError(404)) - else res.json(aggs) - } catch (error) { - if (error instanceof ValidationError) { - next(createError(400, error.message)) + app.post('/collections', async (req, res, next) => { + if (txnEnabled) { + const collectionId = req.body.collection + try { + await api.createCollection(database, req.body) + res.location(`${req.endpoint}/collections/${collectionId}`) + res.sendStatus(201) + } catch (error) { + if (error instanceof Error + && error.name === 
'ResponseError' + && error.message.includes('version_conflict_engine_exception')) { + res.sendStatus(409) + } else { + next(error) + } + } } else { - next(error) + next(createError(404)) } - } -}) + }) -app.get('/collections/:collectionId/aggregate', - async (req, res, next) => { + app.get('/collections/:collectionId', async (req, res, next) => { const { collectionId } = req.params try { const response = await api.getCollection( database, collectionId, req.endpoint, req.query, req.headers ) - if (response instanceof Error) next(createError(404)) + else res.json(response) + } catch (error) { + next(error) + } + }) + + app.get('/collections/:collectionId/queryables', async (req, res, next) => { + const { collectionId } = req.params + try { + const queryables = await api.getCollectionQueryables( + database, collectionId, req.endpoint, req.query, req.headers + ) + + if (queryables instanceof Error) next(createError(404)) else { - res.json( - await api.aggregate( - database, 'GET', collectionId, req.endpoint, req.query, req.headers - ) - ) + res.type('application/schema+json') + res.json(queryables) } } catch (error) { if (error instanceof ValidationError) { @@ -274,299 +232,344 @@ app.get('/collections/:collectionId/aggregate', } }) -app.get('/collections/:collectionId/items', async (req, res, next) => { - const { collectionId } = req.params - try { - if ( - (await api.getCollection(database, collectionId, req.endpoint, req.query, req.headers) - ) instanceof Error) { - next(createError(404)) + app.get('/collections/:collectionId/aggregations', async (req, res, next) => { + const { collectionId } = req.params + try { + const aggs = await api.getCollectionAggregations( + database, collectionId, req.endpoint, req.query, req.headers + ) + if (aggs instanceof Error) next(createError(404)) + else res.json(aggs) + } catch (error) { + if (error instanceof ValidationError) { + next(createError(400, error.message)) + } else { + next(error) + } } + }) - 
res.type('application/geo+json') - res.json( - await api.searchItems(database, 'GET', collectionId, req.endpoint, req.query, req.headers) - ) - } catch (error) { - if (error instanceof ValidationError) { - next(createError(400, error.message)) - } else { - next(error) - } - } -}) + app.get('/collections/:collectionId/aggregate', + async (req, res, next) => { + const { collectionId } = req.params + try { + const response = await api.getCollection( + database, collectionId, req.endpoint, req.query, req.headers + ) + + if (response instanceof Error) next(createError(404)) + else { + res.json( + await api.aggregate( + database, 'GET', collectionId, req.endpoint, req.query, req.headers + ) + ) + } + } catch (error) { + if (error instanceof ValidationError) { + next(createError(400, error.message)) + } else { + next(error) + } + } + }) -app.post('/collections/:collectionId/items', async (req, res, next) => { - if (txnEnabled) { + app.get('/collections/:collectionId/items', async (req, res, next) => { const { collectionId } = req.params - const itemId = req.body.id + try { + if ( + (await api.getCollection(database, collectionId, req.endpoint, req.query, req.headers) + ) instanceof Error) { + next(createError(404)) + } - if (req.body.collection && req.body.collection !== collectionId) { - next(createError(400, 'Collection resource URI must match collection in body')) - } else { - const collectionRes = await api.getCollection( - database, collectionId, req.endpoint, req.query, req.headers + res.type('application/geo+json') + res.json( + await api.searchItems(database, 'GET', collectionId, req.endpoint, req.query, req.headers) ) - if (collectionRes instanceof Error) next(createError(404)) - else { - try { - req.body.collection = collectionId - await api.createItem(database, req.body) - res.location(`${req.endpoint}/collections/${collectionId}/items/${itemId}`) - res.sendStatus(201) - } catch (error) { - if (error instanceof Error - && error.name === 'ResponseError' - && 
error.message.includes('version_conflict_engine_exception')) { - res.sendStatus(409) - } else { - next(error) + } catch (error) { + if (error instanceof ValidationError) { + next(createError(400, error.message)) + } else { + next(error) + } + } + }) + + app.post('/collections/:collectionId/items', async (req, res, next) => { + if (txnEnabled) { + const { collectionId } = req.params + const itemId = req.body.id + + if (req.body.collection && req.body.collection !== collectionId) { + next(createError(400, 'Collection resource URI must match collection in body')) + } else { + const collectionRes = await api.getCollection( + database, collectionId, req.endpoint, req.query, req.headers + ) + if (collectionRes instanceof Error) next(createError(404)) + else { + try { + req.body.collection = collectionId + await api.createItem(database, req.body) + res.location(`${req.endpoint}/collections/${collectionId}/items/${itemId}`) + res.sendStatus(201) + } catch (error) { + if (error instanceof Error + && error.name === 'ResponseError' + && error.message.includes('version_conflict_engine_exception')) { + res.sendStatus(409) + } else { + next(error) + } } } } + } else { + next(createError(404)) } - } else { - next(createError(404)) - } -}) - -app.get('/collections/:collectionId/items/:itemId', async (req, res, next) => { - try { - const { itemId, collectionId } = req.params - - const response = await api.getItem( - database, - collectionId, - itemId, - req.endpoint, - req.query, - req.headers, - ) + }) - if (response instanceof NotFoundError) { - next(createError(404)) - } else if (response instanceof Error) { - next(createError(500)) - } else { - res.type('application/geo+json') - res.json(response) + app.get('/collections/:collectionId/items/:itemId', async (req, res, next) => { + try { + const { itemId, collectionId } = req.params + + const response = await api.getItem( + database, + collectionId, + itemId, + req.endpoint, + req.query, + req.headers, + ) + + if (response 
instanceof NotFoundError) { + next(createError(404)) + } else if (response instanceof Error) { + next(createError(500)) + } else { + res.type('application/geo+json') + res.json(response) + } + } catch (error) { + next(error) } - } catch (error) { - next(error) - } -}) + }) -app.put('/collections/:collectionId/items/:itemId', async (req, res, next) => { - if (txnEnabled) { - const { collectionId, itemId } = req.params + app.put('/collections/:collectionId/items/:itemId', async (req, res, next) => { + if (txnEnabled) { + const { collectionId, itemId } = req.params - if (req.body.collection && req.body.collection !== collectionId) { - next(createError(400, 'Collection ID in resource URI must match collection in body')) - } else if (req.body.id && req.body.id !== itemId) { - next(createError(400, 'Item ID in resource URI must match id in body')) - } else { - const itemRes = await api.getItem( - database, collectionId, itemId, req.endpoint, req.query, req.headers - ) + if (req.body.collection && req.body.collection !== collectionId) { + next(createError(400, 'Collection ID in resource URI must match collection in body')) + } else if (req.body.id && req.body.id !== itemId) { + next(createError(400, 'Item ID in resource URI must match id in body')) + } else { + const itemRes = await api.getItem( + database, collectionId, itemId, req.endpoint, req.query, req.headers + ) - if (itemRes instanceof Error) next(createError(404)) - else { - req.body.collection = collectionId - req.body.id = itemId - try { - await api.updateItem(database, req.body) - res.sendStatus(204) - } catch (error) { - if (error instanceof Error - && error.name === 'ResponseError' - && error.message.includes('version_conflict_engine_exception')) { - res.sendStatus(409) - } else { - next(error) + if (itemRes instanceof Error) next(createError(404)) + else { + req.body.collection = collectionId + req.body.id = itemId + try { + await api.updateItem(database, req.body) + res.sendStatus(204) + } catch (error) { 
+ if (error instanceof Error + && error.name === 'ResponseError' + && error.message.includes('version_conflict_engine_exception')) { + res.sendStatus(409) + } else { + next(error) + } } } } + } else { + next(createError(404)) } - } else { - next(createError(404)) - } -}) + }) -app.patch('/collections/:collectionId/items/:itemId', async (req, res, next) => { - if (txnEnabled) { - const { collectionId, itemId } = req.params + app.patch('/collections/:collectionId/items/:itemId', async (req, res, next) => { + if (txnEnabled) { + const { collectionId, itemId } = req.params - if (req.body.collection && req.body.collection !== collectionId) { - next(createError(400, 'Collection ID in resource URI must match collection in body')) - } else if (req.body.id && req.body.id !== itemId) { - next(createError(400, 'Item ID in resource URI must match id in body')) + if (req.body.collection && req.body.collection !== collectionId) { + next(createError(400, 'Collection ID in resource URI must match collection in body')) + } else if (req.body.id && req.body.id !== itemId) { + next(createError(400, 'Item ID in resource URI must match id in body')) + } else { + const itemRes = await api.getItem( + database, collectionId, itemId, req.endpoint, req.query, req.headers + ) + if (itemRes instanceof Error) next(createError(404)) + else { + try { + //const item = + await api.partialUpdateItem(database, + collectionId, + itemId, + req.endpoint, + req.body) + // res.type('application/geo+json') + // res.json(item) + res.sendStatus(204) + } catch (error) { + next(error) + } + } + } } else { - const itemRes = await api.getItem( - database, collectionId, itemId, req.endpoint, req.query, req.headers - ) - if (itemRes instanceof Error) next(createError(404)) - else { - try { - //const item = - await api.partialUpdateItem(database, - collectionId, - itemId, - req.endpoint, - req.body) - // res.type('application/geo+json') - // res.json(item) + next(createError(404)) + } + }) + + 
app.delete('/collections/:collectionId/items/:itemId', async (req, res, next) => { + if (txnEnabled) { + const { collectionId, itemId } = req.params + try { + const response = await api.deleteItem(database, collectionId, itemId) + if (response instanceof Error) next(createError(500)) + else { res.sendStatus(204) - } catch (error) { - next(error) } + } catch (error) { + next(error) } + } else { + next(createError(404)) } - } else { - next(createError(404)) - } -}) + }) -app.delete('/collections/:collectionId/items/:itemId', async (req, res, next) => { - if (txnEnabled) { - const { collectionId, itemId } = req.params + app.get('/collections/:collectionId/items/:itemId/thumbnail', async (req, res, next) => { try { - const response = await api.deleteItem(database, collectionId, itemId) - if (response instanceof Error) next(createError(500)) - else { - res.sendStatus(204) + const { itemId, collectionId } = req.params + + const response = await api.getItemThumbnail( + database, collectionId, itemId, req.query, req.headers + ) + + if (response instanceof NotFoundError) { + next(createError(404)) + } else if (response instanceof Error) { + next(createError(500)) + } else { + res.redirect(response.location) } } catch (error) { next(error) } - } else { - next(createError(404)) - } -}) - -app.get('/collections/:collectionId/items/:itemId/thumbnail', async (req, res, next) => { - try { - const { itemId, collectionId } = req.params + }) - const response = await api.getItemThumbnail( - database, collectionId, itemId, req.query, req.headers - ) + /** + * Redirects a request for a proxied asset to a presigned S3 URL + * @param {Request} req - Express request + * @param {Response} res - Express response + * @param {NextFunction} next - Express next function + * @returns {Promise} Resolves when done + */ + const redirectProxiedAssetRequest = async (req, res, next) => { + logger.debug('Asset proxy request', { params: req.params }) + try { + const proxyConfig = getCachedProxyConfig() 
+ if (!proxyConfig.enabled) { + return next(createError(403)) + } - if (response instanceof NotFoundError) { - next(createError(404)) - } else if (response instanceof Error) { - next(createError(500)) - } else { - res.redirect(response.location) - } - } catch (error) { - next(error) - } -}) + const { collectionId, itemId, assetKey } = req.params + const itemOrCollection = itemId // itemId is only defined for item assets + ? await api.getItem(database, collectionId, itemId, req.endpoint, req.query, req.headers) + : await api.getCollection(database, collectionId, req.endpoint, req.query, req.headers) + if (itemOrCollection instanceof NotFoundError) { + return next(createError(404)) + } + if (itemOrCollection instanceof Error) { + return next(createError(500)) + } -/** - * Redirects a request for a proxied asset to a presigned S3 URL - * @param {Request} req - Express request - * @param {Response} res - Express response - * @param {NextFunction} next - Express next function - * @returns {Promise} Resolves when done - */ -const redirectProxiedAssetRequest = async (req, res, next) => { - logger.debug('Asset proxy request', { params: req.params }) - try { - const proxyConfig = getCachedProxyConfig() - if (!proxyConfig.enabled) { - return next(createError(403)) - } + // @ts-ignore - assetKey guaranteed by Express route + const asset = itemOrCollection.assets?.[assetKey] || null + if (!asset) { + return next(createError(404)) + } - const { collectionId, itemId, assetKey } = req.params - const itemOrCollection = itemId // itemId is only defined for item assets - ? 
await api.getItem(database, collectionId, itemId, req.endpoint, req.query, req.headers) - : await api.getCollection(database, collectionId, req.endpoint, req.query, req.headers) - if (itemOrCollection instanceof NotFoundError) { - return next(createError(404)) - } - if (itemOrCollection instanceof Error) { - return next(createError(500)) - } + const alternateHref = asset.alternate?.s3?.href || null + if (!alternateHref) { + return next(createError(404)) + } - // @ts-ignore - assetKey guaranteed by Express route - const asset = itemOrCollection.assets?.[assetKey] || null - if (!asset) { - return next(createError(404)) - } + const s3Info = parseS3Url(alternateHref) + if (!s3Info) { + return next(createError(500, 'Asset S3 href is invalid')) + } - const alternateHref = asset.alternate?.s3?.href || null - if (!alternateHref) { - return next(createError(404)) - } + if (!shouldProxyAssets(s3Info.bucket, proxyConfig)) { + return next(createError(403)) + } - const s3Info = parseS3Url(alternateHref) - if (!s3Info) { - return next(createError(500, 'Asset S3 href is invalid')) - } + let region = null + if (s3Info.region) { + region = s3Info.region + } else { + region = determineS3Region(asset, itemOrCollection) + } - if (!shouldProxyAssets(s3Info.bucket, proxyConfig)) { - return next(createError(403)) - } + const presignedUrl = await generatePresignedUrl( + s3Info.bucket, + s3Info.key, + region, + proxyConfig.urlExpiry + ) - let region = null - if (s3Info.region) { - region = s3Info.region - } else { - region = determineS3Region(asset, itemOrCollection) + return res.redirect(presignedUrl) + } catch (error) { + return next(error) } - - const presignedUrl = await generatePresignedUrl( - s3Info.bucket, - s3Info.key, - region, - proxyConfig.urlExpiry - ) - - return res.redirect(presignedUrl) - } catch (error) { - return next(error) } -} -app.get('/collections/:collectionId/items/:itemId/assets/:assetKey', - async (req, res, next) => { + 
app.get('/collections/:collectionId/items/:itemId/assets/:assetKey', + async (req, res, next) => { + await redirectProxiedAssetRequest(req, res, next) + }) + + app.get('/collections/:collectionId/assets/:assetKey', async (req, res, next) => { await redirectProxiedAssetRequest(req, res, next) }) -app.get('/collections/:collectionId/assets/:assetKey', async (req, res, next) => { - await redirectProxiedAssetRequest(req, res, next) -}) - -// catch 404 and forward to error handler -app.use((_req, _res, next) => { - next(createError(404)) -}) - -// error handler -app.use( - // eslint-disable-next-line @typescript-eslint/no-unused-vars - /** @type {ErrorRequestHandler} */ ((err, _req, res, _next) => { - res.status(err.status || 500) - - res.type('application/json') - - switch (err.status) { - case 400: - res.json({ code: 'BadRequest', description: err.message }) - break - case 403: - res.json({ code: 'Forbidden', description: 'Forbidden' }) - break - case 404: - res.json({ code: 'NotFound', description: 'Not Found' }) - break - default: - logger.error(err) - res.json({ code: 'InternalServerError', description: err.message }) - break - } + // catch 404 and forward to error handler + app.use((_req, _res, next) => { + next(createError(404)) }) -) -export default { app } + // error handler + app.use( + // eslint-disable-next-line @typescript-eslint/no-unused-vars + /** @type {ErrorRequestHandler} */ ((err, _req, res, _next) => { + res.status(err.status || 500) + + res.type('application/json') + + switch (err.status) { + case 400: + res.json({ code: 'BadRequest', description: err.message }) + break + case 403: + res.json({ code: 'Forbidden', description: 'Forbidden' }) + break + case 404: + res.json({ code: 'NotFound', description: 'Not Found' }) + break + default: + logger.error(err) + res.json({ code: 'InternalServerError', description: err.message }) + break + } + }) + ) + + return app +} + +export default { createApp } diff --git a/src/lambdas/api/index.js 
b/src/lambdas/api/index.js index 51257777..5874094b 100644 --- a/src/lambdas/api/index.js +++ b/src/lambdas/api/index.js @@ -9,7 +9,7 @@ import { z } from 'zod' import serverless from 'serverless-http' import { Lambda } from '@aws-sdk/client-lambda' -import { app } from './app.js' +import { createApp } from './app.js' import _default from './types.js' import logger from '../../lib/logger.js' @@ -156,13 +156,19 @@ const invokePostHook = async (lambda, postHook, payload) => { return hookResult } +let appInstance = null + /** * @param {APIGatewayProxyEvent} event * @param {Context} context * @returns {Promise} */ const callServerlessApp = async (event, context) => { - const result = await serverless(app)(event, context) + if (!appInstance) { + appInstance = await createApp() + } + + const result = await serverless(appInstance)(event, context) try { return APIGatewayProxyResultSchema.parse(result) diff --git a/src/lambdas/api/local.ts b/src/lambdas/api/local.ts index 6833a06b..c4f518d7 100644 --- a/src/lambdas/api/local.ts +++ b/src/lambdas/api/local.ts @@ -1,5 +1,5 @@ import winston from 'winston' -import { app } from './app.js' +import { createApp } from './app.js' const logger = winston.createLogger({ level: process.env['LOG_LEVEL'] || 'warn', @@ -11,6 +11,7 @@ const logger = winston.createLogger({ const port = 3000 +const app = await createApp() app.listen(port, () => { logger.warn(`stac-server listening on port ${port}`) }) diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index 93273c41..e1ba81ef 100644 --- a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -9,8 +9,8 @@ export const BucketOption = Object.freeze({ LIST: 'LIST' }) -// Cached configuration - initialized once at startup -let cachedProxyConfig = null +// Cached configuration +let proxyConfigCache = null // Cached S3 clients by region to avoid creating new clients on each request const s3ClientCache = new Map() @@ -55,65 +55,76 @@ const fetchAllBucketsInAccount = async () => { } /** - 
* Load and cache proxy configuration from environment variables - * This function is called once at app startup and the result is cached. + * Initialize asset proxy configuration. + * The config is cached after first initialization. Subsequent calls return the cached value. * @returns {Promise} Configuration object */ -export const getProxyConfig = async () => { - // Return cached config if already loaded - if (cachedProxyConfig) { - return cachedProxyConfig +export const initProxyConfig = async () => { + if (proxyConfigCache) { + return proxyConfigCache } const bucketOption = process.env['ASSET_PROXY_BUCKET_OPTION'] || BucketOption.NONE const bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] || '' const urlExpiry = parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) - if (bucketOption === BucketOption.NONE) { - cachedProxyConfig = { + switch (bucketOption) { + case BucketOption.NONE: + proxyConfigCache = { enabled: false, mode: BucketOption.NONE, buckets: new Set(), urlExpiry } - } else if (bucketOption === BucketOption.ALL) { - cachedProxyConfig = { + break + + case BucketOption.ALL: + proxyConfigCache = { enabled: true, mode: BucketOption.ALL, buckets: new Set(), urlExpiry } - } else if (bucketOption === BucketOption.ALL_BUCKETS_IN_ACCOUNT) { + break + + case BucketOption.ALL_BUCKETS_IN_ACCOUNT: { const buckets = await fetchAllBucketsInAccount() - cachedProxyConfig = { + proxyConfigCache = { enabled: true, mode: BucketOption.ALL_BUCKETS_IN_ACCOUNT, buckets, urlExpiry } - } else if (bucketOption === BucketOption.LIST) { + break + } + + case BucketOption.LIST: { const buckets = bucketList.split(',').map((b) => b.trim()).filter((b) => b) - cachedProxyConfig = { + proxyConfigCache = { enabled: true, mode: BucketOption.LIST, buckets: new Set(buckets), urlExpiry } - } else { + break + } + + default: { const validOptions = Object.values(BucketOption).join(', ') throw new Error( `Invalid ASSET_PROXY_BUCKET_OPTION: ${bucketOption}. 
Must be one of: ${validOptions}` ) } + } logger.debug('Asset proxy configuration loaded', { - mode: cachedProxyConfig.mode, - enabled: cachedProxyConfig.enabled, - bucketCount: cachedProxyConfig.buckets.size, - urlExpiry: cachedProxyConfig.urlExpiry + mode: proxyConfigCache.mode, + enabled: proxyConfigCache.enabled, + bucketCount: proxyConfigCache.buckets.size, + urlExpiry: proxyConfigCache.urlExpiry }) - return cachedProxyConfig + return proxyConfigCache } /** @@ -121,10 +132,10 @@ export const getProxyConfig = async () => { * @returns {Object} Cached configuration object */ export const getCachedProxyConfig = () => { - if (!cachedProxyConfig) { - throw new Error('Asset proxy config not initialized. Call getProxyConfig() at startup.') + if (!proxyConfigCache) { + throw new Error('Asset proxy config not initialized.') } - return cachedProxyConfig + return proxyConfigCache } /** @@ -140,10 +151,6 @@ export const getCachedProxyConfig = () => { * @returns {Object|null} {bucket, key, region} or null if not a valid S3 URL */ export const parseS3Url = (url) => { - if (!url || typeof url !== 'string') { - return null - } - // S3 URI format: s3://bucket/key if (url.startsWith('s3://')) { const withoutProtocol = url.substring(5) @@ -184,8 +191,9 @@ export const parseS3Url = (url) => { return { bucket, key, region } } - // Path style: s3.region.amazonaws.com/bucket/key or s3.amazonaws.com/bucket/key - const pathStyleMatch = hostname.match(/^s3(?:\.([^.]+))?\.amazonaws\.com$/) + // Path style: s3.region.amazonaws.com/bucket/key, + // s3-region.amazonaws.com/bucket/key, or s3.amazonaws.com/bucket/key + const pathStyleMatch = hostname.match(/^s3(?:[.-]([^.]+))?\.amazonaws\.com$/) if (pathStyleMatch) { const region = pathStyleMatch[1] || null const pathParts = pathname.split('/').filter((p) => p) @@ -334,7 +342,7 @@ export const determineS3Region = (asset, itemOrCollection) => { } export default { - getProxyConfig, + initProxyConfig, getCachedProxyConfig, parseS3Url, 
shouldProxyAssets, diff --git a/src/lib/database-client.js b/src/lib/database-client.js index 2929b9c9..7ba76769 100644 --- a/src/lib/database-client.js +++ b/src/lib/database-client.js @@ -74,7 +74,7 @@ export async function dbClient() { return _dbClient } -export async function createIndex(index) { +export async function createIndex(index) { const client = await dbClient() const exists = await client.indices.exists({ index }) if (!exists.body) { diff --git a/tests/helpers/api.js b/tests/helpers/api.js index 0845a974..a662e5a3 100644 --- a/tests/helpers/api.js +++ b/tests/helpers/api.js @@ -1,6 +1,6 @@ import got from 'got' // eslint-disable-line import/no-unresolved import { once } from 'events' -import { app } from '../../src/lambdas/api/app.js' +import { createApp } from '../../src/lambdas/api/app.js' /** * @typedef {import('got').Got} Got @@ -30,6 +30,7 @@ const apiClient = (url) => got.extend({ * @returns {Promise} */ export const startApi = async () => { + const app = await createApp() const server = app.listen(0, '127.0.0.1') await once(server, 'listening') diff --git a/tests/system/test-api-asset-proxy.js b/tests/system/test-api-asset-proxy.js new file mode 100644 index 00000000..d5d93f36 --- /dev/null +++ b/tests/system/test-api-asset-proxy.js @@ -0,0 +1,147 @@ +// @ts-nocheck + +/** + * Asset Proxy System Tests + * + * These tests verify the asset proxy endpoints work correctly. + * The env var is set before starting the API to test with proxying enabled. 
+ */ + +// Set env var before starting the API +process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + +/* eslint-disable import/first */ +import test from 'ava' +import { deleteAllIndices } from '../helpers/database.js' +import { ingestItem } from '../helpers/ingest.js' +import { randomId, loadFixture } from '../helpers/utils.js' +import { setup } from '../helpers/system-tests.js' +/* eslint-enable import/first */ + +test.before(async (t) => { + await deleteAllIndices() + const standUpResult = await setup() + + t.context = standUpResult + + t.context.collectionId = randomId('collection') + + const collection = await loadFixture( + 'landsat-8-l1-collection.json', + { id: t.context.collectionId } + ) + + await ingestItem({ + ingestQueueUrl: t.context.ingestQueueUrl, + ingestTopicArn: t.context.ingestTopicArn, + item: collection + }) + + t.context.itemId = randomId('item') + + const item = await loadFixture( + 'stac/LC80100102015082LGN00.json', + { + id: t.context.itemId, + collection: t.context.collectionId + } + ) + + await ingestItem({ + ingestQueueUrl: t.context.ingestQueueUrl, + ingestTopicArn: t.context.ingestTopicArn, + item + }) +}) + +test.after.always(async (t) => { + if (t.context.api) await t.context.api.close() +}) + +test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - 302 redirect to presigned URL', async (t) => { + const { collectionId, itemId } = t.context + + const response = await t.context.api.client.get( + `collections/${collectionId}/items/${itemId}/assets/B1`, + { + resolveBodyOnly: false, + throwHttpErrors: false, + followRedirect: false + } + ) + + t.is(response.statusCode, 302) + t.truthy(response.headers.location) + t.true(response.headers.location.includes('landsat-pds')) + t.true(response.headers.location.includes('X-Amz-Algorithm')) + t.true(response.headers.location.includes('X-Amz-Signature')) +}) + +test('GET /collections/:collectionId/assets/:assetKey - 302 redirect for collection assets', async (t) => { + const { 
collectionId } = t.context + + const collection = await t.context.api.client.get( + `collections/${collectionId}`, + { resolveBodyOnly: false } + ) + + if (!collection.body.assets || Object.keys(collection.body.assets).length === 0) { + t.pass('Collection has no assets to test') + return + } + + const assetKey = Object.keys(collection.body.assets)[0] + + const response = await t.context.api.client.get( + `collections/${collectionId}/assets/${assetKey}`, + { + resolveBodyOnly: false, + throwHttpErrors: false, + followRedirect: false + } + ) + + t.is(response.statusCode, 302) + t.truthy(response.headers.location) + t.true(response.headers.location.includes('X-Amz-Algorithm')) +}) + +test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - 404 for non-existent asset', async (t) => { + const { collectionId, itemId } = t.context + + const response = await t.context.api.client.get( + `collections/${collectionId}/items/${itemId}/assets/DOES_NOT_EXIST`, + { + resolveBodyOnly: false, + throwHttpErrors: false + } + ) + + t.is(response.statusCode, 404) +}) + +test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - 404 for non-existent item', async (t) => { + const { collectionId } = t.context + + const response = await t.context.api.client.get( + `collections/${collectionId}/items/DOES_NOT_EXIST/assets/B1`, + { + resolveBodyOnly: false, + throwHttpErrors: false + } + ) + + t.is(response.statusCode, 404) +}) + +test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - 404 for non-existent collection', async (t) => { + const response = await t.context.api.client.get( + 'collections/DOES_NOT_EXIST/items/DOES_NOT_EXIST/assets/B1', + { + resolveBodyOnly: false, + throwHttpErrors: false + } + ) + + t.is(response.statusCode, 404) +}) diff --git a/tests/system/test-api-item-get.js b/tests/system/test-api-item-get.js index 517787f5..c33bba1b 100644 --- a/tests/system/test-api-item-get.js +++ b/tests/system/test-api-item-get.js @@ -46,6 +46,8 @@ 
test.beforeEach(async (_) => { delete process.env['ENABLE_COLLECTIONS_AUTHX'] delete process.env['ENABLE_FILTER_AUTHX'] delete process.env['ENABLE_THUMBNAILS'] + delete process.env['ASSET_PROXY_BUCKET_OPTION'] + delete process.env['ASSET_PROXY_BUCKET_LIST'] }) test.after.always(async (t) => { @@ -302,3 +304,25 @@ test('GET /collections/:collectionId/items/:itemId/thumbnail with filter authx r }) } })).statusCode, 302) }) + +test.serial('GET /collections/:collectionId/items/:itemId with asset proxying transforms assets', async (t) => { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const { collectionId, itemId } = t.context + + const response = await t.context.api.client.get( + `collections/${collectionId}/items/${itemId}`, + { resolveBodyOnly: false } + ) + + t.is(response.statusCode, 200) + + const { assets } = response.body + t.truthy(assets.B1) + + const b1Asset = assets.B1 + t.true(b1Asset.href.includes(`/collections/${collectionId}/items/${itemId}/assets/B1`)) + t.truthy(b1Asset.alternate) + t.truthy(b1Asset.alternate.s3) + t.true(b1Asset.alternate.s3.href.includes('landsat-pds')) +}) diff --git a/tests/system/test-api-search-post.js b/tests/system/test-api-search-post.js index e0f2048d..0ced65c0 100644 --- a/tests/system/test-api-search-post.js +++ b/tests/system/test-api-search-post.js @@ -45,6 +45,8 @@ test.before(async (t) => { test.beforeEach(async (_) => { delete process.env['ENABLE_COLLECTIONS_AUTHX'] delete process.env['ENABLE_FILTER_AUTHX'] + delete process.env['ASSET_PROXY_BUCKET_OPTION'] + delete process.env['ASSET_PROXY_BUCKET_LIST'] }) test.after.always(async (t) => { @@ -1615,3 +1617,28 @@ test('/search - context extension - context added when enabled', async (t) => { t.is(response.context.returned, 3) t.is(response.context.limit, 10) }) + +test.serial('POST /search with asset proxying transforms assets', async (t) => { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const response = await t.context.api.client.post('search', { + 
json: { + collections: ['landsat-8-l1'], + limit: 1 + } + }) + + t.is(response.features.length, 1) + + const item = response.features[0] + t.truthy(item.assets) + + const assetKeys = Object.keys(item.assets) + t.true(assetKeys.length > 0) + + const sampleAsset = item.assets[assetKeys[0]] + t.true(sampleAsset.href.includes(`/collections/${item.collection}/items/${item.id}/assets/${assetKeys[0]}`)) + t.truthy(sampleAsset.alternate) + t.truthy(sampleAsset.alternate.s3) + t.true(sampleAsset.alternate.s3.href.includes('s3') || sampleAsset.alternate.s3.href.includes('amazonaws')) +}) diff --git a/tests/system/test-ingest.js b/tests/system/test-ingest.js index c5aff86a..a4fedd41 100644 --- a/tests/system/test-ingest.js +++ b/tests/system/test-ingest.js @@ -37,6 +37,8 @@ test.beforeEach(async (t) => { await purgeQueue(ingestQueueUrl) delete process.env['ENABLE_INGEST_ACTION_TRUNCATE'] + delete process.env['ASSET_PROXY_BUCKET_OPTION'] + delete process.env['ASSET_PROXY_BUCKET_LIST'] }) test.afterEach.always(() => { @@ -517,6 +519,39 @@ test('Ingested item is published to post-ingest SNS topic with updated links', a } }) +test.serial('Ingested item is published to post-ingest SNS topic with transformed assets', async (t) => { + const envBeforeTest = { ...process.env } + try { + const hostname = 'some-stac-server.com' + const endpoint = `https://${hostname}` + process.env['STAC_API_URL'] = endpoint + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const collection = await ingestCollectionAndPurgePostIngestQueue(t) + + const item = await loadFixture( + 'stac/LC80100102015082LGN00.json', + { id: randomId('item'), collection: collection.id } + ) + + const { message } = await testPostIngestSNS(t, item) + + t.truthy(message.record.assets) + + const assetKeys = Object.keys(message.record.assets) + t.true(assetKeys.length > 0) + + const b1Asset = message.record.assets.B1 + t.truthy(b1Asset) + 
t.true(b1Asset.href.includes(`/collections/${collection.id}/items/${item.id}/assets/B1`)) + t.truthy(b1Asset.alternate) + t.truthy(b1Asset.alternate.s3) + t.true(b1Asset.alternate.s3.href.includes('landsat-pds')) + } finally { + process.env = envBeforeTest + } +}) + test('Ingested item failure is published to post-ingest SNS topic without updated links', async (t) => { const envBeforeTest = { ...process.env } try { diff --git a/tests/unit/test-asset-proxy.js b/tests/unit/test-asset-proxy.js new file mode 100644 index 00000000..6e17f4e7 --- /dev/null +++ b/tests/unit/test-asset-proxy.js @@ -0,0 +1,315 @@ +import test from 'ava' +import { + parseS3Url, + proxyAssets, + shouldProxyAssets, + determineS3Region, + BucketOption +} from '../../src/lib/asset-proxy.js' + +test('parseS3Url - s3:// URI format', (t) => { + const result = parseS3Url('s3://my-bucket/path/to/file.tif') + t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: null }) +}) + +test('parseS3Url - virtual-hosted style with region', (t) => { + const result = parseS3Url('https://my-bucket.s3.us-west-2.amazonaws.com/path/to/file.tif') + t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: 'us-west-2' }) +}) + +test('parseS3Url - virtual-hosted style without region', (t) => { + const result = parseS3Url('https://my-bucket.s3.amazonaws.com/path/to/file.tif') + t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: null }) +}) + +test('parseS3Url - path style with region (dot format)', (t) => { + const result = parseS3Url('https://s3.us-east-1.amazonaws.com/my-bucket/path/to/file.tif') + t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: 'us-east-1' }) +}) + +test('parseS3Url - path style with region (hyphen format - legacy)', (t) => { + const result = parseS3Url('https://s3-us-west-2.amazonaws.com/landsat-pds/L8/file.tif') + t.deepEqual(result, { bucket: 'landsat-pds', key: 'L8/file.tif', region: 'us-west-2' }) +}) + 
+test('parseS3Url - path style without region', (t) => { + const result = parseS3Url('https://s3.amazonaws.com/my-bucket/path/to/file.tif') + t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: null }) +}) + +test('parseS3Url - invalid URLs', (t) => { + t.is(parseS3Url('https://example.com/file.tif'), null) + t.is(parseS3Url('s3://bucket'), null) + t.is(parseS3Url(''), null) +}) + +test('shouldProxyAssets - ALL mode', (t) => { + const config = { + enabled: true, + mode: BucketOption.ALL, + buckets: new Set(), + urlExpiry: 300 + } + t.true(shouldProxyAssets('any-bucket', config)) +}) + +test('shouldProxyAssets - NONE mode', (t) => { + const config = { + enabled: false, + mode: BucketOption.NONE, + buckets: new Set(), + urlExpiry: 300 + } + t.false(shouldProxyAssets('any-bucket', config)) +}) + +test('shouldProxyAssets - LIST mode with matching bucket', (t) => { + const config = { + enabled: true, + mode: BucketOption.LIST, + buckets: new Set(['bucket1', 'bucket2']), + urlExpiry: 300 + } + t.true(shouldProxyAssets('bucket1', config)) + t.false(shouldProxyAssets('bucket3', config)) +}) + +test('shouldProxyAssets - ALL_BUCKETS_IN_ACCOUNT mode', (t) => { + const config = { + enabled: true, + mode: BucketOption.ALL_BUCKETS_IN_ACCOUNT, + buckets: new Set(['account-bucket-1', 'account-bucket-2']), + urlExpiry: 300 + } + t.true(shouldProxyAssets('account-bucket-1', config)) + t.false(shouldProxyAssets('other-bucket', config)) +}) + +test('proxyAssets - transforms assets with ALL mode', (t) => { + const config = { + enabled: true, + mode: BucketOption.ALL, + buckets: new Set(), + urlExpiry: 300 + } + + const assets = { + thumbnail: { + href: 's3://my-bucket/thumb.jpg', + type: 'image/jpeg' + }, + data: { + href: 'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif', + type: 'image/tiff' + } + } + + const { assets: proxied, wasProxied } = proxyAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1', + config + ) + + 
t.true(wasProxied) + t.is(proxied.thumbnail.href, 'https://api.example.com/collections/collection1/items/item1/assets/thumbnail') + t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/thumb.jpg') + t.is(proxied.data.href, 'https://api.example.com/collections/collection1/items/item1/assets/data') + t.is(proxied.data.alternate.s3.href, 'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif') +}) + +test('proxyAssets - no transformation with NONE mode', (t) => { + const config = { + enabled: false, + mode: BucketOption.NONE, + buckets: new Set(), + urlExpiry: 300 + } + + const assets = { + thumbnail: { + href: 's3://my-bucket/thumb.jpg', + type: 'image/jpeg' + } + } + + const { assets: proxied, wasProxied } = proxyAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1', + config + ) + + t.false(wasProxied) + t.is(proxied.thumbnail.href, 's3://my-bucket/thumb.jpg') + t.is(proxied.thumbnail.alternate, undefined) +}) + +test('proxyAssets - LIST mode only transforms matching buckets', (t) => { + const config = { + enabled: true, + mode: BucketOption.LIST, + buckets: new Set(['proxied-bucket']), + urlExpiry: 300 + } + + const assets = { + proxied: { + href: 's3://proxied-bucket/file.tif', + type: 'image/tiff' + }, + notProxied: { + href: 's3://other-bucket/file.tif', + type: 'image/tiff' + } + } + + const { assets: proxied, wasProxied } = proxyAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1', + config + ) + + t.true(wasProxied) + t.is(proxied.proxied.href, 'https://api.example.com/collections/collection1/items/item1/assets/proxied') + t.is(proxied.notProxied.href, 's3://other-bucket/file.tif') + t.is(proxied.notProxied.alternate, undefined) +}) + +test('proxyAssets - collection assets (no itemId)', (t) => { + const config = { + enabled: true, + mode: BucketOption.ALL, + buckets: new Set(), + urlExpiry: 300 + } + + const assets = { + thumbnail: { + href: 's3://my-bucket/collection-thumb.jpg', + type: 'image/jpeg' + } + } + + 
const { assets: proxied, wasProxied } = proxyAssets( + assets, + 'https://api.example.com', + 'collection1', + null, + config + ) + + t.true(wasProxied) + t.is(proxied.thumbnail.href, 'https://api.example.com/collections/collection1/assets/thumbnail') + t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/collection-thumb.jpg') +}) + +test('proxyAssets - preserves existing alternate links', (t) => { + const config = { + enabled: true, + mode: BucketOption.ALL, + buckets: new Set(), + urlExpiry: 300 + } + + const assets = { + data: { + href: 's3://my-bucket/data.tif', + type: 'image/tiff', + alternate: { + http: { href: 'https://example.com/data.tif' } + } + } + } + + const { assets: proxied } = proxyAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1', + config + ) + + t.is(proxied.data.alternate.http.href, 'https://example.com/data.tif') + t.is(proxied.data.alternate.s3.href, 's3://my-bucket/data.tif') +}) + +test('proxyAssets - handles non-S3 assets', (t) => { + const config = { + enabled: true, + mode: BucketOption.ALL, + buckets: new Set(), + urlExpiry: 300 + } + + const assets = { + metadata: { + href: 'https://example.com/metadata.xml', + type: 'application/xml' + } + } + + const { assets: proxied, wasProxied } = proxyAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1', + config + ) + + t.false(wasProxied) + t.is(proxied.metadata.href, 'https://example.com/metadata.xml') + t.is(proxied.metadata.alternate, undefined) +}) + +test('determineS3Region - v1 asset-level storage extension', (t) => { + const asset = { 'storage:region': 'us-east-1' } + const item = {} + t.is(determineS3Region(asset, item), 'us-east-1') +}) + +test('determineS3Region - v1 item-level storage extension', (t) => { + const asset = {} + const item = { properties: { 'storage:region': 'eu-west-1' } } + t.is(determineS3Region(asset, item), 'eu-west-1') +}) + +test('determineS3Region - v2 storage extension', (t) => { + const asset = { 
'storage:refs': 'scheme1' } + const item = { + 'storage:schemes': { + scheme1: { region: 'ap-southeast-2' } + } + } + t.is(determineS3Region(asset, item), 'ap-southeast-2') +}) + +test('determineS3Region - default fallback', (t) => { + const originalRegion = process.env['AWS_REGION'] + delete process.env['AWS_REGION'] + + const asset = {} + const item = {} + t.is(determineS3Region(asset, item), 'us-west-2') + + if (originalRegion) process.env['AWS_REGION'] = originalRegion +}) + +test('determineS3Region - environment variable fallback', (t) => { + const originalRegion = process.env['AWS_REGION'] + process.env['AWS_REGION'] = 'us-west-1' + + const asset = {} + const item = {} + t.is(determineS3Region(asset, item), 'us-west-1') + + if (originalRegion) { + process.env['AWS_REGION'] = originalRegion + } else { + delete process.env['AWS_REGION'] + } +}) From c969b9cdb47da8a6d2df2676f49b487adef2c7a0 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Tue, 14 Oct 2025 17:05:43 -0500 Subject: [PATCH 03/22] refactor: second iteration of initial implementation --- src/lambdas/api/app.js | 136 +++++++-------- src/lib/api.js | 6 +- src/lib/asset-proxy.js | 296 +++++++++++++++------------------ src/lib/database-client.js | 2 +- src/lib/errors.js | 7 + src/lib/ingest.js | 5 +- tests/unit/test-asset-proxy.js | 257 ++++++++++++++++------------ 7 files changed, 362 insertions(+), 347 deletions(-) diff --git a/src/lambdas/api/app.js b/src/lambdas/api/app.js index 40a3ca3a..f63378da 100644 --- a/src/lambdas/api/app.js +++ b/src/lambdas/api/app.js @@ -7,18 +7,11 @@ import path from 'path' import { fileURLToPath } from 'url' import database from '../../lib/database.js' import api from '../../lib/api.js' -import { NotFoundError, ValidationError } from '../../lib/errors.js' +import { NotFoundError, ValidationError, ForbiddenError } from '../../lib/errors.js' import { readFile } from '../../lib/fs.js' import addEndpoint from './middleware/add-endpoint.js' import logger from 
'../../lib/logger.js' -import { - getCachedProxyConfig, - parseS3Url, - shouldProxyAssets, - generatePresignedUrl, - determineS3Region, - initProxyConfig, -} from '../../lib/asset-proxy.js' +import { getAssetProxyBuckets, getAssetPresignedUrl } from '../../lib/asset-proxy.js' /** * @typedef {import('express').Request} Request @@ -28,7 +21,7 @@ import { */ export const createApp = async () => { - await initProxyConfig() + await getAssetProxyBuckets() const txnEnabled = process.env['ENABLE_TRANSACTIONS_EXTENSION'] === 'true' @@ -463,79 +456,72 @@ export const createApp = async () => { } }) - /** - * Redirects a request for a proxied asset to a presigned S3 URL - * @param {Request} req - Express request - * @param {Response} res - Express response - * @param {NextFunction} next - Express next function - * @returns {Promise} Resolves when done - */ - const redirectProxiedAssetRequest = async (req, res, next) => { - logger.debug('Asset proxy request', { params: req.params }) - try { - const proxyConfig = getCachedProxyConfig() - if (!proxyConfig.enabled) { - return next(createError(403)) - } - - const { collectionId, itemId, assetKey } = req.params - const itemOrCollection = itemId // itemId is only defined for item assets - ? 
await api.getItem(database, collectionId, itemId, req.endpoint, req.query, req.headers) - : await api.getCollection(database, collectionId, req.endpoint, req.query, req.headers) - if (itemOrCollection instanceof NotFoundError) { - return next(createError(404)) - } - if (itemOrCollection instanceof Error) { - return next(createError(500)) - } - - // @ts-ignore - assetKey guaranteed by Express route - const asset = itemOrCollection.assets?.[assetKey] || null - if (!asset) { - return next(createError(404)) - } - - const alternateHref = asset.alternate?.s3?.href || null - if (!alternateHref) { - return next(createError(404)) - } + app.get('/collections/:collectionId/items/:itemId/assets/:assetKey', + async (req, res, next) => { + try { + const item = await api.getItem( + database, + req.params.collectionId, + req.params.itemId, + req.endpoint, + req.query, + req.headers, + ) - const s3Info = parseS3Url(alternateHref) - if (!s3Info) { - return next(createError(500, 'Asset S3 href is invalid')) + if (item instanceof NotFoundError) { + next(createError(404)) + } else if (item instanceof Error) { + next(createError(500)) + } else { + const presignedUrl = await getAssetPresignedUrl(item, req.params.assetKey) + if (presignedUrl instanceof ValidationError) { + next(createError(400)) + } else if (presignedUrl instanceof ForbiddenError) { + next(createError(403)) + } else if (presignedUrl instanceof NotFoundError) { + next(createError(404)) + } else if (presignedUrl instanceof Error) { + next(createError(500)) + } else { + res.redirect(presignedUrl) + } + } + } catch (error) { + next(error) } + }) - if (!shouldProxyAssets(s3Info.bucket, proxyConfig)) { - return next(createError(403)) - } + app.get('/collections/:collectionId/assets/:assetKey', async (req, res, next) => { + try { + const collection = await api.getCollection( + database, + req.params.collectionId, + req.endpoint, + req.query, + req.headers, + ) - let region = null - if (s3Info.region) { - region = s3Info.region + 
if (collection instanceof NotFoundError) { + next(createError(404)) + } else if (collection instanceof Error) { + next(createError(500)) } else { - region = determineS3Region(asset, itemOrCollection) + const presignedUrl = await getAssetPresignedUrl(collection, req.params.assetKey) + if (presignedUrl instanceof ValidationError) { + next(createError(400)) + } else if (presignedUrl instanceof ForbiddenError) { + next(createError(403)) + } else if (presignedUrl instanceof NotFoundError) { + next(createError(404)) + } else if (presignedUrl instanceof Error) { + next(createError(500)) + } else { + res.redirect(presignedUrl) + } } - - const presignedUrl = await generatePresignedUrl( - s3Info.bucket, - s3Info.key, - region, - proxyConfig.urlExpiry - ) - - return res.redirect(presignedUrl) } catch (error) { - return next(error) + next(error) } - } - - app.get('/collections/:collectionId/items/:itemId/assets/:assetKey', - async (req, res, next) => { - await redirectProxiedAssetRequest(req, res, next) - }) - - app.get('/collections/:collectionId/assets/:assetKey', async (req, res, next) => { - await redirectProxiedAssetRequest(req, res, next) }) // catch 404 and forward to error handler diff --git a/src/lib/api.js b/src/lib/api.js index 9c059286..dfdcb353 100644 --- a/src/lib/api.js +++ b/src/lib/api.js @@ -7,7 +7,7 @@ import { isIndexNotFoundError } from './database.js' import logger from './logger.js' import { bboxToPolygon } from './geo-utils.js' import { - getCachedProxyConfig, + isAssetProxyEnabled, proxyAssets, } from './asset-proxy.js' @@ -563,8 +563,7 @@ export const addItemLinks = function (results, endpoint) { // Impure - mutates results export const proxyStacObjectAssets = function (results, endpoint) { - const proxyConfig = getCachedProxyConfig() - if (!proxyConfig.enabled) { + if (!isAssetProxyEnabled()) { return results } @@ -581,7 +580,6 @@ export const proxyStacObjectAssets = function (results, endpoint) { endpoint, collectionId, itemId, - proxyConfig ) if 
(wasProxied) { diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index e1ba81ef..7a505ff0 100644 --- a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -1,6 +1,18 @@ -import { S3Client, GetObjectCommand, ListBucketsCommand } from '@aws-sdk/client-s3' +import { GetObjectCommand, ListBucketsCommand } from '@aws-sdk/client-s3' import { getSignedUrl } from '@aws-sdk/s3-request-presigner' +import { s3 } from './aws-clients.js' import logger from './logger.js' +import { NotFoundError, ValidationError, ForbiddenError } from './errors.js' + +const VIRTUAL_HOST_PATTERN = /^([^.]+)\.s3(?:\.([^.]+))?\.amazonaws\.com$/ +const PATH_STYLE_PATTERN = /^s3(?:[.-]([^.]+))?\.amazonaws\.com$/ + +const s3ClientCache = new Map() +let assetProxyBucketsCache = null + +const getBucketOption = () => process.env['ASSET_PROXY_BUCKET_OPTION'] || 'NONE' +const getBucketList = () => process.env['ASSET_PROXY_BUCKET_LIST'] +const getUrlExpiry = () => parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) export const BucketOption = Object.freeze({ NONE: 'NONE', @@ -9,144 +21,65 @@ export const BucketOption = Object.freeze({ LIST: 'LIST' }) -// Cached configuration -let proxyConfigCache = null - -// Cached S3 clients by region to avoid creating new clients on each request -const s3ClientCache = new Map() - /** - * Get or create an S3Client for a specific region + * Get or create an S3 client for a specific region * @param {string} region - AWS region - * @returns {S3Client} Cached or new S3 client + * @returns {Object} Cached or new S3 client */ const getS3Client = (region) => { if (s3ClientCache.has(region)) { return s3ClientCache.get(region) } - const client = new S3Client({ region }) + const client = s3({ region }) s3ClientCache.set(region, client) return client } /** - * Fetch all bucket names in the AWS account - * This is called once during configuration initialization if mode is ALL_BUCKETS_IN_ACCOUNT - * @returns {Promise>} Set of bucket names + * Cache bucket names 
for asset proxying based on configuration. + * @returns {Promise} */ -const fetchAllBucketsInAccount = async () => { - try { - const region = process.env['AWS_REGION'] || 'us-west-2' - const client = getS3Client(region) - const command = new ListBucketsCommand({}) - const response = await client.send(command) - - const bucketNames = response.Buckets?.map((b) => b.Name) - ?.filter((name) => typeof name === 'string') || [] - const buckets = new Set(bucketNames) - logger.info(`Fetched ${buckets.size} buckets from AWS account for asset proxy`) - return buckets - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error) - logger.error('Failed to fetch buckets from AWS account', { error: errorMessage }) - throw new Error(`Failed to fetch buckets for asset proxy: ${errorMessage}`) - } -} - -/** - * Initialize asset proxy configuration. - * The config is cached after first initialization. Subsequent calls return the cached value. - * @returns {Promise} Configuration object - */ -export const initProxyConfig = async () => { - if (proxyConfigCache) { - return proxyConfigCache - } - - const bucketOption = process.env['ASSET_PROXY_BUCKET_OPTION'] || BucketOption.NONE - const bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] || '' - const urlExpiry = parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) +export const getAssetProxyBuckets = async () => { + const bucketOption = getBucketOption() + const bucketList = getBucketList() switch (bucketOption) { - case BucketOption.NONE: - proxyConfigCache = { - enabled: false, - mode: BucketOption.NONE, - buckets: new Set(), - urlExpiry - } - break - - case BucketOption.ALL: - proxyConfigCache = { - enabled: true, - mode: BucketOption.ALL, - buckets: new Set(), - urlExpiry + case BucketOption.LIST: + if (bucketList) { + const bucketNames = bucketList.split(',').map((b) => b.trim()).filter((b) => b) + assetProxyBucketsCache = new Set(bucketNames) + logger.info( + `Parsed 
${assetProxyBucketsCache.size} buckets from ASSET_PROXY_BUCKET_LIST for asset proxy` + ) + } else { + throw new Error('ASSET_PROXY_BUCKET_LIST must be set when ASSET_PROXY_BUCKET_OPTION is LIST') } break - case BucketOption.ALL_BUCKETS_IN_ACCOUNT: { - const buckets = await fetchAllBucketsInAccount() - proxyConfigCache = { - enabled: true, - mode: BucketOption.ALL_BUCKETS_IN_ACCOUNT, - buckets, - urlExpiry + case BucketOption.ALL_BUCKETS_IN_ACCOUNT: + try { + const region = process.env['AWS_REGION'] || 'us-west-2' + const client = getS3Client(region) + const command = new ListBucketsCommand({}) + const response = await client.send(command) + const bucketNames = response.Buckets?.map((b) => b.Name) + ?.filter((name) => typeof name === 'string') || [] + assetProxyBucketsCache = new Set(bucketNames) + logger.info(`Fetched ${assetProxyBucketsCache.size} buckets from AWS account for asset proxy`) + } catch (error) { + const message = error instanceof Error ? error.message : String(error) + throw new Error(`Failed to fetch buckets for asset proxy: ${message}`) } break - } - case BucketOption.LIST: { - const buckets = bucketList.split(',').map((b) => b.trim()).filter((b) => b) - proxyConfigCache = { - enabled: true, - mode: BucketOption.LIST, - buckets: new Set(buckets), - urlExpiry - } + default: break } - - default: { - const validOptions = Object.values(BucketOption).join(', ') - throw new Error( - `Invalid ASSET_PROXY_BUCKET_OPTION: ${bucketOption}. 
Must be one of: ${validOptions}` - ) - } - } - - logger.debug('Asset proxy configuration loaded', { - mode: proxyConfigCache.mode, - enabled: proxyConfigCache.enabled, - bucketCount: proxyConfigCache.buckets.size, - urlExpiry: proxyConfigCache.urlExpiry - }) - - return proxyConfigCache -} - -/** - * Get the cached proxy configuration synchronously - * @returns {Object} Cached configuration object - */ -export const getCachedProxyConfig = () => { - if (!proxyConfigCache) { - throw new Error('Asset proxy config not initialized.') - } - return proxyConfigCache } /** * Parse S3 URL (URI or HTTPS) into components - * Supports: - * - s3://bucket/key - * - https://bucket.s3.amazonaws.com/key - * - https://bucket.s3.region.amazonaws.com/key - * - https://s3.amazonaws.com/bucket/key - * - https://s3.region.amazonaws.com/bucket/key - * * @param {string} url - S3 URL to parse * @returns {Object|null} {bucket, key, region} or null if not a valid S3 URL */ @@ -178,7 +111,7 @@ export const parseS3Url = (url) => { const pathname = urlObj.pathname // Virtual-hosted style: bucket.s3.region.amazonaws.com or bucket.s3.amazonaws.com - const virtualHostMatch = hostname.match(/^([^.]+)\.s3(?:\.([^.]+))?\.amazonaws\.com$/) + const virtualHostMatch = hostname.match(VIRTUAL_HOST_PATTERN) if (virtualHostMatch) { const bucket = virtualHostMatch[1] const region = virtualHostMatch[2] || null @@ -193,7 +126,7 @@ export const parseS3Url = (url) => { // Path style: s3.region.amazonaws.com/bucket/key, // s3-region.amazonaws.com/bucket/key, or s3.amazonaws.com/bucket/key - const pathStyleMatch = hostname.match(/^s3(?:[.-]([^.]+))?\.amazonaws\.com$/) + const pathStyleMatch = hostname.match(PATH_STYLE_PATTERN) if (pathStyleMatch) { const region = pathStyleMatch[1] || null const pathParts = pathname.split('/').filter((p) => p) @@ -217,55 +150,26 @@ export const parseS3Url = (url) => { } /** - * Determine if a asset hrefs should be proxied - * @param {string} bucket - asset S3 bucket - * @param 
{Object} proxyConfig - Proxy configuration - * @returns {boolean} True if should be proxied + * Determine if asset proxying is enabled + * @returns {boolean} True if enabled */ -export const shouldProxyAssets = (bucket, proxyConfig) => { - if (!proxyConfig.enabled) { +export const isAssetProxyEnabled = () => { + if (getBucketOption() === BucketOption.NONE) { return false } - - if (proxyConfig.mode === BucketOption.ALL) { - return true - } - - // For LIST and ALL_BUCKETS_IN_ACCOUNT modes - return proxyConfig.buckets.has(bucket) + return true } /** - * Generate a pre-signed URL for S3 object access - * Uses cached S3 clients per region for better performance. - * - * @param {string} bucket - S3 bucket name - * @param {string} key - S3 object key - * @param {string} region - AWS region - * @param {number} expirySeconds - URL expiry time in seconds - * @returns {Promise} Pre-signed URL + * Determine if a bucket's assets should be proxied + * @param {string} bucket - S3 bucket + * @returns {boolean} True if assets should be proxied */ -export const generatePresignedUrl = async (bucket, key, region, expirySeconds) => { - const client = getS3Client(region) - - const command = new GetObjectCommand({ - Bucket: bucket, - Key: key, - RequestPayer: 'requester' - }) - - const presignedUrl = await getSignedUrl(client, command, { - expiresIn: expirySeconds - }) - - logger.debug('Generated pre-signed URL for asset', { - bucket, - key, - region, - expirySeconds, - }) - - return presignedUrl +export const shouldProxyAssets = (bucket) => { + if (getBucketOption() === BucketOption.ALL || assetProxyBucketsCache?.has(bucket)) { + return true + } + return false } /** @@ -274,22 +178,21 @@ export const generatePresignedUrl = async (bucket, key, region, expirySeconds) = * @param {string} endpoint - API endpoint base URL * @param {string} collectionId - Collection ID * @param {string|null} itemId - Item ID (null for collection assets) - * @param {Object} proxyConfig - Proxy configuration * 
@returns {Object} {assets: Proxied assets object, wasProxied: boolean} */ -export const proxyAssets = (assets, endpoint, collectionId, itemId, proxyConfig) => { +export const proxyAssets = (assets, endpoint, collectionId, itemId) => { const ProxiedAssets = {} let wasProxied = false for (const [assetKey, asset] of Object.entries(assets)) { - if (!asset || !asset.href) { + if (!asset?.href) { ProxiedAssets[assetKey] = asset // eslint-disable-next-line no-continue continue } const s3Info = parseS3Url(asset.href) - if (!s3Info || !shouldProxyAssets(s3Info.bucket, proxyConfig)) { + if (!s3Info || !(shouldProxyAssets(s3Info.bucket))) { ProxiedAssets[assetKey] = asset // eslint-disable-next-line no-continue continue @@ -317,7 +220,7 @@ export const proxyAssets = (assets, endpoint, collectionId, itemId, proxyConfig) } /** - * Determine S3 region STAC Storage Extension, if it exists + * Determine S3 region from STAC Storage Extension * @param {Object} asset - Asset object * @param {Object} itemOrCollection - Item or Collection object * @returns {string} AWS region @@ -341,12 +244,79 @@ export const determineS3Region = (asset, itemOrCollection) => { return process.env['AWS_REGION'] || 'us-west-2' } +/** + * Create a pre-signed URL for S3 object access + * @param {string} bucket - S3 bucket name + * @param {string} key - S3 object key + * @param {string} region - AWS region + * @returns {Promise} Pre-signed URL + */ +export const createPresignedS3Url = async (bucket, key, region) => { + const client = getS3Client(region) + const urlExpiry = getUrlExpiry() + + const command = new GetObjectCommand({ + Bucket: bucket, + Key: key, + RequestPayer: 'requester' + }) + + const presignedUrl = await getSignedUrl(client, command, { + expiresIn: urlExpiry + }) + + logger.debug('Generated pre-signed URL for asset', { + bucket, + key, + region, + urlExpiry, + }) + + return presignedUrl +} + +/** + * Generate a presigned URL for an asset + * @param {Object} itemOrCollection - STAC Item or 
Collection + * @param {string} assetKey - Asset key to generate presigned URL for + * @returns {Promise} Pre-signed URL or Error + */ +export const getAssetPresignedUrl = async (itemOrCollection, assetKey) => { + if (!isAssetProxyEnabled()) { + return new ForbiddenError() + } + + const asset = itemOrCollection.assets?.[assetKey] || null + if (!asset) { + return new NotFoundError() + } + + const alternateS3Href = asset.alternate?.s3?.href || null + if (!alternateS3Href) { + return new NotFoundError() + } + + const s3Info = parseS3Url(alternateS3Href) + if (!s3Info) { + return new ValidationError('Asset S3 href is invalid') + } + + if (!shouldProxyAssets(s3Info.bucket)) { + return new ForbiddenError() + } + + const region = s3Info.region || determineS3Region(asset, itemOrCollection) + const presignedUrl = await createPresignedS3Url(s3Info.bucket, s3Info.key, region) + + return presignedUrl +} + export default { - initProxyConfig, - getCachedProxyConfig, + getAssetProxyBuckets, parseS3Url, + isAssetProxyEnabled, shouldProxyAssets, - generatePresignedUrl, + createPresignedS3Url, proxyAssets, determineS3Region, } diff --git a/src/lib/database-client.js b/src/lib/database-client.js index 7ba76769..2929b9c9 100644 --- a/src/lib/database-client.js +++ b/src/lib/database-client.js @@ -74,7 +74,7 @@ export async function dbClient() { return _dbClient } -export async function createIndecreateIndexcreateIndexx(index) { +export async function createIndex(index) { const client = await dbClient() const exists = await client.indices.exists({ index }) if (!exists.body) { diff --git a/src/lib/errors.js b/src/lib/errors.js index 5a7f0d65..b0309c28 100644 --- a/src/lib/errors.js +++ b/src/lib/errors.js @@ -13,3 +13,10 @@ export class NotFoundError extends Error { this.name = this.constructor.name } } + +export class ForbiddenError extends Error { + constructor(message) { + super(message) + this.name = this.constructor.name + } +} diff --git a/src/lib/ingest.js b/src/lib/ingest.js 
index 487af4af..dfc09dae 100644 --- a/src/lib/ingest.js +++ b/src/lib/ingest.js @@ -4,6 +4,7 @@ import { dbClient, createIndex } from './database-client.js' import logger from './logger.js' import { publishRecordToSns } from './sns.js' import { isCollection, isItem, isAction, isStacEntity } from './stac-utils.js' +import { isAssetProxyEnabled } from './asset-proxy.js' const COLLECTIONS_INDEX = process.env['COLLECTIONS_INDEX'] || 'collections' @@ -179,7 +180,9 @@ function updateLinksAndHrefsWithinRecord(record) { } else if (isCollection(record)) { addCollectionLinks([record], endpoint) } - proxyStacObjectAssets([record], endpoint) + if (isAssetProxyEnabled()) { + proxyStacObjectAssets([record], endpoint) + } return record } diff --git a/tests/unit/test-asset-proxy.js b/tests/unit/test-asset-proxy.js index 6e17f4e7..240f9ec2 100644 --- a/tests/unit/test-asset-proxy.js +++ b/tests/unit/test-asset-proxy.js @@ -4,6 +4,7 @@ import { proxyAssets, shouldProxyAssets, determineS3Region, + isAssetProxyEnabled, BucketOption } from '../../src/lib/asset-proxy.js' @@ -40,58 +41,84 @@ test('parseS3Url - path style without region', (t) => { test('parseS3Url - invalid URLs', (t) => { t.is(parseS3Url('https://example.com/file.tif'), null) t.is(parseS3Url('s3://bucket'), null) + t.is(parseS3Url('s3://bucket-only-no-key'), null) t.is(parseS3Url(''), null) }) -test('shouldProxyAssets - ALL mode', (t) => { - const config = { - enabled: true, - mode: BucketOption.ALL, - buckets: new Set(), - urlExpiry: 300 +test('parseS3Url - handles nested paths', (t) => { + const result = parseS3Url('s3://my-bucket/deeply/nested/path/to/file.tif') + t.deepEqual(result, { bucket: 'my-bucket', key: 'deeply/nested/path/to/file.tif', region: null }) +}) + +test('isAssetProxyEnabled - NONE mode', (t) => { + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' + + t.false(isAssetProxyEnabled()) + + if (originalOption !== undefined) { + 
process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] } - t.true(shouldProxyAssets('any-bucket', config)) }) -test('shouldProxyAssets - NONE mode', (t) => { - const config = { - enabled: false, - mode: BucketOption.NONE, - buckets: new Set(), - urlExpiry: 300 +test('isAssetProxyEnabled - ALL mode', (t) => { + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + t.true(isAssetProxyEnabled()) + + if (originalOption !== undefined) { + process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] } - t.false(shouldProxyAssets('any-bucket', config)) }) -test('shouldProxyAssets - LIST mode with matching bucket', (t) => { - const config = { - enabled: true, - mode: BucketOption.LIST, - buckets: new Set(['bucket1', 'bucket2']), - urlExpiry: 300 +test('isAssetProxyEnabled - LIST mode', (t) => { + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' + + t.true(isAssetProxyEnabled()) + + if (originalOption !== undefined) { + process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] } - t.true(shouldProxyAssets('bucket1', config)) - t.false(shouldProxyAssets('bucket3', config)) }) -test('shouldProxyAssets - ALL_BUCKETS_IN_ACCOUNT mode', (t) => { - const config = { - enabled: true, - mode: BucketOption.ALL_BUCKETS_IN_ACCOUNT, - buckets: new Set(['account-bucket-1', 'account-bucket-2']), - urlExpiry: 300 +test('shouldProxyAssets - NONE mode returns false', (t) => { + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' + + t.false(shouldProxyAssets('any-bucket')) + + if (originalOption !== undefined) { + process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete 
process.env['ASSET_PROXY_BUCKET_OPTION'] } - t.true(shouldProxyAssets('account-bucket-1', config)) - t.false(shouldProxyAssets('other-bucket', config)) }) -test('proxyAssets - transforms assets with ALL mode', (t) => { - const config = { - enabled: true, - mode: BucketOption.ALL, - buckets: new Set(), - urlExpiry: 300 +test('shouldProxyAssets - ALL mode returns true for any bucket', (t) => { + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + t.true(shouldProxyAssets('any-bucket')) + t.true(shouldProxyAssets('another-bucket')) + + if (originalOption !== undefined) { + process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] } +}) + +test('proxyAssets - ALL mode transforms item assets', (t) => { + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' const assets = { thumbnail: { @@ -108,8 +135,7 @@ test('proxyAssets - transforms assets with ALL mode', (t) => { assets, 'https://api.example.com', 'collection1', - 'item1', - config + 'item1' ) t.true(wasProxied) @@ -117,15 +143,17 @@ test('proxyAssets - transforms assets with ALL mode', (t) => { t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/thumb.jpg') t.is(proxied.data.href, 'https://api.example.com/collections/collection1/items/item1/assets/data') t.is(proxied.data.alternate.s3.href, 'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif') -}) -test('proxyAssets - no transformation with NONE mode', (t) => { - const config = { - enabled: false, - mode: BucketOption.NONE, - buckets: new Set(), - urlExpiry: 300 + if (originalOption !== undefined) { + process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] } +}) + +test('proxyAssets - NONE mode does not transform assets', (t) => { + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + 
process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' const assets = { thumbnail: { @@ -138,55 +166,23 @@ test('proxyAssets - no transformation with NONE mode', (t) => { assets, 'https://api.example.com', 'collection1', - 'item1', - config + 'item1' ) t.false(wasProxied) t.is(proxied.thumbnail.href, 's3://my-bucket/thumb.jpg') t.is(proxied.thumbnail.alternate, undefined) -}) - -test('proxyAssets - LIST mode only transforms matching buckets', (t) => { - const config = { - enabled: true, - mode: BucketOption.LIST, - buckets: new Set(['proxied-bucket']), - urlExpiry: 300 - } - const assets = { - proxied: { - href: 's3://proxied-bucket/file.tif', - type: 'image/tiff' - }, - notProxied: { - href: 's3://other-bucket/file.tif', - type: 'image/tiff' - } + if (originalOption !== undefined) { + process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] } - - const { assets: proxied, wasProxied } = proxyAssets( - assets, - 'https://api.example.com', - 'collection1', - 'item1', - config - ) - - t.true(wasProxied) - t.is(proxied.proxied.href, 'https://api.example.com/collections/collection1/items/item1/assets/proxied') - t.is(proxied.notProxied.href, 's3://other-bucket/file.tif') - t.is(proxied.notProxied.alternate, undefined) }) test('proxyAssets - collection assets (no itemId)', (t) => { - const config = { - enabled: true, - mode: BucketOption.ALL, - buckets: new Set(), - urlExpiry: 300 - } + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' const assets = { thumbnail: { @@ -199,22 +195,23 @@ test('proxyAssets - collection assets (no itemId)', (t) => { assets, 'https://api.example.com', 'collection1', - null, - config + null ) t.true(wasProxied) t.is(proxied.thumbnail.href, 'https://api.example.com/collections/collection1/assets/thumbnail') t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/collection-thumb.jpg') + + if (originalOption !== undefined) 
{ + process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] + } }) test('proxyAssets - preserves existing alternate links', (t) => { - const config = { - enabled: true, - mode: BucketOption.ALL, - buckets: new Set(), - urlExpiry: 300 - } + const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' const assets = { data: { @@ -230,22 +227,20 @@ test('proxyAssets - preserves existing alternate links', (t) => { assets, 'https://api.example.com', 'collection1', - 'item1', - config + 'item1' ) t.is(proxied.data.alternate.http.href, 'https://example.com/data.tif') t.is(proxied.data.alternate.s3.href, 's3://my-bucket/data.tif') -}) -test('proxyAssets - handles non-S3 assets', (t) => { - const config = { - enabled: true, - mode: BucketOption.ALL, - buckets: new Set(), - urlExpiry: 300 + if (originalOption !== undefined) { + process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption + } else { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] } +}) +test('proxyAssets - handles non-S3 assets', (t) => { const assets = { metadata: { href: 'https://example.com/metadata.xml', @@ -257,8 +252,7 @@ test('proxyAssets - handles non-S3 assets', (t) => { assets, 'https://api.example.com', 'collection1', - 'item1', - config + 'item1' ) t.false(wasProxied) @@ -266,6 +260,38 @@ test('proxyAssets - handles non-S3 assets', (t) => { t.is(proxied.metadata.alternate, undefined) }) +test('proxyAssets - handles assets without href', (t) => { + const assets = { + metadata: { + type: 'application/xml' + } + } + + const { assets: proxied, wasProxied } = proxyAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1' + ) + + t.false(wasProxied) + t.deepEqual(proxied.metadata, { type: 'application/xml' }) +}) + +test('proxyAssets - handles empty assets object', (t) => { + const assets = {} + + const { assets: proxied, wasProxied } = proxyAssets( + assets, + 
'https://api.example.com', + 'collection1', + 'item1' + ) + + t.false(wasProxied) + t.deepEqual(proxied, {}) +}) + test('determineS3Region - v1 asset-level storage extension', (t) => { const asset = { 'storage:region': 'us-east-1' } const item = {} @@ -288,6 +314,24 @@ test('determineS3Region - v2 storage extension', (t) => { t.is(determineS3Region(asset, item), 'ap-southeast-2') }) +test('determineS3Region - v2 storage extension in properties', (t) => { + const asset = { 'storage:refs': 'scheme1' } + const item = { + properties: { + 'storage:schemes': { + scheme1: { region: 'ap-southeast-2' } + } + } + } + t.is(determineS3Region(asset, item), 'ap-southeast-2') +}) + +test('determineS3Region - asset-level takes precedence over item-level', (t) => { + const asset = { 'storage:region': 'us-east-1' } + const item = { properties: { 'storage:region': 'eu-west-1' } } + t.is(determineS3Region(asset, item), 'us-east-1') +}) + test('determineS3Region - default fallback', (t) => { const originalRegion = process.env['AWS_REGION'] delete process.env['AWS_REGION'] @@ -313,3 +357,10 @@ test('determineS3Region - environment variable fallback', (t) => { delete process.env['AWS_REGION'] } }) + +test('BucketOption - exports expected constants', (t) => { + t.is(BucketOption.NONE, 'NONE') + t.is(BucketOption.ALL, 'ALL') + t.is(BucketOption.ALL_BUCKETS_IN_ACCOUNT, 'ALL_BUCKETS_IN_ACCOUNT') + t.is(BucketOption.LIST, 'LIST') +}) From 4237b5287466110a94353e44c8877b6825f98cf4 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Thu, 16 Oct 2025 16:45:02 -0500 Subject: [PATCH 04/22] refactor: use app.local state, sundry fixups, working tests --- src/lambdas/api/app.js | 48 +- src/lambdas/ingest/index.js | 19 +- src/lib/api.js | 49 -- src/lib/asset-proxy.js | 416 ++++++----- src/lib/ingest.js | 13 +- .../fixtures/stac/collection-with-asset.json | 115 +++ tests/helpers/api.js | 2 + tests/helpers/asset-proxy.js | 15 + tests/system/test-api-asset-proxy-disabled.js | 169 +++++ 
tests/system/test-api-asset-proxy.js | 115 ++- tests/system/test-api-item-get.js | 24 - tests/system/test-api-search-post.js | 27 - tests/system/test-ingest.js | 32 +- tests/unit/test-asset-proxy.js | 690 +++++++++++------- 14 files changed, 1081 insertions(+), 653 deletions(-) create mode 100644 tests/fixtures/stac/collection-with-asset.json create mode 100644 tests/helpers/asset-proxy.js create mode 100644 tests/system/test-api-asset-proxy-disabled.js diff --git a/src/lambdas/api/app.js b/src/lambdas/api/app.js index f63378da..99980793 100644 --- a/src/lambdas/api/app.js +++ b/src/lambdas/api/app.js @@ -11,7 +11,7 @@ import { NotFoundError, ValidationError, ForbiddenError } from '../../lib/errors import { readFile } from '../../lib/fs.js' import addEndpoint from './middleware/add-endpoint.js' import logger from '../../lib/logger.js' -import { getAssetProxyBuckets, getAssetPresignedUrl } from '../../lib/asset-proxy.js' +import { AssetProxy } from '../../lib/asset-proxy.js' /** * @typedef {import('express').Request} Request @@ -21,12 +21,13 @@ import { getAssetProxyBuckets, getAssetPresignedUrl } from '../../lib/asset-prox */ export const createApp = async () => { - await getAssetProxyBuckets() - const txnEnabled = process.env['ENABLE_TRANSACTIONS_EXTENSION'] === 'true' const app = express() + app.locals['assetProxy'] = new AssetProxy() + await app.locals['assetProxy'].initialize() + if (process.env['REQUEST_LOGGING_ENABLED'] !== 'false') { app.use( [ @@ -116,8 +117,12 @@ export const createApp = async () => { app.get('/search', async (req, res, next) => { try { + const result = await api.searchItems( + database, 'GET', null, req.endpoint, req.query, req.headers + ) + req.app.locals['assetProxy'].addProxiedAssets(result.features, req.endpoint) res.type('application/geo+json') - res.json(await api.searchItems(database, 'GET', null, req.endpoint, req.query, req.headers)) + res.json(result) } catch (error) { if (error instanceof ValidationError) { 
next(createError(400, error.message)) @@ -129,8 +134,12 @@ export const createApp = async () => { app.post('/search', async (req, res, next) => { try { + const result = await api.searchItems( + database, 'POST', null, req.endpoint, req.body, req.headers + ) + req.app.locals['assetProxy'].addProxiedAssets(result.features, req.endpoint) res.type('application/geo+json') - res.json(await api.searchItems(database, 'POST', null, req.endpoint, req.body, req.headers)) + res.json(result) } catch (error) { if (error instanceof ValidationError) { next(createError(400, error.message)) @@ -164,7 +173,10 @@ export const createApp = async () => { try { const response = await api.getCollections(database, req.endpoint, req.query, req.headers) if (response instanceof Error) next(createError(500, response.message)) - else res.json(response) + else { + req.app.locals['assetProxy'].addProxiedAssets(response.collections, req.endpoint) + res.json(response) + } } catch (error) { next(error) } @@ -198,7 +210,10 @@ export const createApp = async () => { database, collectionId, req.endpoint, req.query, req.headers ) if (response instanceof Error) next(createError(404)) - else res.json(response) + else { + req.app.locals['assetProxy'].addProxiedAssets([response], req.endpoint) + res.json(response) + } } catch (error) { next(error) } @@ -276,10 +291,12 @@ export const createApp = async () => { next(createError(404)) } - res.type('application/geo+json') - res.json( - await api.searchItems(database, 'GET', collectionId, req.endpoint, req.query, req.headers) + const result = await api.searchItems( + database, 'GET', collectionId, req.endpoint, req.query, req.headers ) + req.app.locals['assetProxy'].addProxiedAssets(result.features, req.endpoint) + res.type('application/geo+json') + res.json(result) } catch (error) { if (error instanceof ValidationError) { next(createError(400, error.message)) @@ -341,6 +358,7 @@ export const createApp = async () => { } else if (response instanceof Error) { 
next(createError(500)) } else { + req.app.locals['assetProxy'].addProxiedAssets([response], req.endpoint) res.type('application/geo+json') res.json(response) } @@ -473,7 +491,10 @@ export const createApp = async () => { } else if (item instanceof Error) { next(createError(500)) } else { - const presignedUrl = await getAssetPresignedUrl(item, req.params.assetKey) + const presignedUrl = await req.app.locals['assetProxy'].getAssetPresignedUrl( + item, + req.params.assetKey + ) if (presignedUrl instanceof ValidationError) { next(createError(400)) } else if (presignedUrl instanceof ForbiddenError) { @@ -506,7 +527,10 @@ export const createApp = async () => { } else if (collection instanceof Error) { next(createError(500)) } else { - const presignedUrl = await getAssetPresignedUrl(collection, req.params.assetKey) + const presignedUrl = await req.app.locals['assetProxy'].getAssetPresignedUrl( + collection, + req.params.assetKey + ) if (presignedUrl instanceof ValidationError) { next(createError(400)) } else if (presignedUrl instanceof ForbiddenError) { diff --git a/src/lambdas/ingest/index.js b/src/lambdas/ingest/index.js index dc697e26..27fe5a27 100644 --- a/src/lambdas/ingest/index.js +++ b/src/lambdas/ingest/index.js @@ -4,6 +4,22 @@ import { createIndex } from '../../lib/database-client.js' import { processMessages, publishResultsToSns } from '../../lib/ingest.js' import getObjectJson from '../../lib/s3-utils.js' import logger from '../../lib/logger.js' +import { AssetProxy } from '../../lib/asset-proxy.js' + +let assetProxyInstance = null + +const getAssetProxy = async () => { + if (!assetProxyInstance) { + assetProxyInstance = new AssetProxy() + await assetProxyInstance.initialize() + } + + return assetProxyInstance +} + +export const resetAssetProxy = () => { + assetProxyInstance = null +} const isSqsEvent = (event) => 'Records' in event @@ -76,7 +92,8 @@ export const handler = async (event, _context) => { if (postIngestTopicArn) { logger.debug('Publishing to 
post-ingest topic: %s', postIngestTopicArn) - await publishResultsToSns(results, postIngestTopicArn) + const assetProxy = await getAssetProxy() + await publishResultsToSns(results, postIngestTopicArn, assetProxy) } else { logger.debug('Skipping post-ingest notification since no topic is configured') } diff --git a/src/lib/api.js b/src/lib/api.js index dfdcb353..f28dcf6e 100644 --- a/src/lib/api.js +++ b/src/lib/api.js @@ -6,10 +6,6 @@ import { NotFoundError, ValidationError } from './errors.js' import { isIndexNotFoundError } from './database.js' import logger from './logger.js' import { bboxToPolygon } from './geo-utils.js' -import { - isAssetProxyEnabled, - proxyAssets, -} from './asset-proxy.js' // max number of collections to retrieve const COLLECTION_LIMIT = process.env['STAC_SERVER_COLLECTION_LIMIT'] || 100 @@ -55,8 +51,6 @@ const ALL_AGGREGATION_NAMES = DEFAULT_AGGREGATIONS.map((x) => x.name).concat( ] ) -const ALTERNATE_ASSETS_EXTENSION = 'https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json' - export const extractIntersects = function (params) { let intersectsGeometry const { intersects } = params @@ -561,43 +555,6 @@ export const addItemLinks = function (results, endpoint) { return results } -// Impure - mutates results -export const proxyStacObjectAssets = function (results, endpoint) { - if (!isAssetProxyEnabled()) { - return results - } - - results.forEach((result) => { - if (!result.assets || typeof result.assets !== 'object') { - return - } - - const itemId = result.collection ? result.id : null - const collectionId = result.collection ? 
result.collection : result.id - - const { assets, wasProxied } = proxyAssets( - result.assets, - endpoint, - collectionId, - itemId, - ) - - if (wasProxied) { - result.assets = assets - - if (!result.stac_extensions) { - result.stac_extensions = [] - } - - if (!result.stac_extensions.includes(ALTERNATE_ASSETS_EXTENSION)) { - result.stac_extensions.push(ALTERNATE_ASSETS_EXTENSION) - } - } - }) - - return results -} - const wrapResponseInFeatureCollection = function (features, links, numberMatched, numberReturned, limit) { const fc = { @@ -802,7 +759,6 @@ const searchItems = async function ( } addItemLinks(responseItems, endpoint) - proxyStacObjectAssets(responseItems, endpoint) return wrapResponseInFeatureCollection(responseItems, links, numberMatched, numberReturned, limit) } @@ -1316,7 +1272,6 @@ const getCollections = async function (backend, endpoint, parameters, headers) { } addCollectionLinks(collections, endpoint) - proxyStacObjectAssets(collections, endpoint) const resp = { collections, @@ -1360,7 +1315,6 @@ const getCollection = async function (backend, collectionId, endpoint, parameter deleteUnusedFields(result) addCollectionLinks([result], endpoint) - proxyStacObjectAssets([result], endpoint) return result } @@ -1389,7 +1343,6 @@ const getItem = async function (backend, collectionId, itemId, endpoint, params, const { results } = await backend.search(itemQuery, 1) addItemLinks(results, endpoint) - proxyStacObjectAssets(results, endpoint) const [it] = results if (it) { @@ -1404,7 +1357,6 @@ const partialUpdateItem = async function (backend, logger.debug('Partial Update Item: %j', response) if (response) { const items = addItemLinks([response.body.get._source], endpoint) - proxyStacObjectAssets(items, endpoint) return items[0] } return new Error(`Error partially updating item ${itemId}`) @@ -1529,5 +1481,4 @@ export default { getCollectionQueryables, getGlobalAggregations, getCollectionAggregations, - proxyStacObjectAssets, } diff --git 
a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index 7a505ff0..d608948c 100644 --- a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -7,12 +7,7 @@ import { NotFoundError, ValidationError, ForbiddenError } from './errors.js' const VIRTUAL_HOST_PATTERN = /^([^.]+)\.s3(?:\.([^.]+))?\.amazonaws\.com$/ const PATH_STYLE_PATTERN = /^s3(?:[.-]([^.]+))?\.amazonaws\.com$/ -const s3ClientCache = new Map() -let assetProxyBucketsCache = null - -const getBucketOption = () => process.env['ASSET_PROXY_BUCKET_OPTION'] || 'NONE' -const getBucketList = () => process.env['ASSET_PROXY_BUCKET_LIST'] -const getUrlExpiry = () => parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) +export const ALTERNATE_ASSETS_EXTENSION = 'https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json' export const BucketOption = Object.freeze({ NONE: 'NONE', @@ -22,68 +17,10 @@ export const BucketOption = Object.freeze({ }) /** - * Get or create an S3 client for a specific region - * @param {string} region - AWS region - * @returns {Object} Cached or new S3 client - */ -const getS3Client = (region) => { - if (s3ClientCache.has(region)) { - return s3ClientCache.get(region) - } - - const client = s3({ region }) - s3ClientCache.set(region, client) - return client -} - -/** - * Cache bucket names for asset proxying based on configuration. 
- * @returns {Promise} - */ -export const getAssetProxyBuckets = async () => { - const bucketOption = getBucketOption() - const bucketList = getBucketList() - - switch (bucketOption) { - case BucketOption.LIST: - if (bucketList) { - const bucketNames = bucketList.split(',').map((b) => b.trim()).filter((b) => b) - assetProxyBucketsCache = new Set(bucketNames) - logger.info( - `Parsed ${assetProxyBucketsCache.size} buckets from ASSET_PROXY_BUCKET_LIST for asset proxy` - ) - } else { - throw new Error('ASSET_PROXY_BUCKET_LIST must be set when ASSET_PROXY_BUCKET_OPTION is LIST') - } - break - - case BucketOption.ALL_BUCKETS_IN_ACCOUNT: - try { - const region = process.env['AWS_REGION'] || 'us-west-2' - const client = getS3Client(region) - const command = new ListBucketsCommand({}) - const response = await client.send(command) - const bucketNames = response.Buckets?.map((b) => b.Name) - ?.filter((name) => typeof name === 'string') || [] - assetProxyBucketsCache = new Set(bucketNames) - logger.info(`Fetched ${assetProxyBucketsCache.size} buckets from AWS account for asset proxy`) - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error) - throw new Error(`Failed to fetch buckets for asset proxy: ${message}`) - } - break - - default: - break - } -} - -/** - * Parse S3 URL (URI or HTTPS) into components * @param {string} url - S3 URL to parse * @returns {Object|null} {bucket, key, region} or null if not a valid S3 URL */ -export const parseS3Url = (url) => { +const parseS3Url = (url) => { // S3 URI format: s3://bucket/key if (url.startsWith('s3://')) { const withoutProtocol = url.substring(5) @@ -150,82 +87,11 @@ export const parseS3Url = (url) => { } /** - * Determine if asset proxying is enabled - * @returns {boolean} True if enabled - */ -export const isAssetProxyEnabled = () => { - if (getBucketOption() === BucketOption.NONE) { - return false - } - return true -} - -/** - * Determine if a bucket's assets should be proxied - * @param {string} bucket - S3 bucket - * @returns {boolean} True if assets should be proxied - */ -export const shouldProxyAssets = (bucket) => { - if (getBucketOption() === BucketOption.ALL || assetProxyBucketsCache?.has(bucket)) { - return true - } - return false -} - -/** - * Proxy asset hrefs and add original href as alternate - * @param {Object} assets - Assets object - * @param {string} endpoint - API endpoint base URL - * @param {string} collectionId - Collection ID - * @param {string|null} itemId - Item ID (null for collection assets) - * @returns {Object} {assets: Proxied assets object, wasProxied: boolean} - */ -export const proxyAssets = (assets, endpoint, collectionId, itemId) => { - const ProxiedAssets = {} - let wasProxied = false - - for (const [assetKey, asset] of Object.entries(assets)) { - if (!asset?.href) { - ProxiedAssets[assetKey] = asset - // eslint-disable-next-line no-continue - continue - } - - const s3Info = parseS3Url(asset.href) - if (!s3Info || !(shouldProxyAssets(s3Info.bucket))) { - ProxiedAssets[assetKey] = asset - // eslint-disable-next-line no-continue - continue - } - - wasProxied = true - - const proxyHref = 
itemId - ? `${endpoint}/collections/${collectionId}/items/${itemId}/assets/${assetKey}` - : `${endpoint}/collections/${collectionId}/assets/${assetKey}` - - ProxiedAssets[assetKey] = { - ...asset, - href: proxyHref, - alternate: { - ...(asset.alternate || {}), - s3: { - href: asset.href - } - } - } - } - - return { assets: ProxiedAssets, wasProxied } -} - -/** - * Determine S3 region from STAC Storage Extension * @param {Object} asset - Asset object * @param {Object} itemOrCollection - Item or Collection object * @returns {string} AWS region */ -export const determineS3Region = (asset, itemOrCollection) => { +const determineS3Region = (asset, itemOrCollection) => { // Storage Extension v1 const v1Region = asset['storage:region'] || itemOrCollection.properties?.['storage:region'] if (v1Region) { @@ -240,83 +106,233 @@ export const determineS3Region = (asset, itemOrCollection) => { return v2Region } - // Default to environment or us-west-2 return process.env['AWS_REGION'] || 'us-west-2' } -/** - * Create a pre-signed URL for S3 object access - * @param {string} bucket - S3 bucket name - * @param {string} key - S3 object key - * @param {string} region - AWS region - * @returns {Promise} Pre-signed URL - */ -export const createPresignedS3Url = async (bucket, key, region) => { - const client = getS3Client(region) - const urlExpiry = getUrlExpiry() - - const command = new GetObjectCommand({ - Bucket: bucket, - Key: key, - RequestPayer: 'requester' - }) - - const presignedUrl = await getSignedUrl(client, command, { - expiresIn: urlExpiry - }) - - logger.debug('Generated pre-signed URL for asset', { - bucket, - key, - region, - urlExpiry, - }) - - return presignedUrl -} +export class AssetProxy { + constructor() { + this.bucketsCache = null + this.s3ClientCache = new Map() + this.bucketOption = process.env['ASSET_PROXY_BUCKET_OPTION'] || 'NONE' + this.bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] + this.urlExpiry = parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || 
'300', 10) + } -/** - * Generate a presigned URL for an asset - * @param {Object} itemOrCollection - STAC Item or Collection - * @param {string} assetKey - Asset key to generate presigned URL for - * @returns {Promise} Pre-signed URL or Error - */ -export const getAssetPresignedUrl = async (itemOrCollection, assetKey) => { - if (!isAssetProxyEnabled()) { - return new ForbiddenError() + /** + * @returns {Promise} + */ + async initialize() { + switch (this.bucketOption) { + case BucketOption.LIST: + if (this.bucketList) { + const bucketNames = this.bucketList.split(',').map((b) => b.trim()).filter((b) => b) + this.bucketsCache = new Set(bucketNames) + logger.info( + `Parsed ${this.bucketsCache.size} buckets from ASSET_PROXY_BUCKET_LIST for asset proxy` + ) + } else { + throw new Error( + 'ASSET_PROXY_BUCKET_LIST must be set when ASSET_PROXY_BUCKET_OPTION is LIST' + ) + } + break + + case BucketOption.ALL_BUCKETS_IN_ACCOUNT: + try { + const region = process.env['AWS_REGION'] || 'us-west-2' + const client = this.getS3Client(region) + const command = new ListBucketsCommand({}) + const response = await client.send(command) + const bucketNames = response.Buckets?.map((b) => b.Name) + ?.filter((name) => typeof name === 'string') || [] + this.bucketsCache = new Set(bucketNames) + logger.info(`Fetched ${this.bucketsCache.size} buckets from AWS account for asset proxy`) + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error) + throw new Error(`Failed to fetch buckets for asset proxy: ${message}`) + } + break + + default: + break + } } - const asset = itemOrCollection.assets?.[assetKey] || null - if (!asset) { - return new NotFoundError() + /** + * @param {string} region - AWS region + * @returns {Object} S3 client instance + */ + getS3Client(region) { + if (this.s3ClientCache.has(region)) { + return this.s3ClientCache.get(region) + } + + const client = s3({ region }) + this.s3ClientCache.set(region, client) + return client } - const alternateS3Href = asset.alternate?.s3?.href || null - if (!alternateS3Href) { - return new NotFoundError() + /** + * @returns {boolean} + */ + isEnabled() { + return this.bucketOption !== BucketOption.NONE } - const s3Info = parseS3Url(alternateS3Href) - if (!s3Info) { - return new ValidationError('Asset S3 href is invalid') + /** + * @param {string} bucket - S3 bucket name + * @returns {boolean} True if bucket should be proxied + */ + shouldProxyBucket(bucket) { + if (this.bucketOption === BucketOption.ALL || this.bucketsCache?.has(bucket)) { + return true + } + return false } - if (!shouldProxyAssets(s3Info.bucket)) { - return new ForbiddenError() + /** + * @param {Object} assets - Assets object + * @param {string} endpoint - API endpoint base URL + * @param {string} collectionId - Collection ID + * @param {string|null} itemId - Item ID (null for collection assets) + * @returns {Object} Object with proxied assets and wasProxied flag + */ + getProxiedAssets(assets, endpoint, collectionId, itemId) { + const proxiedAssets = {} + let wasProxied = false + + for (const [assetKey, asset] of Object.entries(assets)) { + if (!asset?.href) { + proxiedAssets[assetKey] = asset + // eslint-disable-next-line no-continue + continue + } + + const s3Info = parseS3Url(asset.href) + if (!s3Info || !(this.shouldProxyBucket(s3Info.bucket))) { + proxiedAssets[assetKey] = asset + // eslint-disable-next-line no-continue + continue + } + + wasProxied 
= true + + const proxyHref = itemId + ? `${endpoint}/collections/${collectionId}/items/${itemId}/assets/${assetKey}` + : `${endpoint}/collections/${collectionId}/assets/${assetKey}` + + proxiedAssets[assetKey] = { + ...asset, + href: proxyHref, + alternate: { + ...(asset.alternate || {}), + s3: { + href: asset.href + } + } + } + } + + return { assets: proxiedAssets, wasProxied } } - const region = s3Info.region || determineS3Region(asset, itemOrCollection) - const presignedUrl = await createPresignedS3Url(s3Info.bucket, s3Info.key, region) + /** + * @param {Array} results - Array of STAC items or collections + * @param {string} endpoint - API endpoint base URL + * @returns {Array} Mutated results array with proxied assets + */ + addProxiedAssets(results, endpoint) { + if (!this.isEnabled()) { + return results + } - return presignedUrl -} + results.forEach((result) => { + if (!result.assets || typeof result.assets !== 'object') { + return + } -export default { - getAssetProxyBuckets, - parseS3Url, - isAssetProxyEnabled, - shouldProxyAssets, - createPresignedS3Url, - proxyAssets, - determineS3Region, + const itemId = result.collection ? result.id : null + const collectionId = result.collection ? 
result.collection : result.id + + const { assets, wasProxied } = this.getProxiedAssets( + result.assets, + endpoint, + collectionId, + itemId + ) + + if (wasProxied) { + result.assets = assets + + if (!result.stac_extensions) { + result.stac_extensions = [] + } + + if (!result.stac_extensions.includes(ALTERNATE_ASSETS_EXTENSION)) { + result.stac_extensions.push(ALTERNATE_ASSETS_EXTENSION) + } + } + }) + + return results + } + + /** + * @param {string} bucket - S3 bucket name + * @param {string} key - S3 object key + * @param {string} region - AWS region + * @returns {Promise} Pre-signed URL + */ + async createPresignedUrl(bucket, key, region) { + const client = this.getS3Client(region) + + const command = new GetObjectCommand({ + Bucket: bucket, + Key: key, + RequestPayer: 'requester' + }) + + const presignedUrl = await getSignedUrl(client, command, { + expiresIn: this.urlExpiry + }) + + logger.debug('Generated pre-signed URL for asset', { + bucket, + key, + region, + urlExpiry: this.urlExpiry, + }) + + return presignedUrl + } + + /** + * @param {Object} itemOrCollection - STAC Item or Collection + * @param {string} assetKey - Asset key to generate presigned URL for + * @returns {Promise} Pre-signed URL or Error + */ + async getAssetPresignedUrl(itemOrCollection, assetKey) { + if (!this.isEnabled()) { + return new ForbiddenError() + } + + const asset = itemOrCollection.assets?.[assetKey] || null + if (!asset || !asset.href) { + return new NotFoundError() + } + + const s3Info = parseS3Url(asset.href) + if (!s3Info) { + return new ValidationError('Asset href is not a valid S3 URL') + } + + if (!this.shouldProxyBucket(s3Info.bucket)) { + return new ForbiddenError() + } + + const region = s3Info.region || determineS3Region(asset, itemOrCollection) + const presignedUrl = await this.createPresignedUrl(s3Info.bucket, s3Info.key, region) + + return presignedUrl + } } diff --git a/src/lib/ingest.js b/src/lib/ingest.js index dfc09dae..6bcb75dc 100644 --- a/src/lib/ingest.js 
+++ b/src/lib/ingest.js @@ -1,10 +1,9 @@ import { getItemCreated } from './database.js' -import { addItemLinks, addCollectionLinks, proxyStacObjectAssets } from './api.js' +import { addItemLinks, addCollectionLinks } from './api.js' import { dbClient, createIndex } from './database-client.js' import logger from './logger.js' import { publishRecordToSns } from './sns.js' import { isCollection, isItem, isAction, isStacEntity } from './stac-utils.js' -import { isAssetProxyEnabled } from './asset-proxy.js' const COLLECTIONS_INDEX = process.env['COLLECTIONS_INDEX'] || 'collections' @@ -165,7 +164,7 @@ export async function processMessages(msgs) { /* eslint-enable no-await-in-loop */ // Impure - mutates record -function updateLinksAndHrefsWithinRecord(record) { +function updateLinksAndHrefsWithinRecord(record, assetProxy) { const endpoint = process.env['STAC_API_URL'] if (!endpoint) { logger.info('STAC_API_URL not set, not updating links within ingested record') @@ -180,17 +179,17 @@ function updateLinksAndHrefsWithinRecord(record) { } else if (isCollection(record)) { addCollectionLinks([record], endpoint) } - if (isAssetProxyEnabled()) { - proxyStacObjectAssets([record], endpoint) + if (assetProxy.isEnabled()) { + assetProxy.addProxiedAssets([record], endpoint) } return record } -export async function publishResultsToSns(results, topicArn) { +export async function publishResultsToSns(results, topicArn, assetProxy) { await Promise.allSettled(results.map(async (result) => { if (isStacEntity(result.record)) { if (result.record && !result.error) { - updateLinksAndHrefsWithinRecord(result.record) + updateLinksAndHrefsWithinRecord(result.record, assetProxy) } await publishRecordToSns(topicArn, result.record, result.error) } diff --git a/tests/fixtures/stac/collection-with-asset.json b/tests/fixtures/stac/collection-with-asset.json new file mode 100644 index 00000000..764964c6 --- /dev/null +++ b/tests/fixtures/stac/collection-with-asset.json @@ -0,0 +1,115 @@ +{ + "id": 
"landsat-8-l1", + "type": "Collection", + "stac_version": "1.1.0", + "description": "Landat-8 L1 Collection-1 imagery radiometrically calibrated and orthorectified using gound points and Digital Elevation Model (DEM) data to correct relief displacement.", + "links": [ + { + "href": "collection.json", + "rel": "self", + "type": "application/json" + }, + { + "href": "catalog.json", + "rel": "root", + "type": "application/json" + }, + { + "href": "catalog.json", + "rel": "parent", + "type": "application/json" + }, + { + "href": "LC80100102015050LGN00.json", + "rel": "item", + "type": "application/json" + }, + { + "href": "LC80100102015082LGN00.json", + "rel": "item", + "type": "application/json" + }, + { + "href": "badGeometryItem.json", + "rel": "item", + "type": "application/json" + } + ], + "stac_extensions": [], + "title": "Landsat-8 L1 Collection-1", + "keywords": [ + "landsat", + "earth observation", + "usgs" + ], + "providers": [ + { + "name": "USGS", + "roles": [ + "producer" + ], + "url": "https://landsat.usgs.gov/" + }, + { + "name": "Planet Labs", + "roles": [ + "processor" + ], + "url": "https://github.com/landsat-pds/landsat_ingestor" + }, + { + "name": "AWS", + "roles": [ + "host" + ], + "url": "https://landsatonaws.com/" + }, + { + "name": "Element 84", + "roles": [ + "processor" + ], + "url": "https://element84.com/" + } + ], + "summaries": { + "gsd": [ + 30 + ], + "platform": [ + "landsat-8" + ], + "instruments": [ + "oli", + "tirs" + ] + }, + "extent": { + "spatial": { + "bbox": [ + [ + -180, + -90, + 180, + 90 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2013-06-01T00:00:00Z", + null + ] + ] + } + }, + "license": "PDDL-1.0", + "assets": { + "thumbnail": { + "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_thumb_large.jpg", + "title": "Thumbnail image", + "type": "image/jpeg" + } + } +} diff --git a/tests/helpers/api.js b/tests/helpers/api.js index a662e5a3..e42a9e6a 100644 --- 
a/tests/helpers/api.js +++ b/tests/helpers/api.js @@ -21,6 +21,7 @@ const apiClient = (url) => got.extend({ /** * @typedef {Object} ApiInstance + * @property {import('express').Application} app * @property {Got} client * @property {() => Promise} close * @property {string} url @@ -50,6 +51,7 @@ export const startApi = async () => { } return Object.freeze({ + app, client, close, url diff --git a/tests/helpers/asset-proxy.js b/tests/helpers/asset-proxy.js new file mode 100644 index 00000000..51e3d2d2 --- /dev/null +++ b/tests/helpers/asset-proxy.js @@ -0,0 +1,15 @@ +import { AssetProxy } from '../../src/lib/asset-proxy.js' + +const setupAssetProxy = async (assetProxyBucketOption) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = assetProxyBucketOption + const assetProxy = new AssetProxy() + await assetProxy.initialize() + return assetProxy + } finally { + process.env = before + } +} + +export default setupAssetProxy diff --git a/tests/system/test-api-asset-proxy-disabled.js b/tests/system/test-api-asset-proxy-disabled.js new file mode 100644 index 00000000..4c2de005 --- /dev/null +++ b/tests/system/test-api-asset-proxy-disabled.js @@ -0,0 +1,169 @@ +// @ts-nocheck + +import test from 'ava' +import { ALTERNATE_ASSETS_EXTENSION } from '../../src/lib/asset-proxy.js' +import { deleteAllIndices } from '../helpers/database.js' +import { ingestItem } from '../helpers/ingest.js' +import { randomId, loadFixture } from '../helpers/utils.js' +import { setup } from '../helpers/system-tests.js' +import setupAssetProxy from '../helpers/asset-proxy.js' + +const COLLECTION_FIXTURE = 'landsat-8-l1-collection.json' +const ITEM_FIXTURE = 'stac/LC80100102015082LGN00.json' +const COLLECTION_WITH_ASSET_FIXTURE = 'stac/collection-with-asset.json' + +test.before(async (t) => { + await deleteAllIndices() + const standUpResult = await setup() + + standUpResult.api.app.locals['assetProxy'] = await setupAssetProxy('NONE') + + t.context = standUpResult 
+ + t.context.collectionId = randomId('collection') + const collection = await loadFixture(COLLECTION_FIXTURE, { id: t.context.collectionId }) + await ingestItem({ + ingestQueueUrl: t.context.ingestQueueUrl, + ingestTopicArn: t.context.ingestTopicArn, + item: collection + }) + + t.context.itemId = randomId('item') + const item = await loadFixture(ITEM_FIXTURE, { + id: t.context.itemId, + collection: t.context.collectionId + }) + await ingestItem({ + ingestQueueUrl: t.context.ingestQueueUrl, + ingestTopicArn: t.context.ingestTopicArn, + item + }) + + t.context.collectionWithAssetId = randomId('collection-with-asset') + const collectionWithAsset = await loadFixture( + COLLECTION_WITH_ASSET_FIXTURE, + { id: t.context.collectionWithAssetId } + ) + await ingestItem({ + ingestQueueUrl: t.context.ingestQueueUrl, + ingestTopicArn: t.context.ingestTopicArn, + item: collectionWithAsset + }) +}) + +test.after.always(async (t) => { + if (t.context.api) await t.context.api.close() +}) + +test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - returns 403 when proxy disabled', async (t) => { + const { collectionId, itemId } = t.context + + const response = await t.context.api.client.get( + `collections/${collectionId}/items/${itemId}/assets/B1`, + { + resolveBodyOnly: false, + throwHttpErrors: false, + followRedirect: false + } + ) + + t.is(response.statusCode, 403) +}) + +test('GET /collections/:collectionId/assets/:assetKey - returns 403 when proxy disabled', async (t) => { + const { collectionWithAssetId } = t.context + + const response = await t.context.api.client.get( + `collections/${collectionWithAssetId}/assets/thumbnail`, + { + resolveBodyOnly: false, + throwHttpErrors: false, + followRedirect: false + } + ) + + t.is(response.statusCode, 403) +}) + +test('GET /collections/:collectionId/items/:itemId - item asset hrefs unchanged when proxy disabled', async (t) => { + const { collectionId, itemId } = t.context + + const item = await loadFixture(ITEM_FIXTURE) 
+ + const response = await t.context.api.client.get( + `collections/${collectionId}/items/${itemId}`, + { resolveBodyOnly: false } + ) + + t.is(response.statusCode, 200) + t.is(response.body.assets.B1.href, item.assets.B1.href) + t.falsy(response.body.assets.B1.alternate) + t.false(response.body.stac_extensions?.includes(ALTERNATE_ASSETS_EXTENSION)) +}) + +test('GET /collections/:collectionId - collection asset hrefs unchanged when proxy disabled', async (t) => { + const { collectionWithAssetId } = t.context + + const collection = await loadFixture(COLLECTION_WITH_ASSET_FIXTURE) + + const response = await t.context.api.client.get( + `collections/${collectionWithAssetId}`, + { resolveBodyOnly: false } + ) + + t.is(response.statusCode, 200) + t.is(response.body.assets.thumbnail.href, collection.assets.thumbnail.href) + t.falsy(response.body.assets.thumbnail.alternate) + t.false(response.body.stac_extensions?.includes(ALTERNATE_ASSETS_EXTENSION)) +}) + +test('POST /search - item asset hrefs unchanged when proxy disabled', async (t) => { + const item = await loadFixture(ITEM_FIXTURE) + + const response = await t.context.api.client.post('search', { + json: { limit: 1 } + }) + + t.is(response.type, 'FeatureCollection') + t.true(response.features.length > 0) + t.is(response.features[0].assets.B1.href, item.assets.B1.href) + t.falsy(response.features[0].assets.B1.alternate) + t.false(response.features[0].stac_extensions?.includes(ALTERNATE_ASSETS_EXTENSION)) +}) + +test('GET /collections - collection asset hrefs unchanged when proxy disabled', async (t) => { + const { collectionWithAssetId } = t.context + + const collection = await loadFixture(COLLECTION_WITH_ASSET_FIXTURE) + + const response = await t.context.api.client.get('collections', { + resolveBodyOnly: false + }) + + t.is(response.statusCode, 200) + + const collectionWithAssets = response.body.collections.find( + (c) => c.id === collectionWithAssetId + ) + + t.truthy(collectionWithAssets) + 
t.is(collectionWithAssets.assets.thumbnail.href, collection.assets.thumbnail.href) + t.falsy(collectionWithAssets.assets.thumbnail.alternate) +}) + +test('GET /collections/:collectionId/items - item asset hrefs unchanged when proxy disabled', async (t) => { + const { collectionId } = t.context + + const item = await loadFixture(ITEM_FIXTURE) + + const response = await t.context.api.client.get( + `collections/${collectionId}/items`, + { resolveBodyOnly: false } + ) + + t.is(response.statusCode, 200) + t.is(response.body.type, 'FeatureCollection') + t.true(response.body.features.length > 0) + t.is(response.body.features[0].assets.B1.href, item.assets.B1.href) + t.falsy(response.body.features[0].assets.B1.alternate) +}) diff --git a/tests/system/test-api-asset-proxy.js b/tests/system/test-api-asset-proxy.js index d5d93f36..9846507d 100644 --- a/tests/system/test-api-asset-proxy.js +++ b/tests/system/test-api-asset-proxy.js @@ -1,36 +1,32 @@ // @ts-nocheck -/** - * Asset Proxy System Tests - * - * These tests verify the asset proxy endpoints work correctly. - * The env var is set before starting the API to test with proxying enabled. 
- */ - -// Set env var before starting the API -process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - -/* eslint-disable import/first */ import test from 'ava' +import { CreateBucketCommand } from '@aws-sdk/client-s3' +import { ALTERNATE_ASSETS_EXTENSION } from '../../src/lib/asset-proxy.js' import { deleteAllIndices } from '../helpers/database.js' import { ingestItem } from '../helpers/ingest.js' import { randomId, loadFixture } from '../helpers/utils.js' import { setup } from '../helpers/system-tests.js' -/* eslint-enable import/first */ +import setupAssetProxy from '../helpers/asset-proxy.js' +import { s3 } from '../../src/lib/aws-clients.js' + +const COLLECTION_FIXTURE = 'landsat-8-l1-collection.json' +const ITEM_FIXTURE = 'stac/LC80100102015082LGN00.json' +const COLLECTION_WITH_ASSET_FIXTURE = 'stac/collection-with-asset.json' test.before(async (t) => { await deleteAllIndices() const standUpResult = await setup() - t.context = standUpResult + const s3Client = s3() + await s3Client.send(new CreateBucketCommand({ Bucket: 'landsat-pds' })) - t.context.collectionId = randomId('collection') + standUpResult.api.app.locals['assetProxy'] = await setupAssetProxy('ALL_BUCKETS_IN_ACCOUNT') - const collection = await loadFixture( - 'landsat-8-l1-collection.json', - { id: t.context.collectionId } - ) + t.context = standUpResult + t.context.collectionId = randomId('collection') + const collection = await loadFixture(COLLECTION_FIXTURE, { id: t.context.collectionId }) await ingestItem({ ingestQueueUrl: t.context.ingestQueueUrl, ingestTopicArn: t.context.ingestTopicArn, @@ -38,19 +34,25 @@ test.before(async (t) => { }) t.context.itemId = randomId('item') + const item = await loadFixture(ITEM_FIXTURE, { + id: t.context.itemId, + collection: t.context.collectionId + }) + await ingestItem({ + ingestQueueUrl: t.context.ingestQueueUrl, + ingestTopicArn: t.context.ingestTopicArn, + item + }) - const item = await loadFixture( - 'stac/LC80100102015082LGN00.json', - { - id: 
t.context.itemId, - collection: t.context.collectionId - } + t.context.collectionWithAssetId = randomId('collection-with-asset') + const collectionWithAsset = await loadFixture( + COLLECTION_WITH_ASSET_FIXTURE, + { id: t.context.collectionWithAssetId } ) - await ingestItem({ ingestQueueUrl: t.context.ingestQueueUrl, ingestTopicArn: t.context.ingestTopicArn, - item + item: collectionWithAsset }) }) @@ -58,6 +60,15 @@ test.after.always(async (t) => { if (t.context.api) await t.context.api.close() }) +test('AssetProxy initialized with ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', (t) => { + const assetProxy = t.context.api.app.locals['assetProxy'] + + t.truthy(assetProxy.bucketsCache) + t.true(assetProxy.isEnabled()) + t.true(assetProxy.shouldProxyBucket('landsat-pds')) + t.true(!assetProxy.shouldProxyBucket('some-other-bucket')) +}) + test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - 302 redirect to presigned URL', async (t) => { const { collectionId, itemId } = t.context @@ -78,22 +89,10 @@ test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - 302 redire }) test('GET /collections/:collectionId/assets/:assetKey - 302 redirect for collection assets', async (t) => { - const { collectionId } = t.context - - const collection = await t.context.api.client.get( - `collections/${collectionId}`, - { resolveBodyOnly: false } - ) - - if (!collection.body.assets || Object.keys(collection.body.assets).length === 0) { - t.pass('Collection has no assets to test') - return - } - - const assetKey = Object.keys(collection.body.assets)[0] + const { collectionWithAssetId } = t.context const response = await t.context.api.client.get( - `collections/${collectionId}/assets/${assetKey}`, + `collections/${collectionWithAssetId}/assets/thumbnail`, { resolveBodyOnly: false, throwHttpErrors: false, @@ -103,6 +102,8 @@ test('GET /collections/:collectionId/assets/:assetKey - 302 redirect for collect t.is(response.statusCode, 302) 
t.truthy(response.headers.location) + t.true(response.headers.location.includes('landsat-pds')) + t.true(response.headers.location.includes('X-Amz-Algorithm')) t.true(response.headers.location.includes('X-Amz-Algorithm')) }) @@ -145,3 +146,37 @@ test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - 404 for no t.is(response.statusCode, 404) }) + +test('GET /collections/:collectionId/items/:itemId - item asset hrefs are transformed with proxy enabled', async (t) => { + const { collectionId, itemId } = t.context + + const item = await loadFixture(ITEM_FIXTURE) + + const response = await t.context.api.client.get( + `collections/${collectionId}/items/${itemId}`, + { resolveBodyOnly: false } + ) + + t.is(response.statusCode, 200) + const expectedAssetPath = `/collections/${collectionId}/items/${itemId}/assets/B1` + t.true(response.body.assets.B1.href.includes(expectedAssetPath)) + t.is(response.body.assets.B1.alternate.s3.href, item.assets.B1.href) + t.true(response.body.stac_extensions.includes(ALTERNATE_ASSETS_EXTENSION)) +}) + +test('GET /collections/:collectionId - collection asset hrefs are transformed with proxy enabled', async (t) => { + const { collectionWithAssetId } = t.context + + const collection = await loadFixture(COLLECTION_WITH_ASSET_FIXTURE) + + const response = await t.context.api.client.get( + `collections/${collectionWithAssetId}`, + { resolveBodyOnly: false } + ) + + t.is(response.statusCode, 200) + const expectedAssetPath = `/collections/${collectionWithAssetId}/assets/thumbnail` + t.true(response.body.assets.thumbnail.href.includes(expectedAssetPath)) + t.is(response.body.assets.thumbnail.alternate.s3.href, collection.assets.thumbnail.href) + t.true(response.body.stac_extensions.includes(ALTERNATE_ASSETS_EXTENSION)) +}) diff --git a/tests/system/test-api-item-get.js b/tests/system/test-api-item-get.js index c33bba1b..517787f5 100644 --- a/tests/system/test-api-item-get.js +++ b/tests/system/test-api-item-get.js @@ -46,8 +46,6 @@ 
test.beforeEach(async (_) => { delete process.env['ENABLE_COLLECTIONS_AUTHX'] delete process.env['ENABLE_FILTER_AUTHX'] delete process.env['ENABLE_THUMBNAILS'] - delete process.env['ASSET_PROXY_BUCKET_OPTION'] - delete process.env['ASSET_PROXY_BUCKET_LIST'] }) test.after.always(async (t) => { @@ -304,25 +302,3 @@ test('GET /collections/:collectionId/items/:itemId/thumbnail with filter authx r }) } })).statusCode, 302) }) - -test.serial('GET /collections/:collectionId/items/:itemId with asset proxying transforms assets', async (t) => { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - - const { collectionId, itemId } = t.context - - const response = await t.context.api.client.get( - `collections/${collectionId}/items/${itemId}`, - { resolveBodyOnly: false } - ) - - t.is(response.statusCode, 200) - - const { assets } = response.body - t.truthy(assets.B1) - - const b1Asset = assets.B1 - t.true(b1Asset.href.includes(`/collections/${collectionId}/items/${itemId}/assets/B1`)) - t.truthy(b1Asset.alternate) - t.truthy(b1Asset.alternate.s3) - t.true(b1Asset.alternate.s3.href.includes('landsat-pds')) -}) diff --git a/tests/system/test-api-search-post.js b/tests/system/test-api-search-post.js index 0ced65c0..e0f2048d 100644 --- a/tests/system/test-api-search-post.js +++ b/tests/system/test-api-search-post.js @@ -45,8 +45,6 @@ test.before(async (t) => { test.beforeEach(async (_) => { delete process.env['ENABLE_COLLECTIONS_AUTHX'] delete process.env['ENABLE_FILTER_AUTHX'] - delete process.env['ASSET_PROXY_BUCKET_OPTION'] - delete process.env['ASSET_PROXY_BUCKET_LIST'] }) test.after.always(async (t) => { @@ -1617,28 +1615,3 @@ test('/search - context extension - context added when enabled', async (t) => { t.is(response.context.returned, 3) t.is(response.context.limit, 10) }) - -test.serial('POST /search with asset proxying transforms assets', async (t) => { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - - const response = await t.context.api.client.post('search', { - 
json: { - collections: ['landsat-8-l1'], - limit: 1 - } - }) - - t.is(response.features.length, 1) - - const item = response.features[0] - t.truthy(item.assets) - - const assetKeys = Object.keys(item.assets) - t.true(assetKeys.length > 0) - - const sampleAsset = item.assets[assetKeys[0]] - t.true(sampleAsset.href.includes(`/collections/${item.collection}/items/${item.id}/assets/${assetKeys[0]}`)) - t.truthy(sampleAsset.alternate) - t.truthy(sampleAsset.alternate.s3) - t.true(sampleAsset.alternate.s3.href.includes('s3') || sampleAsset.alternate.s3.href.includes('amazonaws')) -}) diff --git a/tests/system/test-ingest.js b/tests/system/test-ingest.js index a4fedd41..f665f82d 100644 --- a/tests/system/test-ingest.js +++ b/tests/system/test-ingest.js @@ -5,11 +5,12 @@ import test from 'ava' import nock from 'nock' import { DateTime } from 'luxon' import { getCollectionIds, getItem } from '../helpers/api.js' -import { handler } from '../../src/lambdas/ingest/index.js' +import { handler, resetAssetProxy } from '../../src/lambdas/ingest/index.js' import { loadFixture, randomId } from '../helpers/utils.js' import { refreshIndices, deleteAllIndices } from '../helpers/database.js' import { sqsTriggerLambda, purgeQueue } from '../helpers/sqs.js' import { sns, sqs, s3 as _s3 } from '../../src/lib/aws-clients.js' +import { ALTERNATE_ASSETS_EXTENSION } from '../../src/lib/asset-proxy.js' import { setup } from '../helpers/system-tests.js' import { ingestItemC, ingestFixtureC, testPostIngestSNS } from '../helpers/ingest.js' @@ -37,8 +38,6 @@ test.beforeEach(async (t) => { await purgeQueue(ingestQueueUrl) delete process.env['ENABLE_INGEST_ACTION_TRUNCATE'] - delete process.env['ASSET_PROXY_BUCKET_OPTION'] - delete process.env['ASSET_PROXY_BUCKET_LIST'] }) test.afterEach.always(() => { @@ -519,34 +518,29 @@ test('Ingested item is published to post-ingest SNS topic with updated links', a } }) -test.serial('Ingested item is published to post-ingest SNS topic with transformed assets', 
async (t) => { +test('Ingested item is published to post-ingest SNS topic with proxied assets', async (t) => { const envBeforeTest = { ...process.env } try { - const hostname = 'some-stac-server.com' - const endpoint = `https://${hostname}` + const endpoint = 'https://some-stac-server.com' process.env['STAC_API_URL'] = endpoint process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + resetAssetProxy() const collection = await ingestCollectionAndPurgePostIngestQueue(t) - const item = await loadFixture( - 'stac/LC80100102015082LGN00.json', + 'stac/ingest-item.json', { id: randomId('item'), collection: collection.id } ) - const { message } = await testPostIngestSNS(t, item) - - t.truthy(message.record.assets) + const firstAssetKey = Object.keys(item.assets)[0] + const originalHref = item.assets[firstAssetKey].href - const assetKeys = Object.keys(message.record.assets) - t.true(assetKeys.length > 0) + const { message } = await testPostIngestSNS(t, item) + const firstAsset = message.record.assets[firstAssetKey] - const b1Asset = message.record.assets.B1 - t.truthy(b1Asset) - t.true(b1Asset.href.includes(`/collections/${collection.id}/items/${item.id}/assets/B1`)) - t.truthy(b1Asset.alternate) - t.truthy(b1Asset.alternate.s3) - t.true(b1Asset.alternate.s3.href.includes('landsat-pds')) + t.true(firstAsset.href.includes(endpoint)) + t.is(firstAsset.alternate.s3.href, originalHref) + t.true(message.record.stac_extensions.includes(ALTERNATE_ASSETS_EXTENSION)) } finally { process.env = envBeforeTest } diff --git a/tests/unit/test-asset-proxy.js b/tests/unit/test-asset-proxy.js index 240f9ec2..d933985e 100644 --- a/tests/unit/test-asset-proxy.js +++ b/tests/unit/test-asset-proxy.js @@ -1,366 +1,508 @@ +// @ts-nocheck + import test from 'ava' -import { - parseS3Url, - proxyAssets, - shouldProxyAssets, - determineS3Region, - isAssetProxyEnabled, - BucketOption -} from '../../src/lib/asset-proxy.js' - -test('parseS3Url - s3:// URI format', (t) => { - const result = 
parseS3Url('s3://my-bucket/path/to/file.tif') - t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: null }) -}) +import { AssetProxy, BucketOption, ALTERNATE_ASSETS_EXTENSION } from '../../src/lib/asset-proxy.js' -test('parseS3Url - virtual-hosted style with region', (t) => { - const result = parseS3Url('https://my-bucket.s3.us-west-2.amazonaws.com/path/to/file.tif') - t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: 'us-west-2' }) +test('BucketOption - exports expected constants', (t) => { + t.is(BucketOption.NONE, 'NONE') + t.is(BucketOption.ALL, 'ALL') + t.is(BucketOption.ALL_BUCKETS_IN_ACCOUNT, 'ALL_BUCKETS_IN_ACCOUNT') + t.is(BucketOption.LIST, 'LIST') }) -test('parseS3Url - virtual-hosted style without region', (t) => { - const result = parseS3Url('https://my-bucket.s3.amazonaws.com/path/to/file.tif') - t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: null }) -}) +test('AssetProxy - constructor initializes with expected defaults', (t) => { + const before = { ...process.env } + try { + delete process.env['ASSET_PROXY_BUCKET_OPTION'] -test('parseS3Url - path style with region (dot format)', (t) => { - const result = parseS3Url('https://s3.us-east-1.amazonaws.com/my-bucket/path/to/file.tif') - t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: 'us-east-1' }) + const proxy = new AssetProxy() + t.is(proxy.bucketOption, 'NONE') + t.is(proxy.urlExpiry, 300) + } finally { + process.env = before + } }) -test('parseS3Url - path style with region (hyphen format - legacy)', (t) => { - const result = parseS3Url('https://s3-us-west-2.amazonaws.com/landsat-pds/L8/file.tif') - t.deepEqual(result, { bucket: 'landsat-pds', key: 'L8/file.tif', region: 'us-west-2' }) +test('AssetProxy - constructor reads env vars correctly', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + process.env['ASSET_PROXY_URL_EXPIRY'] = '600' + 
process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1,bucket2' + + const proxy = new AssetProxy() + t.is(proxy.bucketOption, 'ALL') + t.is(proxy.urlExpiry, 600) + t.is(proxy.bucketList, 'bucket1,bucket2') + } finally { + process.env = before + } }) -test('parseS3Url - path style without region', (t) => { - const result = parseS3Url('https://s3.amazonaws.com/my-bucket/path/to/file.tif') - t.deepEqual(result, { bucket: 'my-bucket', key: 'path/to/file.tif', region: null }) +test('AssetProxy - initialize() with LIST mode parses bucket list', async (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' + process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1, bucket2 , bucket3' + + const proxy = new AssetProxy() + await proxy.initialize() + + t.truthy(proxy.bucketsCache) + t.true(proxy.bucketsCache.has('bucket1')) + t.true(proxy.bucketsCache.has('bucket2')) + t.true(proxy.bucketsCache.has('bucket3')) + t.is(proxy.bucketsCache.size, 3) + } finally { + process.env = before + } }) -test('parseS3Url - invalid URLs', (t) => { - t.is(parseS3Url('https://example.com/file.tif'), null) - t.is(parseS3Url('s3://bucket'), null) - t.is(parseS3Url('s3://bucket-only-no-key'), null) - t.is(parseS3Url(''), null) +test('AssetProxy - initialize() with LIST mode throws if no bucket list', async (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' + delete process.env['ASSET_PROXY_BUCKET_LIST'] + + const proxy = new AssetProxy() + await t.throwsAsync( + async () => proxy.initialize(), + { message: /ASSET_PROXY_BUCKET_LIST must be set/ } + ) + } finally { + process.env = before + } }) -test('parseS3Url - handles nested paths', (t) => { - const result = parseS3Url('s3://my-bucket/deeply/nested/path/to/file.tif') - t.deepEqual(result, { bucket: 'my-bucket', key: 'deeply/nested/path/to/file.tif', region: null }) -}) +test('AssetProxy - initialize() with ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', 
async (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' + process.env['AWS_REGION'] = 'us-west-2' + + const proxy = new AssetProxy() + + const mockS3Client = { + send: async () => ({ + Buckets: [ + { Name: 'bucket-1' }, + { Name: 'bucket-2' }, + ] + }) + } -test('isAssetProxyEnabled - NONE mode', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' + proxy.getS3Client = () => mockS3Client - t.false(isAssetProxyEnabled()) + await proxy.initialize() - if (originalOption !== undefined) { - process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - delete process.env['ASSET_PROXY_BUCKET_OPTION'] + t.truthy(proxy.bucketsCache) + t.true(proxy.bucketsCache.has('bucket-1')) + t.true(proxy.bucketsCache.has('bucket-2')) + t.true(!proxy.bucketsCache.has('some-other-bucket')) + t.is(proxy.bucketsCache.size, 2) + } finally { + process.env = before } }) -test('isAssetProxyEnabled - ALL mode', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - - t.true(isAssetProxyEnabled()) +test('AssetProxy - initialize() with ALL_BUCKETS_IN_ACCOUNT mode throws on error', async (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - if (originalOption !== undefined) { - process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - delete process.env['ASSET_PROXY_BUCKET_OPTION'] - } -}) + const proxy = new AssetProxy() -test('isAssetProxyEnabled - LIST mode', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' + const mockS3Client = { + send: async () => { + throw new Error('Access denied') + } + } - t.true(isAssetProxyEnabled()) + proxy.getS3Client = () => mockS3Client - if (originalOption !== undefined) { - 
process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - delete process.env['ASSET_PROXY_BUCKET_OPTION'] + await t.throwsAsync( + async () => proxy.initialize(), + { message: /Failed to fetch buckets for asset proxy: Access denied/ } + ) + } finally { + process.env = before } }) -test('shouldProxyAssets - NONE mode returns false', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' +test('AssetProxy - isEnabled() returns false for NONE', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' - t.false(shouldProxyAssets('any-bucket')) - - if (originalOption !== undefined) { - process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - delete process.env['ASSET_PROXY_BUCKET_OPTION'] + const proxy = new AssetProxy() + t.false(proxy.isEnabled()) + } finally { + process.env = before } }) -test('shouldProxyAssets - ALL mode returns true for any bucket', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' +test('AssetProxy - isEnabled() returns true for ALL', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - t.true(shouldProxyAssets('any-bucket')) - t.true(shouldProxyAssets('another-bucket')) - - if (originalOption !== undefined) { - process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - delete process.env['ASSET_PROXY_BUCKET_OPTION'] + const proxy = new AssetProxy() + t.true(proxy.isEnabled()) + } finally { + process.env = before } }) -test('proxyAssets - ALL mode transforms item assets', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - - const assets = { - thumbnail: { - href: 's3://my-bucket/thumb.jpg', - type: 'image/jpeg' - }, - data: { - href: 
'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif', - type: 'image/tiff' - } - } - - const { assets: proxied, wasProxied } = proxyAssets( - assets, - 'https://api.example.com', - 'collection1', - 'item1' - ) - - t.true(wasProxied) - t.is(proxied.thumbnail.href, 'https://api.example.com/collections/collection1/items/item1/assets/thumbnail') - t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/thumb.jpg') - t.is(proxied.data.href, 'https://api.example.com/collections/collection1/items/item1/assets/data') - t.is(proxied.data.alternate.s3.href, 'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif') - - if (originalOption !== undefined) { - process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - delete process.env['ASSET_PROXY_BUCKET_OPTION'] +test('AssetProxy - isEnabled() returns true for LIST', async (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' + process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1' + + const proxy = new AssetProxy() + await proxy.initialize() + t.true(proxy.isEnabled()) + } finally { + process.env = before } }) -test('proxyAssets - NONE mode does not transform assets', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' +test('AssetProxy - isEnabled() returns true for ALL_BUCKETS_IN_ACCOUNT', async (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' + + const proxy = new AssetProxy() - const assets = { - thumbnail: { - href: 's3://my-bucket/thumb.jpg', - type: 'image/jpeg' + const mockS3Client = { + send: async () => ({ Buckets: [{ Name: 'bucket-1' }] }) } - } - const { assets: proxied, wasProxied } = proxyAssets( - assets, - 'https://api.example.com', - 'collection1', - 'item1' - ) + proxy.getS3Client = () => mockS3Client + await proxy.initialize() + + t.true(proxy.isEnabled()) + } finally { + process.env = before + } 
+}) - t.false(wasProxied) - t.is(proxied.thumbnail.href, 's3://my-bucket/thumb.jpg') - t.is(proxied.thumbnail.alternate, undefined) +test('AssetProxy - shouldProxyBucket() with NONE mode returns false', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' - if (originalOption !== undefined) { - process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - delete process.env['ASSET_PROXY_BUCKET_OPTION'] + const proxy = new AssetProxy() + t.false(proxy.shouldProxyBucket('any-bucket')) + } finally { + process.env = before } }) -test('proxyAssets - collection assets (no itemId)', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' +test('AssetProxy - shouldProxyBucket() with ALL mode returns true', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const assets = { - thumbnail: { - href: 's3://my-bucket/collection-thumb.jpg', - type: 'image/jpeg' - } + const proxy = new AssetProxy() + t.true(proxy.shouldProxyBucket('any-bucket')) + t.true(proxy.shouldProxyBucket('another-bucket')) + } finally { + process.env = before } +}) - const { assets: proxied, wasProxied } = proxyAssets( - assets, - 'https://api.example.com', - 'collection1', - null - ) +test('AssetProxy - shouldProxyBucket() with LIST mode only proxies buckets in list', async (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' + process.env['ASSET_PROXY_BUCKET_LIST'] = 'allowed-bucket,another-allowed' - t.true(wasProxied) - t.is(proxied.thumbnail.href, 'https://api.example.com/collections/collection1/assets/thumbnail') - t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/collection-thumb.jpg') + const proxy = new AssetProxy() + await proxy.initialize() - if (originalOption !== undefined) { - process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - 
delete process.env['ASSET_PROXY_BUCKET_OPTION'] + t.true(proxy.shouldProxyBucket('allowed-bucket')) + t.true(proxy.shouldProxyBucket('another-allowed')) + t.false(proxy.shouldProxyBucket('not-in-list')) + } finally { + process.env = before } }) -test('proxyAssets - preserves existing alternate links', (t) => { - const originalOption = process.env['ASSET_PROXY_BUCKET_OPTION'] - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' +test('AssetProxy - shouldProxyBucket() with ALL_BUCKETS_IN_ACCOUNT mode only proxies fetched buckets', async (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - const assets = { - data: { - href: 's3://my-bucket/data.tif', - type: 'image/tiff', - alternate: { - http: { href: 'https://example.com/data.tif' } - } - } - } + const proxy = new AssetProxy() - const { assets: proxied } = proxyAssets( - assets, - 'https://api.example.com', - 'collection1', - 'item1' - ) + const mockS3Client = { + send: async () => ({ + Buckets: [ + { Name: 'fetched-bucket-1' }, + { Name: 'fetched-bucket-2' } + ] + }) + } - t.is(proxied.data.alternate.http.href, 'https://example.com/data.tif') - t.is(proxied.data.alternate.s3.href, 's3://my-bucket/data.tif') + proxy.getS3Client = () => mockS3Client + await proxy.initialize() - if (originalOption !== undefined) { - process.env['ASSET_PROXY_BUCKET_OPTION'] = originalOption - } else { - delete process.env['ASSET_PROXY_BUCKET_OPTION'] + t.true(proxy.shouldProxyBucket('fetched-bucket-1')) + t.true(proxy.shouldProxyBucket('fetched-bucket-2')) + t.false(proxy.shouldProxyBucket('not-fetched-bucket')) + } finally { + process.env = before } }) -test('proxyAssets - handles non-S3 assets', (t) => { - const assets = { - metadata: { - href: 'https://example.com/metadata.xml', - type: 'application/xml' +test('AssetProxy - getProxiedAssets() transforms item assets in ALL mode', (t) => { + const before = { ...process.env } + try { + 
process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const proxy = new AssetProxy() + const assets = { + thumbnail: { + href: 's3://my-bucket/thumb.jpg', + type: 'image/jpeg' + }, + data: { + href: 'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif', + type: 'image/tiff' + } } - } - - const { assets: proxied, wasProxied } = proxyAssets( - assets, - 'https://api.example.com', - 'collection1', - 'item1' - ) - t.false(wasProxied) - t.is(proxied.metadata.href, 'https://example.com/metadata.xml') - t.is(proxied.metadata.alternate, undefined) + const { assets: proxied, wasProxied } = proxy.getProxiedAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1' + ) + + t.true(wasProxied) + t.is(proxied.thumbnail.href, 'https://api.example.com/collections/collection1/items/item1/assets/thumbnail') + t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/thumb.jpg') + t.is(proxied.data.href, 'https://api.example.com/collections/collection1/items/item1/assets/data') + t.is(proxied.data.alternate.s3.href, 'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif') + } finally { + process.env = before + } }) -test('proxyAssets - handles assets without href', (t) => { - const assets = { - metadata: { - type: 'application/xml' - } - } +test('AssetProxy - getProxiedAssets() transforms collection assets', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const { assets: proxied, wasProxied } = proxyAssets( - assets, - 'https://api.example.com', - 'collection1', - 'item1' - ) + const proxy = new AssetProxy() + const assets = { + thumbnail: { + href: 's3://my-bucket/collection-thumb.jpg', + type: 'image/jpeg' + } + } - t.false(wasProxied) - t.deepEqual(proxied.metadata, { type: 'application/xml' }) + const { assets: proxied, wasProxied } = proxy.getProxiedAssets( + assets, + 'https://api.example.com', + 'collection1', + null + ) + + t.true(wasProxied) + t.is(proxied.thumbnail.href, 
'https://api.example.com/collections/collection1/assets/thumbnail') + t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/collection-thumb.jpg') + } finally { + process.env = before + } }) -test('proxyAssets - handles empty assets object', (t) => { - const assets = {} +test('AssetProxy - getProxiedAssets() does not transform assets in NONE mode', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' - const { assets: proxied, wasProxied } = proxyAssets( - assets, - 'https://api.example.com', - 'collection1', - 'item1' - ) + const proxy = new AssetProxy() + const assets = { + thumbnail: { + href: 's3://my-bucket/thumb.jpg', + type: 'image/jpeg' + } + } - t.false(wasProxied) - t.deepEqual(proxied, {}) + const { assets: proxied, wasProxied } = proxy.getProxiedAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1' + ) + + t.false(wasProxied) + t.is(proxied.thumbnail.href, 's3://my-bucket/thumb.jpg') + t.is(proxied.thumbnail.alternate, undefined) + } finally { + process.env = before + } }) -test('determineS3Region - v1 asset-level storage extension', (t) => { - const asset = { 'storage:region': 'us-east-1' } - const item = {} - t.is(determineS3Region(asset, item), 'us-east-1') -}) +test('AssetProxy - getProxiedAssets() preserves existing alternate links', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const proxy = new AssetProxy() + const assets = { + data: { + href: 's3://my-bucket/data.tif', + type: 'image/tiff', + alternate: { + http: { href: 'https://example.com/data.tif' } + } + } + } -test('determineS3Region - v1 item-level storage extension', (t) => { - const asset = {} - const item = { properties: { 'storage:region': 'eu-west-1' } } - t.is(determineS3Region(asset, item), 'eu-west-1') + const { assets: proxied } = proxy.getProxiedAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1' + ) + + 
t.is(proxied.data.alternate.http.href, 'https://example.com/data.tif') + t.is(proxied.data.alternate.s3.href, 's3://my-bucket/data.tif') + } finally { + process.env = before + } }) -test('determineS3Region - v2 storage extension', (t) => { - const asset = { 'storage:refs': 'scheme1' } - const item = { - 'storage:schemes': { - scheme1: { region: 'ap-southeast-2' } +test('AssetProxy - getProxiedAssets() does not transform non-S3 assets', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const proxy = new AssetProxy() + const assets = { + metadata: { + href: 'https://example.com/metadata.xml', + type: 'application/xml' + } } + + const { assets: proxied, wasProxied } = proxy.getProxiedAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1' + ) + + t.false(wasProxied) + t.is(proxied.metadata.href, 'https://example.com/metadata.xml') + t.is(proxied.metadata.alternate, undefined) + } finally { + process.env = before } - t.is(determineS3Region(asset, item), 'ap-southeast-2') }) -test('determineS3Region - v2 storage extension in properties', (t) => { - const asset = { 'storage:refs': 'scheme1' } - const item = { - properties: { - 'storage:schemes': { - scheme1: { region: 'ap-southeast-2' } +test('AssetProxy - getProxiedAssets() handles assets without href', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const proxy = new AssetProxy() + const assets = { + metadata: { + type: 'application/xml' } } + + const { assets: proxied, wasProxied } = proxy.getProxiedAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1' + ) + + t.false(wasProxied) + t.deepEqual(proxied.metadata, { type: 'application/xml' }) + } finally { + process.env = before } - t.is(determineS3Region(asset, item), 'ap-southeast-2') }) -test('determineS3Region - asset-level takes precedence over item-level', (t) => { - const asset = { 'storage:region': 'us-east-1' 
} - const item = { properties: { 'storage:region': 'eu-west-1' } } - t.is(determineS3Region(asset, item), 'us-east-1') +test('AssetProxy - getProxiedAssets() handles empty assets object', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const proxy = new AssetProxy() + const assets = {} + + const { assets: proxied, wasProxied } = proxy.getProxiedAssets( + assets, + 'https://api.example.com', + 'collection1', + 'item1' + ) + + t.false(wasProxied) + t.deepEqual(proxied, {}) + } finally { + process.env = before + } }) -test('determineS3Region - default fallback', (t) => { - const originalRegion = process.env['AWS_REGION'] - delete process.env['AWS_REGION'] - - const asset = {} - const item = {} - t.is(determineS3Region(asset, item), 'us-west-2') - - if (originalRegion) process.env['AWS_REGION'] = originalRegion +test('AssetProxy - addProxiedAssets() mutates results and adds stac_extensions', (t) => { + const before = { ...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' + + const proxy = new AssetProxy() + const results = [{ + id: 'item1', + collection: 'collection1', + assets: { + data: { + href: 's3://my-bucket/data.tif' + } + } + }] + + proxy.addProxiedAssets(results, 'https://api.example.com') + + t.truthy(results[0].assets) + t.is(results[0].assets.data.href, 'https://api.example.com/collections/collection1/items/item1/assets/data') + t.truthy(results[0].assets.data.alternate) + t.is(results[0].assets.data.alternate.s3.href, 's3://my-bucket/data.tif') + t.truthy(results[0].stac_extensions) + t.true(results[0].stac_extensions.includes(ALTERNATE_ASSETS_EXTENSION)) + } finally { + process.env = before + } }) -test('determineS3Region - environment variable fallback', (t) => { - const originalRegion = process.env['AWS_REGION'] - process.env['AWS_REGION'] = 'us-west-1' +test('AssetProxy - addProxiedAssets() returns unchanged results when disabled', (t) => { + const before = { 
...process.env } + try { + process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' + + const proxy = new AssetProxy() + const results = [{ + id: 'item1', + collection: 'collection1', + assets: { + data: { + href: 's3://my-bucket/data.tif' + } + } + }] - const asset = {} - const item = {} - t.is(determineS3Region(asset, item), 'us-west-1') + const originalHref = results[0].assets.data.href + proxy.addProxiedAssets(results, 'https://api.example.com') - if (originalRegion) { - process.env['AWS_REGION'] = originalRegion - } else { - delete process.env['AWS_REGION'] + t.is(results[0].assets.data.href, originalHref) + t.is(results[0].assets.data.alternate, undefined) + } finally { + process.env = before } }) - -test('BucketOption - exports expected constants', (t) => { - t.is(BucketOption.NONE, 'NONE') - t.is(BucketOption.ALL, 'ALL') - t.is(BucketOption.ALL_BUCKETS_IN_ACCOUNT, 'ALL_BUCKETS_IN_ACCOUNT') - t.is(BucketOption.LIST, 'LIST') -}) From e4b1c2a21b6e4cc9bbbc600e8f8f81b6bd8d221e Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Thu, 16 Oct 2025 16:48:18 -0500 Subject: [PATCH 05/22] docs: update openapi.yaml to reflect asset proxy endpoints --- src/lambdas/api/openapi.yaml | 60 ++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/lambdas/api/openapi.yaml b/src/lambdas/api/openapi.yaml index bcfadac6..77109ef0 100644 --- a/src/lambdas/api/openapi.yaml +++ b/src/lambdas/api/openapi.yaml @@ -380,6 +380,53 @@ paths: $ref: '#/components/responses/Queryables' default: $ref: '#/components/responses/Error' + /collections/{collectionId}/items/{featureId}/assets/{assetKey}: + get: + tags: + - Features + summary: Proxy access to an item asset + description: |- + Redirects to a pre-signed URL for accessing an item asset stored in S3. + This endpoint is only available when asset proxying is enabled. 
+ operationId: getItemAsset + parameters: + - $ref: '#/components/parameters/collectionId' + - $ref: '#/components/parameters/featureId' + - $ref: '#/components/parameters/assetKey' + responses: + '302': + description: Redirect to pre-signed asset URL + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/ServerError' + /collections/{collectionId}/assets/{assetKey}: + get: + tags: + - Features + summary: Proxy access to a collection asset + description: |- + Redirects to a pre-signed URL for accessing a collection asset stored in S3. + This endpoint is only available when asset proxying is enabled. + operationId: getCollectionAsset + parameters: + - $ref: '#/components/parameters/collectionId' + - $ref: '#/components/parameters/assetKey' + responses: + '302': + description: Redirect to pre-signed asset URL + '400': + $ref: '#/components/responses/BadRequest' + '403': + $ref: '#/components/responses/Forbidden' + '404': + $ref: '#/components/responses/NotFound' + '500': + $ref: '#/components/responses/ServerError' components: schemas: landingPage: @@ -1883,6 +1930,12 @@ components: application/json: schema: $ref: '#/components/schemas/exception' + Forbidden: + description: The requested operation is forbidden + content: + application/json: + schema: + $ref: '#/components/schemas/exception' PreconditionFailed: description: Some condition specified by the request could not be met in the server content: @@ -2156,3 +2209,10 @@ components: required: false schema: $ref: '#/components/schemas/filter-crs' + assetKey: + name: assetKey + in: path + description: Asset key identifier + required: true + schema: + type: string From b0ae7d566e0247b8016213403d265ca811c27f08 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Thu, 16 Oct 2025 17:02:39 -0500 Subject: [PATCH 06/22] docs: update README --- README.md | 131 
++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) diff --git a/README.md b/README.md index 99a9307a..aa2eabd2 100644 --- a/README.md +++ b/README.md @@ -53,6 +53,7 @@ - [Filter Extension](#filter-extension) - [Query Extension](#query-extension) - [Aggregation](#aggregation) + - [Asset Proxy](#asset-proxy) - [Collections and filter parameters for authorization](#collections-and-filter-parameters-for-authorization) - [Collections](#collections) - [CQL2 Filter](#cql2-filter) @@ -617,6 +618,9 @@ There are some settings that should be reviewed and updated as needeed in the se | ENABLE_INGEST_ACTION_TRUNCATE | Enables support for ingest action "truncate". | none (not enabled) | | ENABLE_RESPONSE_COMPRESSION | Enables response compression. Set to 'false' to disable. | enabled | | ITEMS_MAX_LIMIT | The maximum limit for the number of items returned from the /search and /collections/{collection_id}/items endpoints. It is recommended that this be set to 100. There is an absolute max limit of 10000 for this. | 10000 | +| ASSET_PROXY_BUCKET_OPTION | Control which S3 buckets are proxied through the API. Options: `NONE` (disabled), `ALL` (all S3 assets), `ALL_BUCKETS_IN_ACCOUNT` (all buckets in AWS account), `LIST` (specific buckets only). | NONE | +| ASSET_PROXY_BUCKET_LIST | Comma-separated list of S3 bucket names to proxy. Required when `ASSET_PROXY_BUCKET_OPTION` is `LIST`. | none | +| ASSET_PROXY_URL_EXPIRY | Pre-signed URL expiry time in seconds for proxied assets. | 300 | Additionally, the credential for OpenSearch must be configured, as decribed in the section [Populating and accessing credentials](#populating-and-accessing-credentials). 
@@ -1124,6 +1128,129 @@ Available aggregations are: - geometry_geohash_grid_frequency ([geohash grid](https://opensearch.org/docs/latest/aggregations/bucket/geohash-grid/) on Item.geometry) - geometry_geotile_grid_frequency ([geotile grid](https://opensearch.org/docs/latest/aggregations/bucket/geotile-grid/) on Item.geometry) +## Asset Proxy + +The Asset Proxy feature allows stac-server to proxy access to S3 assets through the STAC API by generating pre-signed URLs. When enabled, asset `href` values pointing to S3 are replaced with proxy endpoint URLs, while the original S3 URLs are preserved in the `alternate.s3.href` field using the [Alternate Assets Extension](https://github.com/stac-extensions/alternate-assets). + +### Configuration + +Asset proxying is controlled by the `ASSET_PROXY_BUCKET_OPTION` environment variable, which supports four modes: + +- **NONE** (default): Asset proxy is disabled. All asset hrefs are returned unchanged. +- **ALL**: Proxy all S3 assets regardless of which bucket they are in. +- **ALL_BUCKETS_IN_ACCOUNT**: Proxy assets from any S3 bucket in the AWS account. The list of buckets is fetched at Lambda startup. +- **LIST**: Proxy only assets from specific buckets listed in `ASSET_PROXY_BUCKET_LIST`. + +When using the `LIST` option, the `ASSET_PROXY_BUCKET_LIST` environment variable must be set to a comma-separated list of bucket names: + +```yaml +ASSET_PROXY_BUCKET_OPTION: "LIST" +ASSET_PROXY_BUCKET_LIST: "my-bucket-1,my-bucket-2,my-bucket-3" +``` + +The `ASSET_PROXY_URL_EXPIRY` environment variable controls how long the pre-signed URLs are valid, in seconds (default: 300). 
+ +### Endpoints + +When asset proxying is enabled, two endpoints are available for accessing proxied assets: + +- `GET /collections/{collectionId}/items/{itemId}/assets/{assetKey}` - Redirects (HTTP 302) to a pre-signed S3 URL for an item asset +- `GET /collections/{collectionId}/assets/{assetKey}` - Redirects (HTTP 302) to a pre-signed S3 URL for a collection asset + +These endpoints will return: +- `302` - Redirect to pre-signed S3 URL (success) +- `400` - Bad request (asset href is not a valid S3 URL) +- `403` - Forbidden (asset proxy disabled, or bucket not in allowed list) +- `404` - Not found (item/collection or asset does not exist) +- `500` - Server error + +### IAM Permissions + +For the Asset Proxy to generate pre-signed URLs, the API Lambda must have `s3:GetObject` permission for the S3 buckets containing the assets. Add the following to the IAM role statements in your serverless.yml: + +```yaml +- Effect: Allow + Action: s3:GetObject + Resource: + - "arn:aws:s3:::my-bucket-1/*" + - "arn:aws:s3:::my-bucket-2/*" +``` + +For the `ALL` or `ALL_BUCKETS_IN_ACCOUNT` options, you may use a wildcard: + +```yaml +- Effect: Allow + Action: s3:GetObject + Resource: "arn:aws:s3:::*/*" +``` + +When using `ALL_BUCKETS_IN_ACCOUNT`, the Lambda also needs permission to list buckets: + +```yaml +- Effect: Allow + Action: s3:ListAllMyBuckets + Resource: "*" +``` + +### Asset Transformation + +When asset proxying is enabled and an asset's `href` points to an S3 URL, the asset is transformed as follows: + +**Original asset:** +```json +{ + "thumbnail": { + "href": "s3://my-bucket/path/to/thumbnail.png", + "type": "image/png", + "roles": ["thumbnail"] + } +} +``` + +**Transformed asset:** +```json +{ + "thumbnail": { + "href": "https://api.example.com/collections/my-collection/items/my-item/assets/thumbnail", + "type": "image/png", + "roles": ["thumbnail"], + "alternate": { + "s3": { + "href": "s3://my-bucket/path/to/thumbnail.png" + } + } + } +} +``` + +The item or 
collection will also have the Alternate Assets Extension added to its `stac_extensions` array: + +```json +"stac_extensions": [ + "https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json" +] +``` + +### Supported S3 URL Formats + +The Asset Proxy recognizes and parses these S3 URL formats: + +- S3 URI: `s3://bucket-name/key` +- Virtual-hosted style: `https://bucket-name.s3.region.amazonaws.com/key` +- Virtual-hosted style (no region): `https://bucket-name.s3.amazonaws.com/key` +- Path style: `https://s3.region.amazonaws.com/bucket-name/key` +- Path style (legacy): `https://s3-region.amazonaws.com/bucket-name/key` + +### Region Determination + +The AWS region for generating pre-signed URLs is determined in this order: + +1. Region parsed from the S3 URL (for HTTPS URLs) +2. `storage:region` field on the asset (Storage Extension v1) +3. Region from `storage:schemes` referenced by `storage:refs` on the asset (Storage Extension v2) +4. `AWS_REGION` environment variable +5. Default: `us-west-2` + ## Collections and filter parameters for authorization One key concern in stac-server is how to restrict user's access to items. 
These @@ -1160,6 +1287,8 @@ The endpoints this applies to are: - /collections/:collectionId/items - /collections/:collectionId/items/:itemId - /collections/:collectionId/items/:itemId/thumbnail +- /collections/:collectionId/items/:itemId/assets/:assetKey +- /collections/:collectionId/assets/:assetKey - /search - /aggregate @@ -1187,6 +1316,8 @@ The endpoints this applies to are: - /collections/:collectionId/items - /collections/:collectionId/items/:itemId - /collections/:collectionId/items/:itemId/thumbnail +- /collections/:collectionId/items/:itemId/assets/:assetKey +- /collections/:collectionId/assets/:assetKey - /search - /aggregate From a09d37a3dd3114eb5e1e1f5847db3271e1ab7ec4 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Thu, 16 Oct 2025 17:11:17 -0500 Subject: [PATCH 07/22] chore: update CHANGELOG --- CHANGELOG.md | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index be11416f..3a5e0f44 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,22 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added + +- Asset proxying for generating pre-signed S3 URLs through proxy endpoints `GET + /collections/{collectionId}/items/{itemId}/assets/{assetKey}` and `GET + /collections/{collectionId}/assets/{assetKey}`. +- Environment variables `ASSET_PROXY_BUCKET_OPTION`, `ASSET_PROXY_BUCKET_LIST`, and + `ASSET_PROXY_URL_EXPIRY` to configure asset proxying. + +### Changed + +- When asset proxying is enabled, S3 asset hrefs are replaced with proxy endpoint URLs, + and original S3 URLs are preserved in `alternate.s3.href` using the Alternate Assets + Extension. 
+ ## [4.4.0] - 2025-09-10 ## Changed @@ -579,8 +595,7 @@ Initial release, forked from [sat-api](https://github.com/sat-utils/sat-api/tree Compliant with STAC 0.9.0 - - +[unreleased]: https://github.com/stac-utils/stac-server/compare/v4.4.0...main [4.4.0]: https://github.com/stac-utils/stac-api/compare/v4.3.0...v4.4.0 [4.3.0]: https://github.com/stac-utils/stac-api/compare/v4.2.0...v4.3.0 [4.2.0]: https://github.com/stac-utils/stac-api/compare/v4.1.0...v4.2.0 From ac302f9a5f279225123c2019004a392998cf234d Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 22 Oct 2025 09:44:44 -0500 Subject: [PATCH 08/22] review: move appInstance initialization out of function scope so it occurs during lambda init (rather than execution) --- src/lambdas/api/index.js | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/lambdas/api/index.js b/src/lambdas/api/index.js index 5874094b..05d6046f 100644 --- a/src/lambdas/api/index.js +++ b/src/lambdas/api/index.js @@ -30,6 +30,8 @@ const { * @typedef {z.infer} LambdaError */ +const appInstance = await createApp() + /** @type {APIGatewayProxyResult} */ const internalServerError = Object.freeze({ statusCode: 500, @@ -156,18 +158,12 @@ const invokePostHook = async (lambda, postHook, payload) => { return hookResult } -let appInstance = null - /** * @param {APIGatewayProxyEvent} event * @param {Context} context * @returns {Promise} */ const callServerlessApp = async (event, context) => { - if (!appInstance) { - appInstance = await createApp() - } - const result = await serverless(appInstance)(event, context) try { From 4802a726eebfdb6f4154178fe1a492a1522d0fac Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 22 Oct 2025 11:03:11 -0500 Subject: [PATCH 09/22] review: initialize assetProxy outside function so it runs during lamba init phase --- src/lambdas/ingest/index.js | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/src/lambdas/ingest/index.js b/src/lambdas/ingest/index.js index 
27fe5a27..d889c2b4 100644 --- a/src/lambdas/ingest/index.js +++ b/src/lambdas/ingest/index.js @@ -6,19 +6,12 @@ import getObjectJson from '../../lib/s3-utils.js' import logger from '../../lib/logger.js' import { AssetProxy } from '../../lib/asset-proxy.js' -let assetProxyInstance = null +let assetProxy = new AssetProxy() +await assetProxy.initialize() -const getAssetProxy = async () => { - if (!assetProxyInstance) { - assetProxyInstance = new AssetProxy() - await assetProxyInstance.initialize() - } - - return assetProxyInstance -} - -export const resetAssetProxy = () => { - assetProxyInstance = null +export const resetAssetProxy = async () => { + assetProxy = new AssetProxy() + await assetProxy.initialize() } const isSqsEvent = (event) => 'Records' in event @@ -92,7 +85,7 @@ export const handler = async (event, _context) => { if (postIngestTopicArn) { logger.debug('Publishing to post-ingest topic: %s', postIngestTopicArn) - const assetProxy = await getAssetProxy() + // const assetProxy = await getAssetProxy() await publishResultsToSns(results, postIngestTopicArn, assetProxy) } else { logger.debug('Skipping post-ingest notification since no topic is configured') From 40632c8cee8cf3f2a21d23c1ff223379f868df54 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 22 Oct 2025 11:49:50 -0500 Subject: [PATCH 10/22] review: move from v2 to v3 of AWS SDK --- src/lib/aws-clients.js | 12 ++++++------ src/lib/s3-utils.js | 4 +++- src/lib/s3-utils.ts | 5 ++++- src/lib/sns.js | 4 +++- tests/helpers/ingest.js | 11 ++++++++--- tests/helpers/sns.js | 7 +++++-- tests/helpers/sqs.js | 18 ++++++++++++++---- tests/system/test-ingest.js | 23 ++++++++++++++++------- 8 files changed, 59 insertions(+), 25 deletions(-) diff --git a/src/lib/aws-clients.js b/src/lib/aws-clients.js index 5bdf1a0b..0475f782 100644 --- a/src/lib/aws-clients.js +++ b/src/lib/aws-clients.js @@ -1,6 +1,6 @@ -import { S3 } from '@aws-sdk/client-s3' -import { SNS } from '@aws-sdk/client-sns' -import { SQS } from 
'@aws-sdk/client-sqs' +import { S3Client } from '@aws-sdk/client-s3' +import { SNSClient } from '@aws-sdk/client-sns' +import { SQSClient } from '@aws-sdk/client-sqs' const localStackEndpointEnvVar = 'LOCAL_STACK_ENDPOINT' @@ -28,7 +28,7 @@ export const s3 = (options = {}) => { const overrides = useLocalStack() ? localStackOverrides : {} - return new S3({ + return new S3Client({ ...overrides, ...options }) @@ -37,7 +37,7 @@ export const s3 = (options = {}) => { export const sns = (options = {}) => { const overrides = useLocalStack() ? localStackParams() : {} - return new SNS({ + return new SNSClient({ ...overrides, ...options }) @@ -46,7 +46,7 @@ export const sns = (options = {}) => { export const sqs = (options = {}) => { const overrides = useLocalStack() ? localStackParams() : {} - return new SQS({ + return new SQSClient({ ...overrides, ...options }) diff --git a/src/lib/s3-utils.js b/src/lib/s3-utils.js index a823a0f8..38b68fba 100644 --- a/src/lib/s3-utils.js +++ b/src/lib/s3-utils.js @@ -1,12 +1,14 @@ +import { GetObjectCommand } from '@aws-sdk/client-s3' import { s3 } from './aws-clients.js' import logger from './logger.js' const getObjectBody = async (s3Location) => { try { - const result = await s3().getObject({ + const command = new GetObjectCommand({ Bucket: s3Location.bucket, Key: s3Location.key }) + const result = await s3().send(command) if (result.Body === undefined) { throw new Error(`Body of ${s3Location.url} is undefined`) diff --git a/src/lib/s3-utils.ts b/src/lib/s3-utils.ts index cfea2575..2b5bdafa 100644 --- a/src/lib/s3-utils.ts +++ b/src/lib/s3-utils.ts @@ -1,12 +1,15 @@ +import { GetObjectCommand } from '@aws-sdk/client-s3' import { s3 } from './aws-clients.js' const getObjectBody = async (s3Location: {bucket: string, key: string}) => { try { - const result = await s3().getObject({ + const command = new GetObjectCommand({ Bucket: s3Location.bucket, Key: s3Location.key }) + const result = await s3().send(command) + if (result.Body === 
undefined) { throw new Error(`Body of ${s3Location.bucket}/${s3Location.key} is undefined`) } diff --git a/src/lib/sns.js b/src/lib/sns.js index 5eb6332e..631bc357 100644 --- a/src/lib/sns.js +++ b/src/lib/sns.js @@ -1,3 +1,4 @@ +import { PublishCommand } from '@aws-sdk/client-sns' import { sns } from './aws-clients.js' import logger from './logger.js' import { getBBox, getStartAndEndDates, isCollection, isItem } from './stac-utils.js' @@ -86,11 +87,12 @@ const attrsFromPayload = function (payload) { export async function publishRecordToSns(topicArn, record, error) { const payload = { record, error } try { - await sns().publish({ + const command = new PublishCommand({ Message: JSON.stringify(payload), TopicArn: topicArn, MessageAttributes: attrsFromPayload(payload) }) + await sns().send(command) logger.info(`Wrote record ${record.id} to ${topicArn}`) } catch (err) { logger.error(`Failed to write record ${record.id} to ${topicArn}: ${err}`) diff --git a/tests/helpers/ingest.js b/tests/helpers/ingest.js index 66aef556..3cc87a3c 100644 --- a/tests/helpers/ingest.js +++ b/tests/helpers/ingest.js @@ -1,3 +1,5 @@ +import { PublishCommand } from '@aws-sdk/client-sns' +import { ReceiveMessageCommand } from '@aws-sdk/client-sqs' import { sns, sqs } from '../../src/lib/aws-clients.js' import { handler } from '../../src/lambdas/ingest/index.js' import { sqsTriggerLambda } from './sqs.js' @@ -16,10 +18,11 @@ import { loadFixture } from './utils.js' * @returns {Promise} */ export const ingestItem = async (params) => { - await sns().publish({ + const command = new PublishCommand({ TopicArn: params.ingestTopicArn, Message: JSON.stringify(params.item) }) + await sns().send(command) await sqsTriggerLambda(params.ingestQueueUrl, handler) @@ -78,10 +81,11 @@ export async function testPostIngestSNS(t, record, shouldError = false) { // @ts-ignore process.env['POST_INGEST_TOPIC_ARN'] = t.context.postIngestTopicArn - await sns().publish({ + const publishCommand = new PublishCommand({ 
TopicArn: t.context.ingestTopicArn, Message: JSON.stringify(record) }) + await sns().send(publishCommand) try { await sqsTriggerLambda(t.context.ingestQueueUrl, handler) @@ -91,10 +95,11 @@ export async function testPostIngestSNS(t, record, shouldError = false) { } } - const { Messages } = await sqs().receiveMessage({ + const receiveCommand = new ReceiveMessageCommand({ QueueUrl: t.context.postIngestQueueUrl, WaitTimeSeconds: 1 }) + const { Messages } = await sqs().send(receiveCommand) t.truthy(Messages, 'Post-ingest message not found in queue') t.false(Messages && Messages.length > 1, 'More than one message in post-ingest queue') diff --git a/tests/helpers/sns.js b/tests/helpers/sns.js index 74e0a445..f5d2a132 100644 --- a/tests/helpers/sns.js +++ b/tests/helpers/sns.js @@ -1,3 +1,4 @@ +import { CreateTopicCommand, SubscribeCommand } from '@aws-sdk/client-sns' import { sns as _sns } from '../../src/lib/aws-clients.js' import { randomId } from './utils.js' @@ -7,9 +8,10 @@ import { randomId } from './utils.js' export const createTopic = async () => { const sns = _sns() - const { TopicArn } = await sns.createTopic({ + const command = new CreateTopicCommand({ Name: randomId('topic') }) + const { TopicArn } = await sns.send(command) if (TopicArn) return TopicArn @@ -22,9 +24,10 @@ export const createTopic = async () => { * @returns {Promise} */ export const addSnsToSqsSubscription = async (topicArn, queueArn) => { - await _sns().subscribe({ + const command = new SubscribeCommand({ TopicArn: topicArn, Protocol: 'sqs', Endpoint: queueArn }) + await _sns().send(command) } diff --git a/tests/helpers/sqs.js b/tests/helpers/sqs.js index ce6cdd1f..a1868829 100644 --- a/tests/helpers/sqs.js +++ b/tests/helpers/sqs.js @@ -1,6 +1,12 @@ // @ts-nocheck import { isUndefined } from 'lodash-es' +import { + ReceiveMessageCommand, + PurgeQueueCommand, + CreateQueueCommand, + GetQueueAttributesCommand +} from '@aws-sdk/client-sqs' import { sqs as _sqs } from 
'../../src/lib/aws-clients.js' import { randomId } from './utils.js' @@ -17,10 +23,11 @@ const sqsMessageToRecord = (message) => ({ }) const eventFromQueue = async (ingestQueueUrl) => { - const { Messages } = await _sqs().receiveMessage({ + const command = new ReceiveMessageCommand({ QueueUrl: ingestQueueUrl, WaitTimeSeconds: 1 }) + const { Messages } = await _sqs().send(command) return { Records: Messages.map((m) => sqsMessageToRecord(m)) @@ -37,7 +44,8 @@ export const sqsTriggerLambda = async (sqsUrl, handler, _context = {}) => { * @returns {Promise} */ export const purgeQueue = async (url) => { - await _sqs().purgeQueue({ QueueUrl: url }) + const command = new PurgeQueueCommand({ QueueUrl: url }) + await _sqs().send(command) } /** @@ -46,9 +54,10 @@ export const purgeQueue = async (url) => { export const createQueue = async () => { const sqs = _sqs() - const { QueueUrl } = await sqs.createQueue({ + const command = new CreateQueueCommand({ QueueName: randomId('queue') }) + const { QueueUrl } = await sqs.send(command) if (QueueUrl) return QueueUrl @@ -62,10 +71,11 @@ export const createQueue = async () => { export const getQueueArn = async (queueUrl) => { const sqs = _sqs() - const getQueueAttributesResult = await sqs.getQueueAttributes({ + const command = new GetQueueAttributesCommand({ QueueUrl: queueUrl, AttributeNames: ['QueueArn'] }) + const getQueueAttributesResult = await sqs.send(command) if ( isUndefined(getQueueAttributesResult.Attributes) diff --git a/tests/system/test-ingest.js b/tests/system/test-ingest.js index f665f82d..1f330c7d 100644 --- a/tests/system/test-ingest.js +++ b/tests/system/test-ingest.js @@ -4,6 +4,9 @@ import url from 'url' import test from 'ava' import nock from 'nock' import { DateTime } from 'luxon' +import { PublishCommand } from '@aws-sdk/client-sns' +import { ReceiveMessageCommand } from '@aws-sdk/client-sqs' +import { CreateBucketCommand, PutObjectCommand } from '@aws-sdk/client-s3' import { getCollectionIds, getItem } from 
'../helpers/api.js' import { handler, resetAssetProxy } from '../../src/lambdas/ingest/index.js' import { loadFixture, randomId } from '../helpers/utils.js' @@ -54,10 +57,11 @@ test('The ingest lambda supports ingesting a collection published to SNS', async { id: randomId('collection') } ) - await sns().publish({ + const publishCommand = new PublishCommand({ TopicArn: ingestTopicArn, Message: JSON.stringify(collection) }) + await sns().send(publishCommand) await sqsTriggerLambda(ingestQueueUrl, handler) @@ -85,23 +89,26 @@ test('The ingest lambda supports ingesting a collection sourced from S3', async const sourceBucket = randomId('bucket') const sourceKey = randomId('key') - await s3.createBucket({ + const createBucketCommand = new CreateBucketCommand({ Bucket: sourceBucket, CreateBucketConfiguration: { LocationConstraint: 'us-west-2' } }) + await s3.send(createBucketCommand) - await s3.putObject({ + const putObjectCommand = new PutObjectCommand({ Bucket: sourceBucket, Key: sourceKey, Body: JSON.stringify(collection) }) + await s3.send(putObjectCommand) - await sns().publish({ + const publishCommand2 = new PublishCommand({ TopicArn: ingestTopicArn, Message: JSON.stringify({ href: `s3://${sourceBucket}/${sourceKey}` }) }) + await sns().send(publishCommand2) await sqsTriggerLambda(ingestQueueUrl, handler) @@ -125,10 +132,11 @@ test('The ingest lambda supports ingesting a collection sourced from http', asyn nock('http://source.local').get('/my-file.dat').reply(200, collection) - await sns().publish({ + const publishCommand3 = new PublishCommand({ TopicArn: ingestTopicArn, Message: JSON.stringify({ href: 'http://source.local/my-file.dat' }) }) + await sns().send(publishCommand3) await sqsTriggerLambda(ingestQueueUrl, handler) @@ -417,11 +425,12 @@ async function emptyPostIngestQueue(t) { // We recommend waiting for 60 seconds regardless of your queue's size." 
let result do { - // eslint-disable-next-line no-await-in-loop - result = await sqs().receiveMessage({ + const receiveCommand = new ReceiveMessageCommand({ QueueUrl: t.context.postIngestQueueUrl, WaitTimeSeconds: 1 }) + // eslint-disable-next-line no-await-in-loop + result = await sqs().send(receiveCommand) } while (result.Message && result.Message.length > 0) } From b76f0e4792fcc54bc756eff0dcb577d13eccd28a Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 22 Oct 2025 17:02:20 -0500 Subject: [PATCH 11/22] review: remove unnecessary S3 client caching in AssetProxy --- src/lib/asset-proxy.js | 29 ++++----------- tests/unit/test-asset-proxy.js | 66 +++++++++++++++------------------- 2 files changed, 35 insertions(+), 60 deletions(-) diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index d608948c..9079e940 100644 --- a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -7,6 +7,8 @@ import { NotFoundError, ValidationError, ForbiddenError } from './errors.js' const VIRTUAL_HOST_PATTERN = /^([^.]+)\.s3(?:\.([^.]+))?\.amazonaws\.com$/ const PATH_STYLE_PATTERN = /^s3(?:[.-]([^.]+))?\.amazonaws\.com$/ +const s3Client = s3() + export const ALTERNATE_ASSETS_EXTENSION = 'https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json' export const BucketOption = Object.freeze({ @@ -112,7 +114,6 @@ const determineS3Region = (asset, itemOrCollection) => { export class AssetProxy { constructor() { this.bucketsCache = null - this.s3ClientCache = new Map() this.bucketOption = process.env['ASSET_PROXY_BUCKET_OPTION'] || 'NONE' this.bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] this.urlExpiry = parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) @@ -139,10 +140,8 @@ export class AssetProxy { case BucketOption.ALL_BUCKETS_IN_ACCOUNT: try { - const region = process.env['AWS_REGION'] || 'us-west-2' - const client = this.getS3Client(region) const command = new ListBucketsCommand({}) - const response = await client.send(command) + const response = 
await s3Client.send(command) const bucketNames = response.Buckets?.map((b) => b.Name) ?.filter((name) => typeof name === 'string') || [] this.bucketsCache = new Set(bucketNames) @@ -158,20 +157,6 @@ export class AssetProxy { } } - /** - * @param {string} region - AWS region - * @returns {Object} S3 client instance - */ - getS3Client(region) { - if (this.s3ClientCache.has(region)) { - return this.s3ClientCache.get(region) - } - - const client = s3({ region }) - this.s3ClientCache.set(region, client) - return client - } - /** * @returns {boolean} */ @@ -280,20 +265,18 @@ export class AssetProxy { /** * @param {string} bucket - S3 bucket name * @param {string} key - S3 object key - * @param {string} region - AWS region + * @param {string} region - AWS region of the S3 bucket * @returns {Promise} Pre-signed URL */ async createPresignedUrl(bucket, key, region) { - const client = this.getS3Client(region) - const command = new GetObjectCommand({ Bucket: bucket, Key: key, RequestPayer: 'requester' }) - const presignedUrl = await getSignedUrl(client, command, { - expiresIn: this.urlExpiry + const presignedUrl = await getSignedUrl(s3Client, command, { + expiresIn: this.urlExpiry, signingRegion: region }) logger.debug('Generated pre-signed URL for asset', { diff --git a/tests/unit/test-asset-proxy.js b/tests/unit/test-asset-proxy.js index d933985e..5887ec57 100644 --- a/tests/unit/test-asset-proxy.js +++ b/tests/unit/test-asset-proxy.js @@ -1,8 +1,16 @@ // @ts-nocheck import test from 'ava' +import { mockClient } from 'aws-sdk-client-mock' +import { S3Client, ListBucketsCommand } from '@aws-sdk/client-s3' import { AssetProxy, BucketOption, ALTERNATE_ASSETS_EXTENSION } from '../../src/lib/asset-proxy.js' +const s3Mock = mockClient(S3Client) + +test.beforeEach(() => { + s3Mock.reset() +}) + test('BucketOption - exports expected constants', (t) => { t.is(BucketOption.NONE, 'NONE') t.is(BucketOption.ALL, 'ALL') @@ -80,19 +88,14 @@ test('AssetProxy - initialize() with 
ALL_BUCKETS_IN_ACCOUNT mode fetches buckets process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' process.env['AWS_REGION'] = 'us-west-2' - const proxy = new AssetProxy() - - const mockS3Client = { - send: async () => ({ - Buckets: [ - { Name: 'bucket-1' }, - { Name: 'bucket-2' }, - ] - }) - } - - proxy.getS3Client = () => mockS3Client + s3Mock.on(ListBucketsCommand).resolves({ + Buckets: [ + { Name: 'bucket-1' }, + { Name: 'bucket-2' }, + ] + }) + const proxy = new AssetProxy() await proxy.initialize() t.truthy(proxy.bucketsCache) @@ -110,15 +113,10 @@ test('AssetProxy - initialize() with ALL_BUCKETS_IN_ACCOUNT mode throws on error try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - const proxy = new AssetProxy() + // Set up the mock to reject with an error + s3Mock.on(ListBucketsCommand).rejects(new Error('Access denied')) - const mockS3Client = { - send: async () => { - throw new Error('Access denied') - } - } - - proxy.getS3Client = () => mockS3Client + const proxy = new AssetProxy() await t.throwsAsync( async () => proxy.initialize(), @@ -172,13 +170,11 @@ test('AssetProxy - isEnabled() returns true for ALL_BUCKETS_IN_ACCOUNT', async ( try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - const proxy = new AssetProxy() - - const mockS3Client = { - send: async () => ({ Buckets: [{ Name: 'bucket-1' }] }) - } + s3Mock.on(ListBucketsCommand).resolves({ + Buckets: [{ Name: 'bucket-1' }] + }) - proxy.getS3Client = () => mockS3Client + const proxy = new AssetProxy() await proxy.initialize() t.true(proxy.isEnabled()) @@ -234,18 +230,14 @@ test('AssetProxy - shouldProxyBucket() with ALL_BUCKETS_IN_ACCOUNT mode only pro try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - const proxy = new AssetProxy() + s3Mock.on(ListBucketsCommand).resolves({ + Buckets: [ + { Name: 'fetched-bucket-1' }, + { Name: 'fetched-bucket-2' } + ] + }) - const mockS3Client = { - send: async () => ({ - Buckets: [ 
- { Name: 'fetched-bucket-1' }, - { Name: 'fetched-bucket-2' } - ] - }) - } - - proxy.getS3Client = () => mockS3Client + const proxy = new AssetProxy() await proxy.initialize() t.true(proxy.shouldProxyBucket('fetched-bucket-1')) From 8d3c10f111fc4359fcbb675d1146f79083f8ac08 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 22 Oct 2025 17:20:23 -0500 Subject: [PATCH 12/22] review: 403 to 404 when asset proxy is disabled --- src/lib/asset-proxy.js | 2 +- tests/system/test-api-asset-proxy-disabled.js | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index 9079e940..a8ebf1f3 100644 --- a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -296,7 +296,7 @@ export class AssetProxy { */ async getAssetPresignedUrl(itemOrCollection, assetKey) { if (!this.isEnabled()) { - return new ForbiddenError() + return new NotFoundError() } const asset = itemOrCollection.assets?.[assetKey] || null diff --git a/tests/system/test-api-asset-proxy-disabled.js b/tests/system/test-api-asset-proxy-disabled.js index 4c2de005..52a51e45 100644 --- a/tests/system/test-api-asset-proxy-disabled.js +++ b/tests/system/test-api-asset-proxy-disabled.js @@ -55,7 +55,7 @@ test.after.always(async (t) => { if (t.context.api) await t.context.api.close() }) -test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - returns 403 when proxy disabled', async (t) => { +test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - returns 404 when proxy disabled', async (t) => { const { collectionId, itemId } = t.context const response = await t.context.api.client.get( @@ -67,10 +67,10 @@ test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - returns 40 } ) - t.is(response.statusCode, 403) + t.is(response.statusCode, 404) }) -test('GET /collections/:collectionId/assets/:assetKey - returns 403 when proxy disabled', async (t) => { +test('GET /collections/:collectionId/assets/:assetKey - returns 404 when
proxy disabled', async (t) => { const { collectionWithAssetId } = t.context const response = await t.context.api.client.get( @@ -82,7 +82,7 @@ test('GET /collections/:collectionId/assets/:assetKey - returns 403 when proxy d } ) - t.is(response.statusCode, 403) + t.is(response.statusCode, 404) }) test('GET /collections/:collectionId/items/:itemId - item asset hrefs unchanged when proxy disabled', async (t) => { From dad9234857f680e9a147039a6329bd22d3a8875f Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 22 Oct 2025 17:22:52 -0500 Subject: [PATCH 13/22] review: remove redundant asset proxy isEnabled check --- src/lib/ingest.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/lib/ingest.js b/src/lib/ingest.js index 6bcb75dc..9fea0791 100644 --- a/src/lib/ingest.js +++ b/src/lib/ingest.js @@ -179,9 +179,7 @@ function updateLinksAndHrefsWithinRecord(record, assetProxy) { } else if (isCollection(record)) { addCollectionLinks([record], endpoint) } - if (assetProxy.isEnabled()) { - assetProxy.addProxiedAssets([record], endpoint) - } + assetProxy.addProxiedAssets([record], endpoint) return record } From f8ae55e4b7a3efd79187be3bda774159091aea2c Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Fri, 24 Oct 2025 15:26:04 -0500 Subject: [PATCH 14/22] review: significant refactor to improve bucket caching and region determination --- src/lambdas/api/app.js | 47 ++- src/lambdas/ingest/index.js | 6 +- src/lib/asset-proxy.js | 279 ++++++++---------- src/lib/errors.js | 7 - src/lib/ingest.js | 2 +- .../fixtures/stac/LC80100102015082LGN00.json | 32 +- .../fixtures/stac/collection-with-asset.json | 2 +- tests/fixtures/stac/ingest-item.json | 32 +- tests/helpers/asset-proxy.js | 3 +- tests/system/test-api-asset-proxy.js | 4 +- tests/unit/test-asset-proxy.js | 167 ++++++----- 11 files changed, 281 insertions(+), 300 deletions(-) diff --git a/src/lambdas/api/app.js b/src/lambdas/api/app.js index 99980793..dded7f74 100644 --- a/src/lambdas/api/app.js +++ 
b/src/lambdas/api/app.js @@ -7,7 +7,7 @@ import path from 'path' import { fileURLToPath } from 'url' import database from '../../lib/database.js' import api from '../../lib/api.js' -import { NotFoundError, ValidationError, ForbiddenError } from '../../lib/errors.js' +import { NotFoundError, ValidationError } from '../../lib/errors.js' import { readFile } from '../../lib/fs.js' import addEndpoint from './middleware/add-endpoint.js' import logger from '../../lib/logger.js' @@ -25,8 +25,7 @@ export const createApp = async () => { const app = express() - app.locals['assetProxy'] = new AssetProxy() - await app.locals['assetProxy'].initialize() + app.locals['assetProxy'] = await AssetProxy.create() if (process.env['REQUEST_LOGGING_ENABLED'] !== 'false') { app.use( @@ -120,7 +119,7 @@ export const createApp = async () => { const result = await api.searchItems( database, 'GET', null, req.endpoint, req.query, req.headers ) - req.app.locals['assetProxy'].addProxiedAssets(result.features, req.endpoint) + req.app.locals['assetProxy'].updateAssetHrefs(result.features, req.endpoint) res.type('application/geo+json') res.json(result) } catch (error) { @@ -137,7 +136,7 @@ export const createApp = async () => { const result = await api.searchItems( database, 'POST', null, req.endpoint, req.body, req.headers ) - req.app.locals['assetProxy'].addProxiedAssets(result.features, req.endpoint) + req.app.locals['assetProxy'].updateAssetHrefs(result.features, req.endpoint) res.type('application/geo+json') res.json(result) } catch (error) { @@ -174,7 +173,7 @@ export const createApp = async () => { const response = await api.getCollections(database, req.endpoint, req.query, req.headers) if (response instanceof Error) next(createError(500, response.message)) else { - req.app.locals['assetProxy'].addProxiedAssets(response.collections, req.endpoint) + req.app.locals['assetProxy'].updateAssetHrefs(response.collections, req.endpoint) res.json(response) } } catch (error) { @@ -211,7 +210,7 @@ 
export const createApp = async () => { ) if (response instanceof Error) next(createError(404)) else { - req.app.locals['assetProxy'].addProxiedAssets([response], req.endpoint) + req.app.locals['assetProxy'].updateAssetHrefs([response], req.endpoint) res.json(response) } } catch (error) { @@ -294,7 +293,7 @@ export const createApp = async () => { const result = await api.searchItems( database, 'GET', collectionId, req.endpoint, req.query, req.headers ) - req.app.locals['assetProxy'].addProxiedAssets(result.features, req.endpoint) + req.app.locals['assetProxy'].updateAssetHrefs(result.features, req.endpoint) res.type('application/geo+json') res.json(result) } catch (error) { @@ -358,7 +357,7 @@ export const createApp = async () => { } else if (response instanceof Error) { next(createError(500)) } else { - req.app.locals['assetProxy'].addProxiedAssets([response], req.endpoint) + req.app.locals['assetProxy'].updateAssetHrefs([response], req.endpoint) res.type('application/geo+json') res.json(response) } @@ -476,6 +475,11 @@ export const createApp = async () => { app.get('/collections/:collectionId/items/:itemId/assets/:assetKey', async (req, res, next) => { + if (!req.app.locals['assetProxy'].isEnabled) { + next(createError(404)) + return + } + try { const item = await api.getItem( database, @@ -488,21 +492,13 @@ export const createApp = async () => { if (item instanceof NotFoundError) { next(createError(404)) - } else if (item instanceof Error) { - next(createError(500)) } else { const presignedUrl = await req.app.locals['assetProxy'].getAssetPresignedUrl( item, req.params.assetKey ) - if (presignedUrl instanceof ValidationError) { - next(createError(400)) - } else if (presignedUrl instanceof ForbiddenError) { - next(createError(403)) - } else if (presignedUrl instanceof NotFoundError) { + if (!presignedUrl) { next(createError(404)) - } else if (presignedUrl instanceof Error) { - next(createError(500)) } else { res.redirect(presignedUrl) } @@ -513,6 +509,11 @@ export 
const createApp = async () => { }) app.get('/collections/:collectionId/assets/:assetKey', async (req, res, next) => { + if (!req.app.locals['assetProxy'].isEnabled) { + next(createError(404)) + return + } + try { const collection = await api.getCollection( database, @@ -524,21 +525,13 @@ export const createApp = async () => { if (collection instanceof NotFoundError) { next(createError(404)) - } else if (collection instanceof Error) { - next(createError(500)) } else { const presignedUrl = await req.app.locals['assetProxy'].getAssetPresignedUrl( collection, req.params.assetKey ) - if (presignedUrl instanceof ValidationError) { - next(createError(400)) - } else if (presignedUrl instanceof ForbiddenError) { - next(createError(403)) - } else if (presignedUrl instanceof NotFoundError) { + if (!presignedUrl) { next(createError(404)) - } else if (presignedUrl instanceof Error) { - next(createError(500)) } else { res.redirect(presignedUrl) } diff --git a/src/lambdas/ingest/index.js b/src/lambdas/ingest/index.js index d889c2b4..6be33081 100644 --- a/src/lambdas/ingest/index.js +++ b/src/lambdas/ingest/index.js @@ -6,12 +6,10 @@ import getObjectJson from '../../lib/s3-utils.js' import logger from '../../lib/logger.js' import { AssetProxy } from '../../lib/asset-proxy.js' -let assetProxy = new AssetProxy() -await assetProxy.initialize() +let assetProxy = await AssetProxy.create() export const resetAssetProxy = async () => { - assetProxy = new AssetProxy() - await assetProxy.initialize() + assetProxy = await AssetProxy.create() } const isSqsEvent = (event) => 'Records' in event diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index a8ebf1f3..c352d485 100644 --- a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -1,14 +1,17 @@ -import { GetObjectCommand, ListBucketsCommand } from '@aws-sdk/client-s3' +/* eslint-disable max-classes-per-file */ +import { + GetObjectCommand, + ListBucketsCommand, + HeadBucketCommand +} from '@aws-sdk/client-s3' import { 
getSignedUrl } from '@aws-sdk/s3-request-presigner' import { s3 } from './aws-clients.js' import logger from './logger.js' -import { NotFoundError, ValidationError, ForbiddenError } from './errors.js' - -const VIRTUAL_HOST_PATTERN = /^([^.]+)\.s3(?:\.([^.]+))?\.amazonaws\.com$/ -const PATH_STYLE_PATTERN = /^s3(?:[.-]([^.]+))?\.amazonaws\.com$/ const s3Client = s3() +const S3_URL_REGEX = /^s3:\/\/([^/]+)\/(.+)$/ + export const ALTERNATE_ASSETS_EXTENSION = 'https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json' export const BucketOption = Object.freeze({ @@ -20,116 +23,59 @@ export const BucketOption = Object.freeze({ /** * @param {string} url - S3 URL to parse - * @returns {Object|null} {bucket, key, region} or null if not a valid S3 URL + * @returns {Object} {bucket, key} or {bucket: null, key: null} if not a valid S3 URL */ const parseS3Url = (url) => { - // S3 URI format: s3://bucket/key - if (url.startsWith('s3://')) { - const withoutProtocol = url.substring(5) - const firstSlash = withoutProtocol.indexOf('/') - - if (firstSlash === -1) { - return null // No key provided - } - - const bucket = withoutProtocol.substring(0, firstSlash) - const key = withoutProtocol.substring(firstSlash + 1) - - if (!bucket || !key) { - return null - } - - return { bucket, key, region: null } - } - - // HTTPS URL formats - if (url.startsWith('https://')) { - try { - const urlObj = new URL(url) - const hostname = urlObj.hostname - const pathname = urlObj.pathname - - // Virtual-hosted style: bucket.s3.region.amazonaws.com or bucket.s3.amazonaws.com - const virtualHostMatch = hostname.match(VIRTUAL_HOST_PATTERN) - if (virtualHostMatch) { - const bucket = virtualHostMatch[1] - const region = virtualHostMatch[2] || null - const key = pathname.startsWith('/') ? 
pathname.substring(1) : pathname - - if (!key) { - return null - } - - return { bucket, key, region } - } - - // Path style: s3.region.amazonaws.com/bucket/key, - // s3-region.amazonaws.com/bucket/key, or s3.amazonaws.com/bucket/key - const pathStyleMatch = hostname.match(PATH_STYLE_PATTERN) - if (pathStyleMatch) { - const region = pathStyleMatch[1] || null - const pathParts = pathname.split('/').filter((p) => p) - - if (pathParts.length < 2) { - return null // Need at least bucket and key - } + const match = S3_URL_REGEX.exec(url) + if (!match) return { bucket: null, key: null } - const bucket = pathParts[0] - const key = pathParts.slice(1).join('/') - - return { bucket, key, region } - } - } catch (_error) { - // Invalid URL - return null - } - } - - return null -} - -/** - * @param {Object} asset - Asset object - * @param {Object} itemOrCollection - Item or Collection object - * @returns {string} AWS region - */ -const determineS3Region = (asset, itemOrCollection) => { - // Storage Extension v1 - const v1Region = asset['storage:region'] || itemOrCollection.properties?.['storage:region'] - if (v1Region) { - return v1Region - } - - // Storage Extension v2 - const storageSchemes = itemOrCollection.properties?.['storage:schemes'] - || itemOrCollection['storage:schemes'] - const v2Region = storageSchemes?.[asset['storage:refs']]?.region - if (v2Region) { - return v2Region - } - - return process.env['AWS_REGION'] || 'us-west-2' + const [, bucket, key] = match + return { bucket, key } } export class AssetProxy { constructor() { - this.bucketsCache = null this.bucketOption = process.env['ASSET_PROXY_BUCKET_OPTION'] || 'NONE' this.bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] this.urlExpiry = parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) + this.isEnabled = this.bucketOption !== BucketOption.NONE + this.buckets = {} + } + + /** + * @returns {Promise} Initialized AssetProxy instance + */ + static async create() { + const dbInstance = new AssetProxy() + 
await dbInstance._initBuckets() + return dbInstance } /** * @returns {Promise} */ - async initialize() { + async _initBuckets() { switch (this.bucketOption) { case BucketOption.LIST: if (this.bucketList) { const bucketNames = this.bucketList.split(',').map((b) => b.trim()).filter((b) => b) - this.bucketsCache = new Set(bucketNames) + await Promise.all( + bucketNames.map(async (name) => { await this.getBucket(name) }) + ) + + const invalidBuckets = Object.values(this.buckets) + .filter((b) => b.region === null) + .map((b) => b.name) + if (invalidBuckets.length > 0) { + throw new Error( + `Could not access or determine region for the following buckets: ${ + invalidBuckets.join(', ')}` + ) + } + + const count = Object.keys(this.buckets).length logger.info( - `Parsed ${this.bucketsCache.size} buckets from ASSET_PROXY_BUCKET_LIST for asset proxy` + `Parsed ${count} buckets from ASSET_PROXY_BUCKET_LIST for asset proxy` ) } else { throw new Error( @@ -142,10 +88,19 @@ export class AssetProxy { try { const command = new ListBucketsCommand({}) const response = await s3Client.send(command) - const bucketNames = response.Buckets?.map((b) => b.Name) - ?.filter((name) => typeof name === 'string') || [] - this.bucketsCache = new Set(bucketNames) - logger.info(`Fetched ${this.bucketsCache.size} buckets from AWS account for asset proxy`) + const buckets = response.Buckets || [] + + await Promise.all( + buckets + .map((bucket) => bucket.Name) + .filter((name) => typeof name === 'string') + .map(async (name) => { await this.getBucket(name) }) + ) + + const count = Object.keys(this.buckets).length + logger.info( + `Fetched ${count} buckets from AWS account for asset proxy` + ) } catch (error) { const message = error instanceof Error ? 
error.message : String(error) throw new Error(`Failed to fetch buckets for asset proxy: ${message}`) @@ -158,18 +113,47 @@ export class AssetProxy { } /** - * @returns {boolean} + * @param {string} bucketName - S3 bucket name + * @returns {Promise} Bucket info {name, region} */ - isEnabled() { - return this.bucketOption !== BucketOption.NONE + async getBucket(bucketName) { + if (!(bucketName in this.buckets)) { + const command = new HeadBucketCommand({ Bucket: bucketName }) + const response = await s3Client.send(command) + const statusCode = response.$metadata.httpStatusCode + let region = null + + switch (statusCode) { + case 200: + region = response.BucketRegion === 'EU' + ? 'eu-west-1' + : response.BucketRegion || 'us-east-1' + break + case 403: + logger.warn(`Access denied to bucket ${bucketName}`) + break + case 404: + logger.warn(`Bucket ${bucketName} does not exist`) + break + case 400: + logger.warn(`Bad request for bucket ${bucketName}`) + break + default: + logger.warn(`Unexpected status code ${statusCode} for bucket ${bucketName}`) + } + + this.buckets[bucketName] = { name: bucketName, region } + } + return this.buckets[bucketName] } /** - * @param {string} bucket - S3 bucket name - * @returns {boolean} True if bucket should be proxied + * @param {string} bucketName - S3 bucket name + * @returns {boolean} True if bucket should be proxied, False otherwise */ - shouldProxyBucket(bucket) { - if (this.bucketOption === BucketOption.ALL || this.bucketsCache?.has(bucket)) { + shouldProxyBucket(bucketName) { + if (this.bucketOption === BucketOption.ALL + || bucketName in this.buckets) { return true } return false @@ -189,13 +173,22 @@ export class AssetProxy { for (const [assetKey, asset] of Object.entries(assets)) { if (!asset?.href) { proxiedAssets[assetKey] = asset + logger.warn(`Asset ${assetKey} is missing href`) // eslint-disable-next-line no-continue continue } - const s3Info = parseS3Url(asset.href) - if (!s3Info || 
!(this.shouldProxyBucket(s3Info.bucket))) { + const { bucket, key } = parseS3Url(asset.href) + if (!bucket || !key) { proxiedAssets[assetKey] = asset + logger.warn(`Asset ${assetKey} has invalid S3 URL: ${asset.href}`) + // eslint-disable-next-line no-continue + continue + } + + if (!this.shouldProxyBucket(bucket)) { + proxiedAssets[assetKey] = asset + logger.warn(`Asset ${assetKey} bucket ${bucket} is not configured for proxying`) // eslint-disable-next-line no-continue continue } @@ -222,17 +215,18 @@ export class AssetProxy { } /** - * @param {Array} results - Array of STAC items or collections + * @param {Array} stacObjects - Array of STAC items or collections * @param {string} endpoint - API endpoint base URL - * @returns {Array} Mutated results array with proxied assets + * @returns {Array} Mutated stacObjects array with proxied asset HREFs */ - addProxiedAssets(results, endpoint) { - if (!this.isEnabled()) { - return results + updateAssetHrefs(stacObjects, endpoint) { + if (!this.isEnabled) { + return stacObjects } - results.forEach((result) => { + stacObjects.forEach((result) => { if (!result.assets || typeof result.assets !== 'object') { + logger.info(`${result.id} has no assets to proxy`) return } @@ -259,22 +253,35 @@ export class AssetProxy { } }) - return results + return stacObjects } /** - * @param {string} bucket - S3 bucket name - * @param {string} key - S3 object key - * @param {string} region - AWS region of the S3 bucket - * @returns {Promise} Pre-signed URL + * @param {Object} itemOrCollection - STAC Item or Collection + * @param {string} assetKey - Asset key to generate presigned URL for + * @returns {Promise} Pre-signed URL or null */ - async createPresignedUrl(bucket, key, region) { + async getAssetPresignedUrl(itemOrCollection, assetKey) { + const asset = itemOrCollection.assets?.[assetKey] || null + if (!asset || !asset.href) { + return null + } + + const { bucket, key } = parseS3Url(asset.href) + if (!bucket || !key || 
!this.shouldProxyBucket(bucket)) { + return null + } + + const region = await this.getBucket(bucket).then((b) => b.region) + if (!region) { + return null + } + const command = new GetObjectCommand({ Bucket: bucket, Key: key, RequestPayer: 'requester' }) - const presignedUrl = await getSignedUrl(s3Client, command, { expiresIn: this.urlExpiry, signingRegion: region }) @@ -288,34 +295,4 @@ export class AssetProxy { return presignedUrl } - - /** - * @param {Object} itemOrCollection - STAC Item or Collection - * @param {string} assetKey - Asset key to generate presigned URL for - * @returns {Promise} Pre-signed URL or Error - */ - async getAssetPresignedUrl(itemOrCollection, assetKey) { - if (!this.isEnabled()) { - return new NotFoundError() - } - - const asset = itemOrCollection.assets?.[assetKey] || null - if (!asset || !asset.href) { - return new NotFoundError() - } - - const s3Info = parseS3Url(asset.href) - if (!s3Info) { - return new ValidationError('Asset href is not a valid S3 URL') - } - - if (!this.shouldProxyBucket(s3Info.bucket)) { - return new ForbiddenError() - } - - const region = s3Info.region || determineS3Region(asset, itemOrCollection) - const presignedUrl = await this.createPresignedUrl(s3Info.bucket, s3Info.key, region) - - return presignedUrl - } } diff --git a/src/lib/errors.js b/src/lib/errors.js index b0309c28..5a7f0d65 100644 --- a/src/lib/errors.js +++ b/src/lib/errors.js @@ -13,10 +13,3 @@ export class NotFoundError extends Error { this.name = this.constructor.name } } - -export class ForbiddenError extends Error { - constructor(message) { - super(message) - this.name = this.constructor.name - } -} diff --git a/src/lib/ingest.js b/src/lib/ingest.js index 9fea0791..5247304e 100644 --- a/src/lib/ingest.js +++ b/src/lib/ingest.js @@ -179,7 +179,7 @@ function updateLinksAndHrefsWithinRecord(record, assetProxy) { } else if (isCollection(record)) { addCollectionLinks([record], endpoint) } - assetProxy.addProxiedAssets([record], endpoint) + 
assetProxy.updateAssetHrefs([record], endpoint) return record } diff --git a/tests/fixtures/stac/LC80100102015082LGN00.json b/tests/fixtures/stac/LC80100102015082LGN00.json index d1afa6fd..7e702021 100644 --- a/tests/fixtures/stac/LC80100102015082LGN00.json +++ b/tests/fixtures/stac/LC80100102015082LGN00.json @@ -1,7 +1,7 @@ { "assets": { "ANG": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_ANG.txt", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_ANG.txt", "title": "Angle coefficients file", "type": "text/plain", "roles": [ @@ -17,7 +17,7 @@ "full_width_half_max": 0.02 } ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B1.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B1.TIF", "title": "Band 1 (coastal)", "type": "image/tiff; application=geotiff" }, @@ -31,7 +31,7 @@ } ], "gsd": 100, - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B10.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B10.TIF", "title": "Band 10 (lwir)", "type": "image/tiff; application=geotiff" }, @@ -45,7 +45,7 @@ } ], "gsd": 100, - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B11.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B11.TIF", "title": "Band 11 (lwir)", "type": "image/tiff; application=geotiff" }, @@ -58,7 +58,7 @@ "full_width_half_max": 0.06 } ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B2.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B2.TIF", "title": "Band 2 (blue)", "type": "image/tiff; application=geotiff" }, @@ -71,7 +71,7 @@ "full_width_half_max": 
0.06 } ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B3.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B3.TIF", "title": "Band 3 (green)", "type": "image/tiff; application=geotiff" }, @@ -84,7 +84,7 @@ "full_width_half_max": 0.04 } ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B4.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B4.TIF", "title": "Band 4 (red)", "type": "image/tiff; application=geotiff" }, @@ -97,7 +97,7 @@ "full_width_half_max": 0.03 } ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B5.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B5.TIF", "title": "Band 5 (nir)", "type": "image/tiff; application=geotiff" }, @@ -110,7 +110,7 @@ "full_width_half_max": 0.08 } ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B6.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B6.TIF", "title": "Band 6 (swir16)", "type": "image/tiff; application=geotiff" }, @@ -123,7 +123,7 @@ "full_width_half_max": 0.2 } ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B7.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B7.TIF", "title": "Band 7 (swir22)", "type": "image/tiff; application=geotiff" }, @@ -137,7 +137,7 @@ } ], "gsd": 15, - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B8.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B8.TIF", "title": "Band 8 (pan)", "type": "image/tiff; application=geotiff" }, @@ -150,12 +150,12 @@ 
"full_width_half_max": 0.02 } ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B9.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B9.TIF", "title": "Band 9 (cirrus)", "type": "image/tiff; application=geotiff" }, "BQA": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_BQA.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_BQA.TIF", "title": "Band quality data", "type": "image/tiff; application=geotiff", "roles": [ @@ -163,7 +163,7 @@ ] }, "MTL": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_MTL.txt", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_MTL.txt", "title": "original metadata file", "type": "text/plain", "roles": [ @@ -171,12 +171,12 @@ ] }, "index": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/index.html", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/index.html", "title": "HTML index page", "type": "text/html" }, "thumbnail": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_thumb_large.jpg", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_thumb_large.jpg", "title": "Thumbnail image", "type": "image/jpeg", "roles": [ diff --git a/tests/fixtures/stac/collection-with-asset.json b/tests/fixtures/stac/collection-with-asset.json index 764964c6..c52129c3 100644 --- a/tests/fixtures/stac/collection-with-asset.json +++ b/tests/fixtures/stac/collection-with-asset.json @@ -107,7 +107,7 @@ "license": "PDDL-1.0", "assets": { "thumbnail": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_thumb_large.jpg", + "href": 
"s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_thumb_large.jpg", "title": "Thumbnail image", "type": "image/jpeg" } diff --git a/tests/fixtures/stac/ingest-item.json b/tests/fixtures/stac/ingest-item.json index 8d7fab06..c77d4a08 100644 --- a/tests/fixtures/stac/ingest-item.json +++ b/tests/fixtures/stac/ingest-item.json @@ -1,7 +1,7 @@ { "assets": { "ANG": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_ANG.txt", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_ANG.txt", "title": "Angle coefficients file", "type": "text/plain" }, @@ -9,7 +9,7 @@ "eo:bands": [ 0 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B1.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B1.TIF", "title": "Band 1 (coastal)", "type": "image/tiff; application=geotiff" }, @@ -17,7 +17,7 @@ "eo:bands": [ 9 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B10.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B10.TIF", "title": "Band 10 (lwir)", "type": "image/tiff; application=geotiff" }, @@ -25,7 +25,7 @@ "eo:bands": [ 10 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B11.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B11.TIF", "title": "Band 11 (lwir)", "type": "image/tiff; application=geotiff" }, @@ -33,7 +33,7 @@ "eo:bands": [ 1 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B2.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B2.TIF", "title": "Band 2 (blue)", "type": "image/tiff; application=geotiff" }, @@ -41,7 +41,7 @@ "eo:bands": [ 2 ], - 
"href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B3.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B3.TIF", "title": "Band 3 (green)", "type": "image/tiff; application=geotiff" }, @@ -49,7 +49,7 @@ "eo:bands": [ 3 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B4.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B4.TIF", "title": "Band 4 (red)", "type": "image/tiff; application=geotiff" }, @@ -57,7 +57,7 @@ "eo:bands": [ 4 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B5.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B5.TIF", "title": "Band 5 (nir)", "type": "image/tiff; application=geotiff" }, @@ -65,7 +65,7 @@ "eo:bands": [ 5 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B6.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B6.TIF", "title": "Band 6 (swir16)", "type": "image/tiff; application=geotiff" }, @@ -73,7 +73,7 @@ "eo:bands": [ 6 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B7.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B7.TIF", "title": "Band 7 (swir22)", "type": "image/tiff; application=geotiff" }, @@ -81,7 +81,7 @@ "eo:bands": [ 7 ], - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B8.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B8.TIF", "title": "Band 8 (pan)", "type": "image/tiff; application=geotiff" }, @@ -89,27 +89,27 @@ "eo:bands": [ 8 ], - "href": 
"https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B9.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_B9.TIF", "title": "Band 9 (cirrus)", "type": "image/tiff; application=geotiff" }, "BQA": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_BQA.TIF", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_BQA.TIF", "title": "Band quality data", "type": "image/tiff; application=geotiff" }, "MTL": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_MTL.txt", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_MTL.txt", "title": "original metadata file", "type": "text/plain" }, "index": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/index.html", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/index.html", "title": "HTML index page", "type": "text/html" }, "thumbnail": { - "href": "https://s3-us-west-2.amazonaws.com/landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_thumb_large.jpg", + "href": "s3://landsat-pds/L8/010/010/LC80100102015082LGN00/LC80100102015082LGN00_thumb_large.jpg", "title": "Thumbnail image", "type": "image/jpeg" } diff --git a/tests/helpers/asset-proxy.js b/tests/helpers/asset-proxy.js index 51e3d2d2..ede103b3 100644 --- a/tests/helpers/asset-proxy.js +++ b/tests/helpers/asset-proxy.js @@ -4,8 +4,7 @@ const setupAssetProxy = async (assetProxyBucketOption) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = assetProxyBucketOption - const assetProxy = new AssetProxy() - await assetProxy.initialize() + const assetProxy = await AssetProxy.create() return assetProxy } finally { process.env = before diff --git a/tests/system/test-api-asset-proxy.js 
b/tests/system/test-api-asset-proxy.js index 9846507d..0c9ee7cf 100644 --- a/tests/system/test-api-asset-proxy.js +++ b/tests/system/test-api-asset-proxy.js @@ -63,8 +63,8 @@ test.after.always(async (t) => { test('AssetProxy initialized with ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', (t) => { const assetProxy = t.context.api.app.locals['assetProxy'] - t.truthy(assetProxy.bucketsCache) - t.true(assetProxy.isEnabled()) + t.truthy(assetProxy.buckets) + t.true(assetProxy.isEnabled) t.true(assetProxy.shouldProxyBucket('landsat-pds')) t.true(!assetProxy.shouldProxyBucket('some-other-bucket')) }) diff --git a/tests/unit/test-asset-proxy.js b/tests/unit/test-asset-proxy.js index 5887ec57..37e75201 100644 --- a/tests/unit/test-asset-proxy.js +++ b/tests/unit/test-asset-proxy.js @@ -2,7 +2,7 @@ import test from 'ava' import { mockClient } from 'aws-sdk-client-mock' -import { S3Client, ListBucketsCommand } from '@aws-sdk/client-s3' +import { S3Client, ListBucketsCommand, HeadBucketCommand } from '@aws-sdk/client-s3' import { AssetProxy, BucketOption, ALTERNATE_ASSETS_EXTENSION } from '../../src/lib/asset-proxy.js' const s3Mock = mockClient(S3Client) @@ -18,27 +18,28 @@ test('BucketOption - exports expected constants', (t) => { t.is(BucketOption.LIST, 'LIST') }) -test('AssetProxy - constructor initializes with expected defaults', (t) => { +test('AssetProxy - constructor initializes with expected defaults', async (t) => { const before = { ...process.env } try { delete process.env['ASSET_PROXY_BUCKET_OPTION'] - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() t.is(proxy.bucketOption, 'NONE') t.is(proxy.urlExpiry, 300) + t.is(proxy.isEnabled, false) } finally { process.env = before } }) -test('AssetProxy - constructor reads env vars correctly', (t) => { +test('AssetProxy - constructor reads env vars correctly', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' process.env['ASSET_PROXY_URL_EXPIRY'] = 
'600' process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1,bucket2' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() t.is(proxy.bucketOption, 'ALL') t.is(proxy.urlExpiry, 600) t.is(proxy.bucketList, 'bucket1,bucket2') @@ -47,34 +48,37 @@ test('AssetProxy - constructor reads env vars correctly', (t) => { } }) -test('AssetProxy - initialize() with LIST mode parses bucket list', async (t) => { +test('AssetProxy - LIST mode parses bucket list', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1, bucket2 , bucket3' - const proxy = new AssetProxy() - await proxy.initialize() + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const proxy = await AssetProxy.create() - t.truthy(proxy.bucketsCache) - t.true(proxy.bucketsCache.has('bucket1')) - t.true(proxy.bucketsCache.has('bucket2')) - t.true(proxy.bucketsCache.has('bucket3')) - t.is(proxy.bucketsCache.size, 3) + t.truthy(proxy.buckets) + t.truthy(proxy.buckets['bucket1']) + t.truthy(proxy.buckets['bucket2']) + t.truthy(proxy.buckets['bucket3']) + t.is(Object.keys(proxy.buckets).length, 3) } finally { process.env = before } }) -test('AssetProxy - initialize() with LIST mode throws if no bucket list', async (t) => { +test('AssetProxy - LIST mode throws if no bucket list', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' delete process.env['ASSET_PROXY_BUCKET_LIST'] - const proxy = new AssetProxy() await t.throwsAsync( - async () => proxy.initialize(), + async () => AssetProxy.create(), { message: /ASSET_PROXY_BUCKET_LIST must be set/ } ) } finally { @@ -82,7 +86,7 @@ test('AssetProxy - initialize() with LIST mode throws if no bucket list', async } }) -test('AssetProxy - initialize() with ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', async (t) => { +test('AssetProxy - 
ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' @@ -95,31 +99,32 @@ test('AssetProxy - initialize() with ALL_BUCKETS_IN_ACCOUNT mode fetches buckets ] }) - const proxy = new AssetProxy() - await proxy.initialize() + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const proxy = await AssetProxy.create() - t.truthy(proxy.bucketsCache) - t.true(proxy.bucketsCache.has('bucket-1')) - t.true(proxy.bucketsCache.has('bucket-2')) - t.true(!proxy.bucketsCache.has('some-other-bucket')) - t.is(proxy.bucketsCache.size, 2) + t.truthy(proxy.buckets) + t.truthy(proxy.buckets['bucket-1']) + t.truthy(proxy.buckets['bucket-2']) + t.is(proxy.buckets['some-other-bucket'], undefined) + t.is(Object.keys(proxy.buckets).length, 2) } finally { process.env = before } }) -test('AssetProxy - initialize() with ALL_BUCKETS_IN_ACCOUNT mode throws on error', async (t) => { +test('AssetProxy - ALL_BUCKETS_IN_ACCOUNT mode throws on error', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - // Set up the mock to reject with an error s3Mock.on(ListBucketsCommand).rejects(new Error('Access denied')) - const proxy = new AssetProxy() - await t.throwsAsync( - async () => proxy.initialize(), + async () => AssetProxy.create(), { message: /Failed to fetch buckets for asset proxy: Access denied/ } ) } finally { @@ -127,45 +132,49 @@ test('AssetProxy - initialize() with ALL_BUCKETS_IN_ACCOUNT mode throws on error } }) -test('AssetProxy - isEnabled() returns false for NONE', (t) => { +test('AssetProxy - isEnabled returns false for NONE', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' - const proxy = new AssetProxy() - t.false(proxy.isEnabled()) + const proxy = await AssetProxy.create() + 
t.false(proxy.isEnabled) } finally { process.env = before } }) -test('AssetProxy - isEnabled() returns true for ALL', (t) => { +test('AssetProxy - isEnabled returns true for ALL', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const proxy = new AssetProxy() - t.true(proxy.isEnabled()) + const proxy = await AssetProxy.create() + t.true(proxy.isEnabled) } finally { process.env = before } }) -test('AssetProxy - isEnabled() returns true for LIST', async (t) => { +test('AssetProxy - isEnabled returns true for LIST', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1' - const proxy = new AssetProxy() - await proxy.initialize() - t.true(proxy.isEnabled()) + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const proxy = await AssetProxy.create() + t.true(proxy.isEnabled) } finally { process.env = before } }) -test('AssetProxy - isEnabled() returns true for ALL_BUCKETS_IN_ACCOUNT', async (t) => { +test('AssetProxy - isEnabled returns true for ALL_BUCKETS_IN_ACCOUNT', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' @@ -174,33 +183,37 @@ test('AssetProxy - isEnabled() returns true for ALL_BUCKETS_IN_ACCOUNT', async ( Buckets: [{ Name: 'bucket-1' }] }) - const proxy = new AssetProxy() - await proxy.initialize() + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) - t.true(proxy.isEnabled()) + const proxy = await AssetProxy.create() + + t.true(proxy.isEnabled) } finally { process.env = before } }) -test('AssetProxy - shouldProxyBucket() with NONE mode returns false', (t) => { +test('AssetProxy - shouldProxyBucket() with NONE mode returns false', async (t) => { const before = { ...process.env } try { 
process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() t.false(proxy.shouldProxyBucket('any-bucket')) } finally { process.env = before } }) -test('AssetProxy - shouldProxyBucket() with ALL mode returns true', (t) => { +test('AssetProxy - shouldProxyBucket() with ALL mode returns true', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() t.true(proxy.shouldProxyBucket('any-bucket')) t.true(proxy.shouldProxyBucket('another-bucket')) } finally { @@ -214,8 +227,12 @@ test('AssetProxy - shouldProxyBucket() with LIST mode only proxies buckets in li process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' process.env['ASSET_PROXY_BUCKET_LIST'] = 'allowed-bucket,another-allowed' - const proxy = new AssetProxy() - await proxy.initialize() + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const proxy = await AssetProxy.create() t.true(proxy.shouldProxyBucket('allowed-bucket')) t.true(proxy.shouldProxyBucket('another-allowed')) @@ -237,8 +254,12 @@ test('AssetProxy - shouldProxyBucket() with ALL_BUCKETS_IN_ACCOUNT mode only pro ] }) - const proxy = new AssetProxy() - await proxy.initialize() + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const proxy = await AssetProxy.create() t.true(proxy.shouldProxyBucket('fetched-bucket-1')) t.true(proxy.shouldProxyBucket('fetched-bucket-2')) @@ -248,19 +269,19 @@ test('AssetProxy - shouldProxyBucket() with ALL_BUCKETS_IN_ACCOUNT mode only pro } }) -test('AssetProxy - getProxiedAssets() transforms item assets in ALL mode', (t) => { +test('AssetProxy - getProxiedAssets() transforms item assets in ALL mode', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const 
proxy = new AssetProxy() + const proxy = await AssetProxy.create() const assets = { thumbnail: { href: 's3://my-bucket/thumb.jpg', type: 'image/jpeg' }, data: { - href: 'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif', + href: 's3://my-bucket/data.tif', type: 'image/tiff' } } @@ -276,18 +297,18 @@ test('AssetProxy - getProxiedAssets() transforms item assets in ALL mode', (t) = t.is(proxied.thumbnail.href, 'https://api.example.com/collections/collection1/items/item1/assets/thumbnail') t.is(proxied.thumbnail.alternate.s3.href, 's3://my-bucket/thumb.jpg') t.is(proxied.data.href, 'https://api.example.com/collections/collection1/items/item1/assets/data') - t.is(proxied.data.alternate.s3.href, 'https://my-bucket.s3.us-west-2.amazonaws.com/data.tif') + t.is(proxied.data.alternate.s3.href, 's3://my-bucket/data.tif') } finally { process.env = before } }) -test('AssetProxy - getProxiedAssets() transforms collection assets', (t) => { +test('AssetProxy - getProxiedAssets() transforms collection assets', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() const assets = { thumbnail: { href: 's3://my-bucket/collection-thumb.jpg', @@ -310,12 +331,12 @@ test('AssetProxy - getProxiedAssets() transforms collection assets', (t) => { } }) -test('AssetProxy - getProxiedAssets() does not transform assets in NONE mode', (t) => { +test('AssetProxy - getProxiedAssets() does not transform assets in NONE mode', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() const assets = { thumbnail: { href: 's3://my-bucket/thumb.jpg', @@ -338,12 +359,12 @@ test('AssetProxy - getProxiedAssets() does not transform assets in NONE mode', ( } }) -test('AssetProxy - getProxiedAssets() preserves existing alternate links', (t) => { +test('AssetProxy - 
getProxiedAssets() preserves existing alternate links', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() const assets = { data: { href: 's3://my-bucket/data.tif', @@ -368,12 +389,12 @@ test('AssetProxy - getProxiedAssets() preserves existing alternate links', (t) = } }) -test('AssetProxy - getProxiedAssets() does not transform non-S3 assets', (t) => { +test('AssetProxy - getProxiedAssets() does not transform non-S3 assets', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() const assets = { metadata: { href: 'https://example.com/metadata.xml', @@ -396,12 +417,12 @@ test('AssetProxy - getProxiedAssets() does not transform non-S3 assets', (t) => } }) -test('AssetProxy - getProxiedAssets() handles assets without href', (t) => { +test('AssetProxy - getProxiedAssets() handles assets without href', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() const assets = { metadata: { type: 'application/xml' @@ -422,12 +443,12 @@ test('AssetProxy - getProxiedAssets() handles assets without href', (t) => { } }) -test('AssetProxy - getProxiedAssets() handles empty assets object', (t) => { +test('AssetProxy - getProxiedAssets() handles empty assets object', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() const assets = {} const { assets: proxied, wasProxied } = proxy.getProxiedAssets( @@ -444,12 +465,12 @@ test('AssetProxy - getProxiedAssets() handles empty assets object', (t) => { } }) -test('AssetProxy - addProxiedAssets() mutates results and adds stac_extensions', (t) => 
{ +test('AssetProxy - updateAssetHrefs() mutates results and adds the alternate assets extension', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() const results = [{ id: 'item1', collection: 'collection1', @@ -460,7 +481,7 @@ test('AssetProxy - addProxiedAssets() mutates results and adds stac_extensions', } }] - proxy.addProxiedAssets(results, 'https://api.example.com') + proxy.updateAssetHrefs(results, 'https://api.example.com') t.truthy(results[0].assets) t.is(results[0].assets.data.href, 'https://api.example.com/collections/collection1/items/item1/assets/data') @@ -473,12 +494,12 @@ test('AssetProxy - addProxiedAssets() mutates results and adds stac_extensions', } }) -test('AssetProxy - addProxiedAssets() returns unchanged results when disabled', (t) => { +test('AssetProxy - updateAssetHrefs() returns unchanged results when disabled', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' - const proxy = new AssetProxy() + const proxy = await AssetProxy.create() const results = [{ id: 'item1', collection: 'collection1', @@ -490,7 +511,7 @@ test('AssetProxy - addProxiedAssets() returns unchanged results when disabled', }] const originalHref = results[0].assets.data.href - proxy.addProxiedAssets(results, 'https://api.example.com') + proxy.updateAssetHrefs(results, 'https://api.example.com') t.is(results[0].assets.data.href, originalHref) t.is(results[0].assets.data.alternate, undefined) From 9dc923f8a97df1a7fbc28a7b5d10f63d0159a79a Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Fri, 24 Oct 2025 16:32:21 -0500 Subject: [PATCH 15/22] docs: update docs --- README.md | 88 +++++++++++++++++++++--------------------- serverless.example.yml | 2 +- 2 files changed, 44 insertions(+), 46 deletions(-) diff --git a/README.md b/README.md index aa2eabd2..ee6f89ac 100644 --- a/README.md +++ 
b/README.md @@ -1130,25 +1130,27 @@ Available aggregations are: ## Asset Proxy -The Asset Proxy feature allows stac-server to proxy access to S3 assets through the STAC API by generating pre-signed URLs. When enabled, asset `href` values pointing to S3 are replaced with proxy endpoint URLs, while the original S3 URLs are preserved in the `alternate.s3.href` field using the [Alternate Assets Extension](https://github.com/stac-extensions/alternate-assets). +The Asset Proxy feature enables stac-server to proxy access to S3 assets through the STAC API by generating pre-signed URLs. When enabled, asset `href` values pointing to S3 are replaced with proxy endpoint URLs when an Item or Collection is served, while the original S3 URLs are preserved in the `alternate.s3.href` field using the [Alternate Assets Extension](https://github.com/stac-extensions/alternate-assets). Only objects with S3 URIs (`s3://` prefix) are proxied; other URL schemes are returned unchanged. ### Configuration -Asset proxying is controlled by the `ASSET_PROXY_BUCKET_OPTION` environment variable, which supports four modes: +Asset proxying uses three environment variables: -- **NONE** (default): Asset proxy is disabled. All asset hrefs are returned unchanged. -- **ALL**: Proxy all S3 assets regardless of which bucket they are in. -- **ALL_BUCKETS_IN_ACCOUNT**: Proxy assets from any S3 bucket in the AWS account. The list of buckets is fetched at Lambda startup. -- **LIST**: Proxy only assets from specific buckets listed in `ASSET_PROXY_BUCKET_LIST`. +- **`ASSET_PROXY_BUCKET_OPTION`** — Specifies one of four modes to control which S3 buckets are proxied. -When using the `LIST` option, the `ASSET_PROXY_BUCKET_LIST` environment variable must be set to a comma-separated list of bucket names: + - **NONE** (default): Asset proxy is disabled. All asset hrefs are returned unchanged. + - **ALL**: Proxy all S3 assets regardless of which bucket they are in. 
+ - **ALL_BUCKETS_IN_ACCOUNT**: Proxy assets from any S3 bucket accessible to the AWS account credentials. The list of buckets is fetched at Lambda startup. + - **LIST**: Only proxy assets from specific buckets listed in `ASSET_PROXY_BUCKET_LIST`. -```yaml -ASSET_PROXY_BUCKET_OPTION: "LIST" -ASSET_PROXY_BUCKET_LIST: "my-bucket-1,my-bucket-2,my-bucket-3" -``` +- **`ASSET_PROXY_BUCKET_LIST`** — Comma-separated list of bucket names (required only when the `ASSET_PROXY_BUCKET_OPTION` environment variable is set to `LIST`) -The `ASSET_PROXY_URL_EXPIRY` environment variable controls how long the pre-signed URLs are valid, in seconds (default: 300). + ```yaml + ASSET_PROXY_BUCKET_OPTION: "LIST" + ASSET_PROXY_BUCKET_LIST: "my-bucket-1,my-bucket-2,my-bucket-3" + ``` + +- **`ASSET_PROXY_URL_EXPIRY`** — Pre-signed URL expiry in seconds (default: `300`) ### Endpoints @@ -1157,38 +1159,54 @@ When asset proxying is enabled, two endpoints are available for accessing proxie - `GET /collections/{collectionId}/items/{itemId}/assets/{assetKey}` - Redirects (HTTP 302) to a pre-signed S3 URL for an item asset - `GET /collections/{collectionId}/assets/{assetKey}` - Redirects (HTTP 302) to a pre-signed S3 URL for a collection asset -These endpoints will return: -- `302` - Redirect to pre-signed S3 URL (success) -- `400` - Bad request (asset href is not a valid S3 URL) -- `403` - Forbidden (asset proxy disabled, or bucket not in allowed list) -- `404` - Not found (item/collection or asset does not exist) -- `500` - Server error - ### IAM Permissions -For the Asset Proxy to generate pre-signed URLs, the API Lambda must have `s3:GetObject` permission for the S3 buckets containing the assets. Add the following to the IAM role statements in your serverless.yml: +For the Asset Proxy feature to generate pre-signed URLs, the API and ingest Lambdas must be assigned permissions for the S3 buckets containing the assets. 
Add the following to the IAM role statements in your `serverless.yml` file, adjusting the resources as needed: + +For the `LIST` mode, you can specify the buckets listed in `ASSET_PROXY_BUCKET_LIST`: ```yaml - Effect: Allow - Action: s3:GetObject + Action: + - s3:GetObject Resource: - "arn:aws:s3:::my-bucket-1/*" - "arn:aws:s3:::my-bucket-2/*" +- Effect: Allow + Action: + - s3:HeadBucket + Resource: + - "arn:aws:s3:::my-bucket-1" + - "arn:aws:s3:::my-bucket-2" ``` -For the `ALL` or `ALL_BUCKETS_IN_ACCOUNT` options, you may use a wildcard: +For the `ALL` mode, use wildcards: ```yaml - Effect: Allow - Action: s3:GetObject + Action: + - s3:GetObject Resource: "arn:aws:s3:::*/*" +- Effect: Allow + Action: + - s3:HeadBucket + Resource: "arn:aws:s3:::*" ``` -When using `ALL_BUCKETS_IN_ACCOUNT`, the Lambda also needs permission to list buckets: +When using `ALL_BUCKETS_IN_ACCOUNT` mode, the Lambda also needs permission to list buckets: ```yaml - Effect: Allow - Action: s3:ListAllMyBuckets + Action: + - s3:GetObject + Resource: "arn:aws:s3:::*/*" +- Effect: Allow + Action: + - s3:HeadBucket + Resource: "arn:aws:s3:::*" +- Effect: Allow + Action: + - s3:ListAllMyBuckets Resource: "*" ``` @@ -1231,26 +1249,6 @@ The item or collection will also have the Alternate Assets Extension added to it ] ``` -### Supported S3 URL Formats - -The Asset Proxy recognizes and parses these S3 URL formats: - -- S3 URI: `s3://bucket-name/key` -- Virtual-hosted style: `https://bucket-name.s3.region.amazonaws.com/key` -- Virtual-hosted style (no region): `https://bucket-name.s3.amazonaws.com/key` -- Path style: `https://s3.region.amazonaws.com/bucket-name/key` -- Path style (legacy): `https://s3-region.amazonaws.com/bucket-name/key` - -### Region Determination - -The AWS region for generating pre-signed URLs is determined in this order: - -1. Region parsed from the S3 URL (for HTTPS URLs) -2. `storage:region` field on the asset (Storage Extension v1) -3. 
Region from `storage:schemes` referenced by `storage:refs` on the asset (Storage Extension v2) -4. `AWS_REGION` environment variable -5. Default: `us-west-2` - ## Collections and filter parameters for authorization One key concern in stac-server is how to restrict user's access to items. These diff --git a/serverless.example.yml b/serverless.example.yml index 6960c3f2..2b875ba6 100644 --- a/serverless.example.yml +++ b/serverless.example.yml @@ -34,7 +34,7 @@ provider: STAC_API_URL: "https://some-stac-server.example.com" CORS_ORIGIN: "https://ui.example.com" CORS_CREDENTIALS: true - # Asset Proxy Configuration + # Asset Proxy Environment Variables # ASSET_PROXY_BUCKET_OPTION: "NONE" # Options: NONE, ALL, ALL_BUCKETS_IN_ACCOUNT, LIST # ASSET_PROXY_BUCKET_LIST: "bucket1,bucket2,bucket3" # Required only when ASSET_PROXY_BUCKET_OPTION is LIST # ASSET_PROXY_URL_EXPIRY: 300 # Pre-signed URL expiry in seconds (default: 300) From 517a5d4a8008b5db47ca435f3838b13f82d94cc7 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Tue, 28 Oct 2025 11:58:35 -0500 Subject: [PATCH 16/22] docs: minor README update and logging improvement in asset-proxy.js --- README.md | 10 +++++++++- src/lib/asset-proxy.js | 4 +++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index ee6f89ac..2cdfce52 100644 --- a/README.md +++ b/README.md @@ -1130,7 +1130,15 @@ Available aggregations are: ## Asset Proxy -The Asset Proxy feature enables stac-server to proxy access to S3 assets through the STAC API by generating pre-signed URLs. When enabled, asset `href` values pointing to S3 are replaced with proxy endpoint URLs when an Item or Collection is served, while the original S3 URLs are preserved in the `alternate.s3.href` field using the [Alternate Assets Extension](https://github.com/stac-extensions/alternate-assets). Only objects with S3 URIs (`s3://` prefix) are proxied; other URL schemes are returned unchanged. 
+The Asset Proxy feature enables stac-server to proxy access to S3 assets through the STAC +API by generating requester-pays pre-signed URLs. Only assets with S3 URIs (`s3://` +prefix) are proxied; other URL schemes are ignored. When the Asset Proxy feature is +enabled, asset `href` values pointing to S3 are replaced with proxy endpoint URLs when an +Item or Collection is served, while the original S3 URLs are preserved in the +`alternate.s3.href` field using the [Alternate Assets +Extension](https://github.com/stac-extensions/alternate-assets). Subsequent GET requests +to the proxy endpoint URLs are redirected to pre-signed S3 object URLs for download (the +requester pays for S3 egress). ### Configuration diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index c352d485..970f9b86 100644 --- a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -1,4 +1,3 @@ -/* eslint-disable max-classes-per-file */ import { GetObjectCommand, ListBucketsCommand, @@ -274,6 +273,9 @@ export class AssetProxy { const region = await this.getBucket(bucket).then((b) => b.region) if (!region) { + // Should not get here if bucketOption is LIST or ALL_BUCKETS_IN_ACCOUNT + // If bucketOption is ALL, the bucket either does not exist or access is denied + logger.warn(`Bucket ${bucket} does not exist or access is denied`) return null } From 3e96a02ed064b3065057517bd0cf5995f244cb14 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Tue, 28 Oct 2025 15:43:11 -0500 Subject: [PATCH 17/22] review: remove commented code --- README.md | 2 +- src/lambdas/ingest/index.js | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 2cdfce52..992c0a37 100644 --- a/README.md +++ b/README.md @@ -619,7 +619,7 @@ There are some settings that should be reviewed and updated as needeed in the se | ENABLE_RESPONSE_COMPRESSION | Enables response compression. Set to 'false' to disable. 
| enabled | | ITEMS_MAX_LIMIT | The maximum limit for the number of items returned from the /search and /collections/{collection_id}/items endpoints. It is recommended that this be set to 100. There is an absolute max limit of 10000 for this. | 10000 | | ASSET_PROXY_BUCKET_OPTION | Control which S3 buckets are proxied through the API. Options: `NONE` (disabled), `ALL` (all S3 assets), `ALL_BUCKETS_IN_ACCOUNT` (all buckets in AWS account), `LIST` (specific buckets only). | NONE | -| ASSET_PROXY_BUCKET_LIST | Comma-separated list of S3 bucket names to proxy. Required when `ASSET_PROXY_BUCKET_OPTION` is `LIST`. | none | +| ASSET_PROXY_BUCKET_LIST | Comma-separated list of S3 bucket names to proxy. Required when `ASSET_PROXY_BUCKET_OPTION` is `LIST`. | | | ASSET_PROXY_URL_EXPIRY | Pre-signed URL expiry time in seconds for proxied assets. | 300 | Additionally, the credential for OpenSearch must be configured, as decribed in the diff --git a/src/lambdas/ingest/index.js b/src/lambdas/ingest/index.js index 6be33081..49a52918 100644 --- a/src/lambdas/ingest/index.js +++ b/src/lambdas/ingest/index.js @@ -83,7 +83,6 @@ export const handler = async (event, _context) => { if (postIngestTopicArn) { logger.debug('Publishing to post-ingest topic: %s', postIngestTopicArn) - // const assetProxy = await getAssetProxy() await publishResultsToSns(results, postIngestTopicArn, assetProxy) } else { logger.debug('Skipping post-ingest notification since no topic is configured') From 3dec688048dd67613b1d19f2ff46c0d8a0f2a297 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 29 Oct 2025 10:29:32 -0500 Subject: [PATCH 18/22] review: pull asset proxy bucket management into its own class --- src/lib/asset-proxy.js | 122 ++++++++++++++++++--------- tests/system/test-api-asset-proxy.js | 4 +- tests/unit/test-asset-proxy.js | 80 ++++++------------ 3 files changed, 107 insertions(+), 99 deletions(-) diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index 970f9b86..ab7a9a08 100644 --- 
a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -1,3 +1,4 @@ +/* eslint-disable max-classes-per-file */ import { GetObjectCommand, ListBucketsCommand, @@ -32,22 +33,26 @@ const parseS3Url = (url) => { return { bucket, key } } -export class AssetProxy { - constructor() { - this.bucketOption = process.env['ASSET_PROXY_BUCKET_OPTION'] || 'NONE' - this.bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] - this.urlExpiry = parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) - this.isEnabled = this.bucketOption !== BucketOption.NONE +class AssetBuckets { + /** + * @param {string} bucketOption - Bucket option (NONE, ALL, ALL_BUCKETS_IN_ACCOUNT, LIST) + * @param {string[]|null} bucketNames - Array of bucket names (required for LIST option) + */ + constructor(bucketOption, bucketNames) { + this.bucketOption = bucketOption + this.bucketNames = bucketNames this.buckets = {} } /** - * @returns {Promise} Initialized AssetProxy instance + * @param {string} bucketOption - Bucket option (NONE, ALL, ALL_BUCKETS_IN_ACCOUNT, LIST) + * @param {string[]|null} bucketNames - Array of bucket names (required for LIST option) + * @returns {Promise} Initialized AssetBuckets instance */ - static async create() { - const dbInstance = new AssetProxy() - await dbInstance._initBuckets() - return dbInstance + static async create(bucketOption, bucketNames) { + const instance = new AssetBuckets(bucketOption, bucketNames) + await instance._initBuckets() + return instance } /** @@ -55,16 +60,14 @@ export class AssetProxy { */ async _initBuckets() { switch (this.bucketOption) { - case BucketOption.LIST: - if (this.bucketList) { - const bucketNames = this.bucketList.split(',').map((b) => b.trim()).filter((b) => b) + case BucketOption.LIST: { + if (this.bucketNames && this.bucketNames.length > 0) { await Promise.all( - bucketNames.map(async (name) => { await this.getBucket(name) }) + this.bucketNames.map(async (name) => { await this.getBucket(name) }) ) - const invalidBuckets = 
Object.values(this.buckets) - .filter((b) => b.region === null) - .map((b) => b.name) + const invalidBuckets = Object.keys(this.buckets) + .filter((bucketName) => this.buckets[bucketName].region === null) if (invalidBuckets.length > 0) { throw new Error( `Could not access or determine region for the following buckets: ${ @@ -78,33 +81,30 @@ export class AssetProxy { ) } else { throw new Error( - 'ASSET_PROXY_BUCKET_LIST must be set when ASSET_PROXY_BUCKET_OPTION is LIST' + 'ASSET_PROXY_BUCKET_LIST must not be empty when ASSET_PROXY_BUCKET_OPTION is LIST' ) } break + } - case BucketOption.ALL_BUCKETS_IN_ACCOUNT: - try { - const command = new ListBucketsCommand({}) - const response = await s3Client.send(command) - const buckets = response.Buckets || [] + case BucketOption.ALL_BUCKETS_IN_ACCOUNT: { + const command = new ListBucketsCommand({}) + const response = await s3Client.send(command) + const buckets = response.Buckets || [] - await Promise.all( - buckets - .map((bucket) => bucket.Name) - .filter((name) => typeof name === 'string') - .map(async (name) => { await this.getBucket(name) }) - ) + await Promise.all( + buckets + .map((bucket) => bucket.Name) + .filter((name) => typeof name === 'string') + .map(async (name) => { await this.getBucket(name) }) + ) - const count = Object.keys(this.buckets).length - logger.info( - `Fetched ${count} buckets from AWS account for asset proxy` - ) - } catch (error) { - const message = error instanceof Error ? 
error.message : String(error) - throw new Error(`Failed to fetch buckets for asset proxy: ${message}`) - } + const count = Object.keys(this.buckets).length + logger.info( + `Fetched ${count} buckets from AWS account for asset proxy` + ) break + } default: break @@ -120,10 +120,12 @@ export class AssetProxy { const command = new HeadBucketCommand({ Bucket: bucketName }) const response = await s3Client.send(command) const statusCode = response.$metadata.httpStatusCode + let name = null let region = null switch (statusCode) { case 200: + name = bucketName region = response.BucketRegion === 'EU' ? 'eu-west-1' : response.BucketRegion || 'us-east-1' @@ -141,7 +143,7 @@ export class AssetProxy { logger.warn(`Unexpected status code ${statusCode} for bucket ${bucketName}`) } - this.buckets[bucketName] = { name: bucketName, region } + this.buckets[bucketName] = { name, region } } return this.buckets[bucketName] } @@ -157,6 +159,42 @@ export class AssetProxy { } return false } +} + +export class AssetProxy { + /** + * @param {AssetBuckets} buckets - AssetBuckets instance + * @param {number} urlExpiry - Pre-signed URL expiry time in seconds + * @param {string} bucketOption - Bucket option (NONE, ALL, ALL_BUCKETS_IN_ACCOUNT, LIST) + */ + constructor(buckets, urlExpiry, bucketOption) { + this.buckets = buckets + this.urlExpiry = urlExpiry + this.isEnabled = bucketOption !== BucketOption.NONE + } + + /** + * @returns {Promise} Initialized AssetProxy instance + */ + static async create() { + const bucketOption = process.env['ASSET_PROXY_BUCKET_OPTION'] || 'NONE' + const urlExpiry = parseInt(process.env['ASSET_PROXY_URL_EXPIRY'] || '300', 10) + const bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] + + let bucketNames = null + if (bucketOption === BucketOption.LIST) { + if (!bucketList) { + throw new Error( + 'ASSET_PROXY_BUCKET_LIST must be set when ASSET_PROXY_BUCKET_OPTION is LIST' + ) + } + bucketNames = bucketList.split(',').map((b) => b.trim()).filter((b) => b) + } + + 
const buckets = await AssetBuckets.create(bucketOption, bucketNames) + + return new AssetProxy(buckets, urlExpiry, bucketOption) + } /** * @param {Object} assets - Assets object @@ -185,7 +223,7 @@ export class AssetProxy { continue } - if (!this.shouldProxyBucket(bucket)) { + if (!this.buckets.shouldProxyBucket(bucket)) { proxiedAssets[assetKey] = asset logger.warn(`Asset ${assetKey} bucket ${bucket} is not configured for proxying`) // eslint-disable-next-line no-continue @@ -267,11 +305,11 @@ export class AssetProxy { } const { bucket, key } = parseS3Url(asset.href) - if (!bucket || !key || !this.shouldProxyBucket(bucket)) { + if (!bucket || !key || !this.buckets.shouldProxyBucket(bucket)) { return null } - const region = await this.getBucket(bucket).then((b) => b.region) + const region = await this.buckets.getBucket(bucket).then((b) => b.region) if (!region) { // Should not get here if bucketOption is LIST or ALL_BUCKETS_IN_ACCOUNT // If bucketOption is ALL, the bucket either does not exist or access is denied diff --git a/tests/system/test-api-asset-proxy.js b/tests/system/test-api-asset-proxy.js index 0c9ee7cf..be255edd 100644 --- a/tests/system/test-api-asset-proxy.js +++ b/tests/system/test-api-asset-proxy.js @@ -65,8 +65,8 @@ test('AssetProxy initialized with ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', t.truthy(assetProxy.buckets) t.true(assetProxy.isEnabled) - t.true(assetProxy.shouldProxyBucket('landsat-pds')) - t.true(!assetProxy.shouldProxyBucket('some-other-bucket')) + t.true(assetProxy.buckets.shouldProxyBucket('landsat-pds')) + t.true(!assetProxy.buckets.shouldProxyBucket('some-other-bucket')) }) test('GET /collections/:collectionId/items/:itemId/assets/:assetKey - 302 redirect to presigned URL', async (t) => { diff --git a/tests/unit/test-asset-proxy.js b/tests/unit/test-asset-proxy.js index 37e75201..8f34243b 100644 --- a/tests/unit/test-asset-proxy.js +++ b/tests/unit/test-asset-proxy.js @@ -18,31 +18,17 @@ test('BucketOption - exports expected 
constants', (t) => { t.is(BucketOption.LIST, 'LIST') }) -test('AssetProxy - constructor initializes with expected defaults', async (t) => { +test.only('AssetProxy - constructor initializes with expected defaults', async (t) => { const before = { ...process.env } try { delete process.env['ASSET_PROXY_BUCKET_OPTION'] const proxy = await AssetProxy.create() - t.is(proxy.bucketOption, 'NONE') t.is(proxy.urlExpiry, 300) t.is(proxy.isEnabled, false) - } finally { - process.env = before - } -}) - -test('AssetProxy - constructor reads env vars correctly', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - process.env['ASSET_PROXY_URL_EXPIRY'] = '600' - process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1,bucket2' - - const proxy = await AssetProxy.create() - t.is(proxy.bucketOption, 'ALL') - t.is(proxy.urlExpiry, 600) - t.is(proxy.bucketList, 'bucket1,bucket2') + t.is(proxy.buckets.bucketOption, 'NONE') + t.is(proxy.buckets.bucketNames, null) + t.deepEqual(proxy.buckets.buckets, {}) } finally { process.env = before } @@ -62,10 +48,10 @@ test('AssetProxy - LIST mode parses bucket list', async (t) => { const proxy = await AssetProxy.create() t.truthy(proxy.buckets) - t.truthy(proxy.buckets['bucket1']) - t.truthy(proxy.buckets['bucket2']) - t.truthy(proxy.buckets['bucket3']) - t.is(Object.keys(proxy.buckets).length, 3) + t.truthy(proxy.buckets.buckets['bucket1']) + t.truthy(proxy.buckets.buckets['bucket2']) + t.truthy(proxy.buckets.buckets['bucket3']) + t.is(Object.keys(proxy.buckets.buckets).length, 3) } finally { process.env = before } @@ -107,26 +93,10 @@ test('AssetProxy - ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', async (t) => { const proxy = await AssetProxy.create() t.truthy(proxy.buckets) - t.truthy(proxy.buckets['bucket-1']) - t.truthy(proxy.buckets['bucket-2']) - t.is(proxy.buckets['some-other-bucket'], undefined) - t.is(Object.keys(proxy.buckets).length, 2) - } finally { - process.env = before - } -}) - 
-test('AssetProxy - ALL_BUCKETS_IN_ACCOUNT mode throws on error', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - - s3Mock.on(ListBucketsCommand).rejects(new Error('Access denied')) - - await t.throwsAsync( - async () => AssetProxy.create(), - { message: /Failed to fetch buckets for asset proxy: Access denied/ } - ) + t.truthy(proxy.buckets.buckets['bucket-1']) + t.truthy(proxy.buckets.buckets['bucket-2']) + t.is(proxy.buckets.buckets['some-other-bucket'], undefined) + t.is(Object.keys(proxy.buckets.buckets).length, 2) } finally { process.env = before } @@ -196,32 +166,32 @@ test('AssetProxy - isEnabled returns true for ALL_BUCKETS_IN_ACCOUNT', async (t) } }) -test('AssetProxy - shouldProxyBucket() with NONE mode returns false', async (t) => { +test('AssetProxy - bucket filtering with NONE mode returns false', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' const proxy = await AssetProxy.create() - t.false(proxy.shouldProxyBucket('any-bucket')) + t.false(proxy.buckets.shouldProxyBucket('any-bucket')) } finally { process.env = before } }) -test('AssetProxy - shouldProxyBucket() with ALL mode returns true', async (t) => { +test('AssetProxy - bucket filtering with ALL mode returns true', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' const proxy = await AssetProxy.create() - t.true(proxy.shouldProxyBucket('any-bucket')) - t.true(proxy.shouldProxyBucket('another-bucket')) + t.true(proxy.buckets.shouldProxyBucket('any-bucket')) + t.true(proxy.buckets.shouldProxyBucket('another-bucket')) } finally { process.env = before } }) -test('AssetProxy - shouldProxyBucket() with LIST mode only proxies buckets in list', async (t) => { +test('AssetProxy - bucket filtering with LIST mode only proxies buckets in list', async (t) => { const before = { ...process.env } try { 
process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' @@ -234,15 +204,15 @@ test('AssetProxy - shouldProxyBucket() with LIST mode only proxies buckets in li const proxy = await AssetProxy.create() - t.true(proxy.shouldProxyBucket('allowed-bucket')) - t.true(proxy.shouldProxyBucket('another-allowed')) - t.false(proxy.shouldProxyBucket('not-in-list')) + t.true(proxy.buckets.shouldProxyBucket('allowed-bucket')) + t.true(proxy.buckets.shouldProxyBucket('another-allowed')) + t.false(proxy.buckets.shouldProxyBucket('not-in-list')) } finally { process.env = before } }) -test('AssetProxy - shouldProxyBucket() with ALL_BUCKETS_IN_ACCOUNT mode only proxies fetched buckets', async (t) => { +test('AssetProxy - bucket filtering with ALL_BUCKETS_IN_ACCOUNT mode only proxies fetched buckets', async (t) => { const before = { ...process.env } try { process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' @@ -261,9 +231,9 @@ test('AssetProxy - shouldProxyBucket() with ALL_BUCKETS_IN_ACCOUNT mode only pro const proxy = await AssetProxy.create() - t.true(proxy.shouldProxyBucket('fetched-bucket-1')) - t.true(proxy.shouldProxyBucket('fetched-bucket-2')) - t.false(proxy.shouldProxyBucket('not-fetched-bucket')) + t.true(proxy.buckets.shouldProxyBucket('fetched-bucket-1')) + t.true(proxy.buckets.shouldProxyBucket('fetched-bucket-2')) + t.false(proxy.buckets.shouldProxyBucket('not-fetched-bucket')) } finally { process.env = before } From 1126949c82db57300f1bb38929877c69051d9f57 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 29 Oct 2025 11:39:58 -0500 Subject: [PATCH 19/22] chore: update CHANGELOG --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3a5e0f44..25bd7053 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,9 +17,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. 
### Changed -- When asset proxying is enabled, S3 asset hrefs are replaced with proxy endpoint URLs, - and original S3 URLs are preserved in `alternate.s3.href` using the Alternate Assets - Extension. +- When asset proxying is enabled, when a STAC Item or Collection is served, asset S3 hrefs + are replaced with proxy endpoint URLs and the original S3 URLs are preserved in + `alternate.s3.href` using the Alternate Assets Extension. ## [4.4.0] - 2025-09-10 From cad05dc212ebaa2d29cc52293eb3eff8017306a6 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Wed, 29 Oct 2025 15:58:44 -0500 Subject: [PATCH 20/22] refactor: extract AssetBuckets class from asset-proxy.js --- src/lib/asset-buckets.js | 143 ++++++++++++++++++++++ src/lib/asset-proxy.js | 145 +--------------------- tests/unit/test-asset-buckets.js | 198 +++++++++++++++++++++++++++++++ tests/unit/test-asset-proxy.js | 197 +----------------------------- 4 files changed, 348 insertions(+), 335 deletions(-) create mode 100644 src/lib/asset-buckets.js create mode 100644 tests/unit/test-asset-buckets.js diff --git a/src/lib/asset-buckets.js b/src/lib/asset-buckets.js new file mode 100644 index 00000000..7d3a98dc --- /dev/null +++ b/src/lib/asset-buckets.js @@ -0,0 +1,143 @@ +import { + ListBucketsCommand, + HeadBucketCommand +} from '@aws-sdk/client-s3' +import { s3 } from './aws-clients.js' +import logger from './logger.js' + +const s3Client = s3() + +export const BucketOptionEnum = Object.freeze({ + NONE: 'NONE', + ALL: 'ALL', + ALL_BUCKETS_IN_ACCOUNT: 'ALL_BUCKETS_IN_ACCOUNT', + LIST: 'LIST' +}) + +export class AssetBuckets { + /** + * @param {string} bucketOption - Bucket option (NONE, ALL, ALL_BUCKETS_IN_ACCOUNT, LIST) + * @param {string[]|null} bucketNames - Array of bucket names (required for LIST option) + */ + constructor(bucketOption, bucketNames) { + this.bucketOption = bucketOption + this.bucketNames = bucketNames + this.bucketCache = {} + } + + /** + * @param {string} bucketOption - Bucket option (NONE, ALL, 
ALL_BUCKETS_IN_ACCOUNT, LIST) + * @param {string[]|null} bucketNames - Array of bucket names (required for LIST option) + * @returns {Promise} Initialized AssetBuckets instance + */ + static async create(bucketOption, bucketNames) { + const instance = new AssetBuckets(bucketOption, bucketNames) + await instance._initBuckets() + return instance + } + + /** + * @returns {Promise} + */ + async _initBuckets() { + switch (this.bucketOption) { + case BucketOptionEnum.LIST: { + if (this.bucketNames && this.bucketNames.length > 0) { + await Promise.all( + this.bucketNames.map(async (name) => { await this.getBucket(name) }) + ) + + const invalidBuckets = Object.keys(this.bucketCache) + .filter((bucketName) => this.bucketCache[bucketName].region === null) + if (invalidBuckets.length > 0) { + throw new Error( + `Could not access or determine region for the following buckets: ${ + invalidBuckets.join(', ')}` + ) + } + + const count = Object.keys(this.bucketCache).length + logger.info( + `Parsed ${count} buckets from ASSET_PROXY_BUCKET_LIST for asset proxy` + ) + } else { + throw new Error( + 'ASSET_PROXY_BUCKET_LIST must not be empty when ASSET_PROXY_BUCKET_OPTION is LIST' + ) + } + break + } + + case BucketOptionEnum.ALL_BUCKETS_IN_ACCOUNT: { + const command = new ListBucketsCommand({}) + const response = await s3Client.send(command) + const buckets = response.Buckets || [] + + await Promise.all( + buckets + .map((bucket) => bucket.Name) + .filter((name) => typeof name === 'string') + .map(async (name) => { await this.getBucket(name) }) + ) + + const count = Object.keys(this.bucketCache).length + logger.info( + `Fetched ${count} buckets from AWS account for asset proxy` + ) + break + } + + default: + break + } + } + + /** + * @param {string} bucketName - S3 bucket name + * @returns {Promise} Bucket info {name, region} + */ + async getBucket(bucketName) { + if (!(bucketName in this.bucketCache)) { + const command = new HeadBucketCommand({ Bucket: bucketName }) + const response 
= await s3Client.send(command) + const statusCode = response.$metadata.httpStatusCode + let name = null + let region = null + + switch (statusCode) { + case 200: + name = bucketName + region = response.BucketRegion === 'EU' + ? 'eu-west-1' + : response.BucketRegion || 'us-east-1' + break + case 403: + logger.warn(`Access denied to bucket ${bucketName}`) + break + case 404: + logger.warn(`Bucket ${bucketName} does not exist`) + break + case 400: + logger.warn(`Bad request for bucket ${bucketName}`) + break + default: + logger.warn(`Unexpected status code ${statusCode} for bucket ${bucketName}`) + } + + this.bucketCache[bucketName] = { name, region } + } + return this.bucketCache[bucketName] + } + + /** + * @param {string} bucketName - S3 bucket name + * @returns {boolean} True if bucket should be proxied, False otherwise + */ + shouldProxyBucket(bucketName) { + if (this.bucketOption === BucketOptionEnum.ALL + || bucketName in this.bucketCache) { + return true + } + return false + } +} diff --git a/src/lib/asset-proxy.js b/src/lib/asset-proxy.js index ab7a9a08..73af9dc8 100644 --- a/src/lib/asset-proxy.js +++ b/src/lib/asset-proxy.js @@ -1,12 +1,10 @@ -/* eslint-disable max-classes-per-file */ import { GetObjectCommand, - ListBucketsCommand, - HeadBucketCommand } from '@aws-sdk/client-s3' import { getSignedUrl } from '@aws-sdk/s3-request-presigner' import { s3 } from './aws-clients.js' import logger from './logger.js' +import { AssetBuckets, BucketOptionEnum } from './asset-buckets.js' const s3Client = s3() @@ -14,18 +12,11 @@ const S3_URL_REGEX = /^s3:\/\/([^/]+)\/(.+)$/ export const ALTERNATE_ASSETS_EXTENSION = 'https://stac-extensions.github.io/alternate-assets/v1.2.0/schema.json' -export const BucketOption = Object.freeze({ - NONE: 'NONE', - ALL: 'ALL', - ALL_BUCKETS_IN_ACCOUNT: 'ALL_BUCKETS_IN_ACCOUNT', - LIST: 'LIST' -}) - /** * @param {string} url - S3 URL to parse * @returns {Object} {bucket, key} or {bucket: null, key: null} if not a valid S3 URL */ -const 
parseS3Url = (url) => { +export const parseS3Url = (url) => { const match = S3_URL_REGEX.exec(url) if (!match) return { bucket: null, key: null } @@ -33,134 +24,6 @@ const parseS3Url = (url) => { return { bucket, key } } -class AssetBuckets { - /** - * @param {string} bucketOption - Bucket option (NONE, ALL, ALL_BUCKETS_IN_ACCOUNT, LIST) - * @param {string[]|null} bucketNames - Array of bucket names (required for LIST option) - */ - constructor(bucketOption, bucketNames) { - this.bucketOption = bucketOption - this.bucketNames = bucketNames - this.buckets = {} - } - - /** - * @param {string} bucketOption - Bucket option (NONE, ALL, ALL_BUCKETS_IN_ACCOUNT, LIST) - * @param {string[]|null} bucketNames - Array of bucket names (required for LIST option) - * @returns {Promise} Initialized AssetBuckets instance - */ - static async create(bucketOption, bucketNames) { - const instance = new AssetBuckets(bucketOption, bucketNames) - await instance._initBuckets() - return instance - } - - /** - * @returns {Promise} - */ - async _initBuckets() { - switch (this.bucketOption) { - case BucketOption.LIST: { - if (this.bucketNames && this.bucketNames.length > 0) { - await Promise.all( - this.bucketNames.map(async (name) => { await this.getBucket(name) }) - ) - - const invalidBuckets = Object.keys(this.buckets) - .filter((bucketName) => this.buckets[bucketName].region === null) - if (invalidBuckets.length > 0) { - throw new Error( - `Could not access or determine region for the following buckets: ${ - invalidBuckets.join(', ')}` - ) - } - - const count = Object.keys(this.buckets).length - logger.info( - `Parsed ${count} buckets from ASSET_PROXY_BUCKET_LIST for asset proxy` - ) - } else { - throw new Error( - 'ASSET_PROXY_BUCKET_LIST must not be empty when ASSET_PROXY_BUCKET_OPTION is LIST' - ) - } - break - } - - case BucketOption.ALL_BUCKETS_IN_ACCOUNT: { - const command = new ListBucketsCommand({}) - const response = await s3Client.send(command) - const buckets = response.Buckets 
|| [] - - await Promise.all( - buckets - .map((bucket) => bucket.Name) - .filter((name) => typeof name === 'string') - .map(async (name) => { await this.getBucket(name) }) - ) - - const count = Object.keys(this.buckets).length - logger.info( - `Fetched ${count} buckets from AWS account for asset proxy` - ) - break - } - - default: - break - } - } - - /** - * @param {string} bucketName - S3 bucket name - * @returns {Promise} Bucket info {name, region} - */ - async getBucket(bucketName) { - if (!(bucketName in this.buckets)) { - const command = new HeadBucketCommand({ Bucket: bucketName }) - const response = await s3Client.send(command) - const statusCode = response.$metadata.httpStatusCode - let name = null - let region = null - - switch (statusCode) { - case 200: - name = bucketName - region = response.BucketRegion === 'EU' - ? 'eu-west-1' - : response.BucketRegion || 'us-east-1' - break - case 403: - logger.warn(`Access denied to bucket ${bucketName}`) - break - case 404: - logger.warn(`Bucket ${bucketName} does not exist`) - break - case 400: - logger.warn(`Bad request for bucket ${bucketName}`) - break - default: - logger.warn(`Unexpected status code ${statusCode} for bucket ${bucketName}`) - } - - this.buckets[bucketName] = { name, region } - } - return this.buckets[bucketName] - } - - /** - * @param {string} bucketName - S3 bucket name - * @returns {boolean} True if bucket should be proxied, False otherwise - */ - shouldProxyBucket(bucketName) { - if (this.bucketOption === BucketOption.ALL - || bucketName in this.buckets) { - return true - } - return false - } -} - export class AssetProxy { /** * @param {AssetBuckets} buckets - AssetBuckets instance @@ -170,7 +33,7 @@ export class AssetProxy { constructor(buckets, urlExpiry, bucketOption) { this.buckets = buckets this.urlExpiry = urlExpiry - this.isEnabled = bucketOption !== BucketOption.NONE + this.isEnabled = bucketOption !== BucketOptionEnum.NONE } /** @@ -182,7 +45,7 @@ export class AssetProxy { const 
bucketList = process.env['ASSET_PROXY_BUCKET_LIST'] let bucketNames = null - if (bucketOption === BucketOption.LIST) { + if (bucketOption === BucketOptionEnum.LIST) { if (!bucketList) { throw new Error( 'ASSET_PROXY_BUCKET_LIST must be set when ASSET_PROXY_BUCKET_OPTION is LIST' diff --git a/tests/unit/test-asset-buckets.js b/tests/unit/test-asset-buckets.js new file mode 100644 index 00000000..26ada467 --- /dev/null +++ b/tests/unit/test-asset-buckets.js @@ -0,0 +1,198 @@ +// @ts-nocheck + +import test from 'ava' +import { mockClient } from 'aws-sdk-client-mock' +import { S3Client, ListBucketsCommand, HeadBucketCommand } from '@aws-sdk/client-s3' +import { AssetBuckets, BucketOptionEnum } from '../../src/lib/asset-buckets.js' + +const s3Mock = mockClient(S3Client) + +test.beforeEach(() => { + s3Mock.reset() +}) + +test('BucketOptionEnum - exports expected constants', (t) => { + t.is(BucketOptionEnum.NONE, 'NONE') + t.is(BucketOptionEnum.ALL, 'ALL') + t.is(BucketOptionEnum.ALL_BUCKETS_IN_ACCOUNT, 'ALL_BUCKETS_IN_ACCOUNT') + t.is(BucketOptionEnum.LIST, 'LIST') +}) + +test('AssetBuckets - LIST mode parses bucket list', async (t) => { + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const buckets = await AssetBuckets.create( + BucketOptionEnum.LIST, + ['bucket1', 'bucket2', 'bucket3'] + ) + + t.truthy(buckets.bucketCache['bucket1']) + t.truthy(buckets.bucketCache['bucket2']) + t.truthy(buckets.bucketCache['bucket3']) + t.is(Object.keys(buckets.bucketCache).length, 3) +}) + +test('AssetBuckets - LIST mode throws if bucket list is empty', async (t) => { + await t.throwsAsync( + async () => AssetBuckets.create(BucketOptionEnum.LIST, []), + { message: /ASSET_PROXY_BUCKET_LIST must not be empty/ } + ) +}) + +test('AssetBuckets - LIST mode throws if bucket list is null', async (t) => { + await t.throwsAsync( + async () => AssetBuckets.create(BucketOptionEnum.LIST, null), + { message: 
/ASSET_PROXY_BUCKET_LIST must not be empty/ } + ) +}) + +test('AssetBuckets - LIST mode throws if bucket is inaccessible', async (t) => { + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 403 } + }) + + await t.throwsAsync( + async () => AssetBuckets.create(BucketOptionEnum.LIST, ['bucket1']), + { message: /Could not access or determine region/ } + ) +}) + +test('AssetBuckets - ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', async (t) => { + s3Mock.on(ListBucketsCommand).resolves({ + Buckets: [ + { Name: 'bucket-1' }, + { Name: 'bucket-2' }, + ] + }) + + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const buckets = await AssetBuckets.create(BucketOptionEnum.ALL_BUCKETS_IN_ACCOUNT, null) + + t.truthy(buckets.bucketCache['bucket-1']) + t.truthy(buckets.bucketCache['bucket-2']) + t.is(buckets.bucketCache['some-other-bucket'], undefined) + t.is(Object.keys(buckets.bucketCache).length, 2) +}) + +test('AssetBuckets - shouldProxyBucket returns false for NONE mode', async (t) => { + const buckets = await AssetBuckets.create(BucketOptionEnum.NONE, null) + t.false(buckets.shouldProxyBucket('any-bucket')) +}) + +test('AssetBuckets - shouldProxyBucket returns true for ALL mode', async (t) => { + const buckets = await AssetBuckets.create(BucketOptionEnum.ALL, null) + t.true(buckets.shouldProxyBucket('any-bucket')) + t.true(buckets.shouldProxyBucket('another-bucket')) +}) + +test('AssetBuckets - shouldProxyBucket with LIST mode only proxies buckets in list', async (t) => { + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const buckets = await AssetBuckets.create( + BucketOptionEnum.LIST, + ['allowed-bucket', 'another-allowed'] + ) + + t.true(buckets.shouldProxyBucket('allowed-bucket')) + t.true(buckets.shouldProxyBucket('another-allowed')) + t.false(buckets.shouldProxyBucket('not-in-list')) +}) + +test('AssetBuckets - 
shouldProxyBucket with ALL_BUCKETS_IN_ACCOUNT mode only proxies fetched buckets', async (t) => { + s3Mock.on(ListBucketsCommand).resolves({ + Buckets: [ + { Name: 'fetched-bucket-1' }, + { Name: 'fetched-bucket-2' } + ] + }) + + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const buckets = await AssetBuckets.create(BucketOptionEnum.ALL_BUCKETS_IN_ACCOUNT, null) + + t.true(buckets.shouldProxyBucket('fetched-bucket-1')) + t.true(buckets.shouldProxyBucket('fetched-bucket-2')) + t.false(buckets.shouldProxyBucket('not-fetched-bucket')) +}) + +// Using serial to prevent HeadBucketCommand mock interference between tests +test.serial('AssetBuckets - getBucket handles 403 access denied', async (t) => { + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 403 } + }) + + const buckets = await AssetBuckets.create(BucketOptionEnum.ALL, null) + const bucket = await buckets.getBucket('denied-bucket') + + t.is(bucket.name, null) + t.is(bucket.region, null) +}) + +// Using serial to prevent HeadBucketCommand mock interference between tests +test.serial('AssetBuckets - getBucket handles 404 not found', async (t) => { + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 404 } + }) + + const buckets = await AssetBuckets.create(BucketOptionEnum.ALL, null) + const bucket = await buckets.getBucket('missing-bucket') + + t.is(bucket.name, null) + t.is(bucket.region, null) +}) + +// Using serial to prevent HeadBucketCommand mock interference between tests +test.serial('AssetBuckets - getBucket caches bucket info', async (t) => { + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'us-west-2' + }) + + const buckets = await AssetBuckets.create(BucketOptionEnum.ALL, null) + + const bucket1 = await buckets.getBucket('test-bucket') + const bucket2 = await buckets.getBucket('test-bucket') + + t.is(bucket1, bucket2) + 
t.is(s3Mock.commandCalls(HeadBucketCommand).length, 1) +}) + +// Using serial to prevent HeadBucketCommand mock interference between tests +test.serial('AssetBuckets - getBucket handles EU region', async (t) => { + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 }, + BucketRegion: 'EU' + }) + + const buckets = await AssetBuckets.create(BucketOptionEnum.ALL, null) + const bucket = await buckets.getBucket('eu-bucket') + + t.is(bucket.name, 'eu-bucket') + t.is(bucket.region, 'eu-west-1') +}) + +// Using serial to prevent HeadBucketCommand mock interference between tests +test.serial('AssetBuckets - getBucket defaults to us-east-1 when region is missing', async (t) => { + s3Mock.on(HeadBucketCommand).resolves({ + $metadata: { httpStatusCode: 200 } + }) + + const buckets = await AssetBuckets.create(BucketOptionEnum.ALL, null) + const bucket = await buckets.getBucket('default-region-bucket') + + t.is(bucket.name, 'default-region-bucket') + t.is(bucket.region, 'us-east-1') +}) diff --git a/tests/unit/test-asset-proxy.js b/tests/unit/test-asset-proxy.js index 8f34243b..d6bb5f5f 100644 --- a/tests/unit/test-asset-proxy.js +++ b/tests/unit/test-asset-proxy.js @@ -2,8 +2,8 @@ import test from 'ava' import { mockClient } from 'aws-sdk-client-mock' -import { S3Client, ListBucketsCommand, HeadBucketCommand } from '@aws-sdk/client-s3' -import { AssetProxy, BucketOption, ALTERNATE_ASSETS_EXTENSION } from '../../src/lib/asset-proxy.js' +import { S3Client } from '@aws-sdk/client-s3' +import { AssetProxy, ALTERNATE_ASSETS_EXTENSION } from '../../src/lib/asset-proxy.js' const s3Mock = mockClient(S3Client) @@ -11,14 +11,7 @@ test.beforeEach(() => { s3Mock.reset() }) -test('BucketOption - exports expected constants', (t) => { - t.is(BucketOption.NONE, 'NONE') - t.is(BucketOption.ALL, 'ALL') - t.is(BucketOption.ALL_BUCKETS_IN_ACCOUNT, 'ALL_BUCKETS_IN_ACCOUNT') - t.is(BucketOption.LIST, 'LIST') -}) - -test.only('AssetProxy - constructor initializes with expected 
defaults', async (t) => { +test('AssetProxy - constructor initializes with expected defaults', async (t) => { const before = { ...process.env } try { delete process.env['ASSET_PROXY_BUCKET_OPTION'] @@ -26,77 +19,6 @@ test.only('AssetProxy - constructor initializes with expected defaults', async ( const proxy = await AssetProxy.create() t.is(proxy.urlExpiry, 300) t.is(proxy.isEnabled, false) - t.is(proxy.buckets.bucketOption, 'NONE') - t.is(proxy.buckets.bucketNames, null) - t.deepEqual(proxy.buckets.buckets, {}) - } finally { - process.env = before - } -}) - -test('AssetProxy - LIST mode parses bucket list', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' - process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1, bucket2 , bucket3' - - s3Mock.on(HeadBucketCommand).resolves({ - $metadata: { httpStatusCode: 200 }, - BucketRegion: 'us-west-2' - }) - - const proxy = await AssetProxy.create() - - t.truthy(proxy.buckets) - t.truthy(proxy.buckets.buckets['bucket1']) - t.truthy(proxy.buckets.buckets['bucket2']) - t.truthy(proxy.buckets.buckets['bucket3']) - t.is(Object.keys(proxy.buckets.buckets).length, 3) - } finally { - process.env = before - } -}) - -test('AssetProxy - LIST mode throws if no bucket list', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' - delete process.env['ASSET_PROXY_BUCKET_LIST'] - - await t.throwsAsync( - async () => AssetProxy.create(), - { message: /ASSET_PROXY_BUCKET_LIST must be set/ } - ) - } finally { - process.env = before - } -}) - -test('AssetProxy - ALL_BUCKETS_IN_ACCOUNT mode fetches buckets', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - process.env['AWS_REGION'] = 'us-west-2' - - s3Mock.on(ListBucketsCommand).resolves({ - Buckets: [ - { Name: 'bucket-1' }, - { Name: 'bucket-2' }, - ] - }) - - s3Mock.on(HeadBucketCommand).resolves({ - 
$metadata: { httpStatusCode: 200 }, - BucketRegion: 'us-west-2' - }) - - const proxy = await AssetProxy.create() - - t.truthy(proxy.buckets) - t.truthy(proxy.buckets.buckets['bucket-1']) - t.truthy(proxy.buckets.buckets['bucket-2']) - t.is(proxy.buckets.buckets['some-other-bucket'], undefined) - t.is(Object.keys(proxy.buckets.buckets).length, 2) } finally { process.env = before } @@ -126,119 +48,6 @@ test('AssetProxy - isEnabled returns true for ALL', async (t) => { } }) -test('AssetProxy - isEnabled returns true for LIST', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' - process.env['ASSET_PROXY_BUCKET_LIST'] = 'bucket1' - - s3Mock.on(HeadBucketCommand).resolves({ - $metadata: { httpStatusCode: 200 }, - BucketRegion: 'us-west-2' - }) - - const proxy = await AssetProxy.create() - t.true(proxy.isEnabled) - } finally { - process.env = before - } -}) - -test('AssetProxy - isEnabled returns true for ALL_BUCKETS_IN_ACCOUNT', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - - s3Mock.on(ListBucketsCommand).resolves({ - Buckets: [{ Name: 'bucket-1' }] - }) - - s3Mock.on(HeadBucketCommand).resolves({ - $metadata: { httpStatusCode: 200 }, - BucketRegion: 'us-west-2' - }) - - const proxy = await AssetProxy.create() - - t.true(proxy.isEnabled) - } finally { - process.env = before - } -}) - -test('AssetProxy - bucket filtering with NONE mode returns false', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'NONE' - - const proxy = await AssetProxy.create() - t.false(proxy.buckets.shouldProxyBucket('any-bucket')) - } finally { - process.env = before - } -}) - -test('AssetProxy - bucket filtering with ALL mode returns true', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL' - - const proxy = await AssetProxy.create() - 
t.true(proxy.buckets.shouldProxyBucket('any-bucket')) - t.true(proxy.buckets.shouldProxyBucket('another-bucket')) - } finally { - process.env = before - } -}) - -test('AssetProxy - bucket filtering with LIST mode only proxies buckets in list', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'LIST' - process.env['ASSET_PROXY_BUCKET_LIST'] = 'allowed-bucket,another-allowed' - - s3Mock.on(HeadBucketCommand).resolves({ - $metadata: { httpStatusCode: 200 }, - BucketRegion: 'us-west-2' - }) - - const proxy = await AssetProxy.create() - - t.true(proxy.buckets.shouldProxyBucket('allowed-bucket')) - t.true(proxy.buckets.shouldProxyBucket('another-allowed')) - t.false(proxy.buckets.shouldProxyBucket('not-in-list')) - } finally { - process.env = before - } -}) - -test('AssetProxy - bucket filtering with ALL_BUCKETS_IN_ACCOUNT mode only proxies fetched buckets', async (t) => { - const before = { ...process.env } - try { - process.env['ASSET_PROXY_BUCKET_OPTION'] = 'ALL_BUCKETS_IN_ACCOUNT' - - s3Mock.on(ListBucketsCommand).resolves({ - Buckets: [ - { Name: 'fetched-bucket-1' }, - { Name: 'fetched-bucket-2' } - ] - }) - - s3Mock.on(HeadBucketCommand).resolves({ - $metadata: { httpStatusCode: 200 }, - BucketRegion: 'us-west-2' - }) - - const proxy = await AssetProxy.create() - - t.true(proxy.buckets.shouldProxyBucket('fetched-bucket-1')) - t.true(proxy.buckets.shouldProxyBucket('fetched-bucket-2')) - t.false(proxy.buckets.shouldProxyBucket('not-fetched-bucket')) - } finally { - process.env = before - } -}) - test('AssetProxy - getProxiedAssets() transforms item assets in ALL mode', async (t) => { const before = { ...process.env } try { From 6863badb8a6876ac84e2fd56938587eda37299c1 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Thu, 30 Oct 2025 08:44:21 -0500 Subject: [PATCH 21/22] review: correct requester pay information in README --- README.md | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff 
--git a/README.md b/README.md index 992c0a37..ca93d8a8 100644 --- a/README.md +++ b/README.md @@ -1131,14 +1131,17 @@ Available aggregations are: ## Asset Proxy The Asset Proxy feature enables stac-server to proxy access to S3 assets through the STAC -API by generating requester-pays pre-signed URLs. Only assets with S3 URIs (`s3://` -prefix) are proxied; other URL schemes are ignored. When the Asset Proxy feature is -enabled, asset `href` values pointing to S3 are replaced with proxy endpoint URLs when an -Item or Collection is served, while the original S3 URLs are preserved in the -`alternate.s3.href` field using the [Alternate Assets -Extension](https://github.com/stac-extensions/alternate-assets). Subsequent GET requests -to the proxy endpoint URLs are redirected to pre-signed S3 object URLS for download (the -requester pays for S3 egress). +API by generating pre-signed URLs. Only assets with S3 URIs (`s3://` prefix) are proxied; +other URL schemes are ignored. When the Asset Proxy feature is enabled, asset `href` +values pointing to S3 are replaced with proxy endpoint URLs when an Item or Collection is +served, while the original S3 URLs are preserved in the `alternate.s3.href` field using +the [Alternate Assets Extension](https://github.com/stac-extensions/alternate-assets). +Subsequent GET requests to the proxy endpoint URLs are redirected to pre-signed S3 URLs +for download. Note that the AWS account that stac-server is running under must have +permission to access the S3 buckets containing the assets and that the stac-server AWS +account will be charged for the S3 egress, regardless of whether the bucket is a +"Requester Pays" bucket or not (the stac-server AWS account is the requester when +generating the pre-signed URL). 
### Configuration From eab78902a608dfbe5a219629705b2806275b56e5 Mon Sep 17 00:00:00 2001 From: pjhartzell Date: Fri, 31 Oct 2025 10:02:46 -0500 Subject: [PATCH 22/22] fix: correct errors revealed by testing a deployment --- README.md | 10 +++++-- src/lib/asset-buckets.js | 49 ++++++++++++++++++-------------- tests/unit/test-asset-buckets.js | 9 ++++-- 3 files changed, 42 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index ca93d8a8..16a83de2 100644 --- a/README.md +++ b/README.md @@ -1172,7 +1172,9 @@ When asset proxying is enabled, two endpoints are available for accessing proxie ### IAM Permissions -For the Asset Proxy feature to generate pre-signed URLs, the API and ingest Lambdas must be assigned permissions for the S3 buckets containing the assets. Add the following to the IAM role statements in your `serverless.yml` file, adjusting the resources as needed: +For the Asset Proxy feature to generate pre-signed URLs, the API and ingest Lambdas must +be assigned permissions for the S3 buckets containing the assets. 
Add the following to the +IAM role statements in your `serverless.yml` file, adjusting the resources as needed: For the `LIST` mode, you can specify the buckets listed in `ASSET_PROXY_BUCKET_LIST`: @@ -1186,6 +1188,7 @@ For the `LIST` mode, you can specify the buckets listed in `ASSET_PROXY_BUCKET_L - Effect: Allow Action: - s3:HeadBucket + - s3:ListBucket Resource: - "arn:aws:s3:::my-bucket-1" - "arn:aws:s3:::my-bucket-2" @@ -1201,10 +1204,12 @@ For the `ALL` mode, use wildcards: - Effect: Allow Action: - s3:HeadBucket + - s3:ListBucket Resource: "arn:aws:s3:::*" ``` -When using `ALL_BUCKETS_IN_ACCOUNT` mode, the Lambda also needs permission to list buckets: +When using `ALL_BUCKETS_IN_ACCOUNT` mode, the Lambda also needs permission to list the +account buckets: ```yaml - Effect: Allow @@ -1214,6 +1219,7 @@ When using `ALL_BUCKETS_IN_ACCOUNT` mode, the Lambda also needs permission to li - Effect: Allow Action: - s3:HeadBucket + - s3:ListBucket Resource: "arn:aws:s3:::*" - Effect: Allow Action: diff --git a/src/lib/asset-buckets.js b/src/lib/asset-buckets.js index 7d3a98dc..f5032773 100644 --- a/src/lib/asset-buckets.js +++ b/src/lib/asset-buckets.js @@ -5,7 +5,8 @@ import { import { s3 } from './aws-clients.js' import logger from './logger.js' -const s3Client = s3() +// Follow, rather than throw, HeadBucket redirects for buckets not in the client's region +const s3Client = s3({ followRegionRedirects: true }) export const BucketOptionEnum = Object.freeze({ NONE: 'NONE', @@ -56,9 +57,10 @@ export class AssetBuckets { ) } - const count = Object.keys(this.bucketCache).length + const bucketNames = Object.keys(this.bucketCache) logger.info( - `Parsed ${count} buckets from ASSET_PROXY_BUCKET_LIST for asset proxy` + `Parsed ${bucketNames.length} buckets from ASSET_PROXY_BUCKET_LIST ` + + `for asset proxy: ${bucketNames.join(', ')}` ) } else { throw new Error( @@ -80,9 +82,10 @@ export class AssetBuckets { .map(async (name) => { await this.getBucket(name) }) ) - const 
count = Object.keys(this.bucketCache).length + const bucketNames = Object.keys(this.bucketCache) logger.info( - `Fetched ${count} buckets from AWS account for asset proxy` + `Fetched ${bucketNames.length} buckets from AWS account ` + + `for asset proxy: ${bucketNames.join(', ')}` ) break } @@ -99,29 +102,33 @@ export class AssetBuckets { async getBucket(bucketName) { if (!(bucketName in this.bucketCache)) { const command = new HeadBucketCommand({ Bucket: bucketName }) - const response = await s3Client.send(command) - const statusCode = response.$metadata.httpStatusCode let name = null let region = null - switch (statusCode) { - case 200: + try { + const response = await s3Client.send(command) name = bucketName region = response.BucketRegion === 'EU' ? 'eu-west-1' : response.BucketRegion || 'us-east-1' - break - case 403: - logger.warn(`Access denied to bucket ${bucketName}`) - break - case 404: - logger.warn(`Bucket ${bucketName} does not exist`) - break - case 400: - logger.warn(`Bad request for bucket ${bucketName}`) - break - default: - logger.warn(`Unexpected status code ${statusCode} for bucket ${bucketName}`) + } catch (err) { + const error = /** @type {any} */ (err) + const statusCode = error.$metadata?.httpStatusCode + + switch (statusCode) { + case 403: + logger.warn(`Access denied to bucket ${bucketName}`) + break + case 404: + logger.warn(`Bucket ${bucketName} does not exist`) + break + case 400: + logger.warn(`Bad request for bucket ${bucketName}`) + break + default: + logger.error(`Unexpected error for bucket ${bucketName}:`, error) + throw error + } } this.bucketCache[bucketName] = { name, region } diff --git a/tests/unit/test-asset-buckets.js b/tests/unit/test-asset-buckets.js index 26ada467..9ddfb936 100644 --- a/tests/unit/test-asset-buckets.js +++ b/tests/unit/test-asset-buckets.js @@ -50,7 +50,8 @@ test('AssetBuckets - LIST mode throws if bucket list is null', async (t) => { }) test('AssetBuckets - LIST mode throws if bucket is inaccessible', 
async (t) => { - s3Mock.on(HeadBucketCommand).resolves({ + s3Mock.on(HeadBucketCommand).rejects({ + name: '403', $metadata: { httpStatusCode: 403 } }) @@ -130,7 +131,8 @@ test('AssetBuckets - shouldProxyBucket with ALL_BUCKETS_IN_ACCOUNT mode only pro // Using serial to prevent HeadBucketCommand mock interference between tests test.serial('AssetBuckets - getBucket handles 403 access denied', async (t) => { - s3Mock.on(HeadBucketCommand).resolves({ + s3Mock.on(HeadBucketCommand).rejects({ + name: '403', $metadata: { httpStatusCode: 403 } }) @@ -143,7 +145,8 @@ test.serial('AssetBuckets - getBucket handles 403 access denied', async (t) => { // Using serial to prevent HeadBucketCommand mock interference between tests test.serial('AssetBuckets - getBucket handles 404 not found', async (t) => { - s3Mock.on(HeadBucketCommand).resolves({ + s3Mock.on(HeadBucketCommand).rejects({ + name: '404', $metadata: { httpStatusCode: 404 } })