From 687ab7cb2ef2194c96c6ce1e1ebfb1819b53c2e9 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Thu, 13 Mar 2025 13:40:01 -0700 Subject: [PATCH 1/4] Add a Euclid cloud access notebook --- tutorials/cloud_access/euclid-cloud-access.md | 303 ++++++++++++++++++ 1 file changed, 303 insertions(+) create mode 100644 tutorials/cloud_access/euclid-cloud-access.md diff --git a/tutorials/cloud_access/euclid-cloud-access.md b/tutorials/cloud_access/euclid-cloud-access.md new file mode 100644 index 00000000..6d627163 --- /dev/null +++ b/tutorials/cloud_access/euclid-cloud-access.md @@ -0,0 +1,303 @@ +--- +jupytext: + text_representation: + extension: .md + format_name: myst + format_version: 0.13 + jupytext_version: 1.16.3 +kernelspec: + display_name: irsa-tutorials + language: python + name: python3 +--- + +# Euclid Quick Release 1: Cloud Access + ++++ + +## Learning Goals +- Learn where Euclid QR1 data is present in the cloud +- Find an image and retireve its cutout from cloud +- Find an object and retrieve its spectrum from cloud + ++++ + +## Introduction +TODO: fill with links of docs/specs? + ++++ + +## Imports + +```{code-cell} ipython3 +# Uncomment the next line to install dependencies if needed. +# !pip install s3fs astropy astroquery matploltib +``` + +```{code-cell} ipython3 +import s3fs +from astropy.coordinates import SkyCoord +import astropy.units as u +from astropy.visualization import ImageNormalize, PercentileInterval, AsinhStretch +from astropy.io import fits +from astropy.nddata import Cutout2D +from astropy.wcs import WCS +from astropy.table import Table +from astroquery.ipac.irsa import Irsa +from matplotlib import pyplot as plt +import json +``` + +## Browse Euclid QR1 Bucket + +```{code-cell} ipython3 +BUCKET_NAME='nasa-irsa-euclid-q1' # internal to IPAC until public release (use LAN or VPN w/ Tunnel-all) +``` + +```{code-cell} ipython3 +s3 = s3fs.S3FileSystem(anon=True) +``` + +```{code-cell} ipython3 +s3.ls(f'{BUCKET_NAME}/q1') +``` + +## Find images for a coordinate search + ++++ + +### Locate MER images in the bucket + +```{code-cell} ipython3 +s3.ls(f'{BUCKET_NAME}/q1/MER') +``` + +```{code-cell} ipython3 +s3.ls(f'{BUCKET_NAME}/q1/MER/102018211') +``` + +```{code-cell} ipython3 +s3.ls(f'{BUCKET_NAME}/q1/MER/102018211/VIS') +``` + +As per doc specification, we need `MER/{tile_id}/{instrument}/EUC_MER_BGSUB-MOSAIC*.fits` for displaying background-subtracted mosiac images. But these images are stored under TILE IDs so first we need to find TILE ID for a coordinate search we are interested in. We will use astroquery (in next section) to retrieve FITS file paths for our coordinates. + ++++ + +### Get image file paths for a coordinate search of interest + +```{code-cell} ipython3 +# Star TYC 4429-1677-1 +ra = 273.11730230 * u.deg +dec = 68.20761349 * u.deg +``` + +```{code-cell} ipython3 +coord = SkyCoord(ra=ra, dec=dec) +search_radius = 10 * u.arcsec +``` + +List all Simple Image Access (SIA) collections for IRSA. + +```{code-cell} ipython3 +tbl = Irsa.list_collections(servicetype='SIA') +len(tbl) +``` + +Filter to only those containing "euclid": + +```{code-cell} ipython3 +tbl[['euclid' in v for v in tbl['collection']]] +``` + +We identify the collection we need for MER images: + +```{code-cell} ipython3 +img_collection = 'euclid_DpdMerBksMosaic' +``` + +```{code-cell} ipython3 +img_tbl = Irsa.query_sia(pos=(coord, search_radius), collection=img_collection).to_table() +img_tbl +``` + +Now we narrow it down to the images with science dataproduct subtype and Euclid facility: + +```{code-cell} ipython3 +euclid_sci_img_tbl = img_tbl[[row['facility_name']=='Euclid' and row['dataproduct_subtype']=='science' for row in img_tbl]] +euclid_sci_img_tbl +``` + +We can see there's a `cloud_access` column that gives us the location info of the image files we are interested in. So let's extract the S3 bucket file path from it. + +```{code-cell} ipython3 +def get_s3_fpath(cloud_access): + cloud_info = json.loads(cloud_access) # converts str to dict + bucket_name = cloud_info['aws']['bucket_name'] + key = cloud_info['aws']['key'] + + return f'{bucket_name}/{key}' +``` + +```{code-cell} ipython3 +[get_s3_fpath(row['cloud_access']) for row in euclid_sci_img_tbl] +``` + +Let's also extract filter names to use when displaying the images: + +```{code-cell} ipython3 +def get_filter_name(instrument, bandpass): + return f'{instrument}_{bandpass}' if instrument!=bandpass else instrument +``` + +```{code-cell} ipython3 +[get_filter_name(row['instrument_name'], row['energy_bandpassname']) for row in euclid_sci_img_tbl] +``` + +## Retrieve image cutouts from the cloud +These image files are very big (~1.4GB), so we use astropy's lazy-loading capability of FITS for better performance. (See [Obtaining subsets from cloud-hosted FITS files](https://docs.astropy.org/en/stable/io/fits/usage/cloud.html#fits-io-cloud).) + +```{code-cell} ipython3 +cutout_size = 1 * u.arcmin +``` + +```{code-cell} ipython3 +cutouts = [] +filters = [] + +for row in euclid_sci_img_tbl: + s3_fpath = get_s3_fpath(row['cloud_access']) + filter_name = get_filter_name(row['instrument_name'], row['energy_bandpassname']) + + with fits.open(f's3://{s3_fpath}', fsspec_kwargs={"anon": True}) as hdul: + print(f'Retrieving cutout for {filter_name} ...') + cutout = Cutout2D(hdul[0].section, + position=coord, + size=cutout_size, + wcs=WCS(hdul[0].header)) + cutouts.append(cutout) + filters.append(filter_name) +``` + +```{code-cell} ipython3 +fig, axes = plt.subplots(2, 2, figsize=(4 * 2, 4 * 2), subplot_kw={'projection': cutouts[0].wcs}) + +for idx, ax in enumerate(axes.flat): + norm = ImageNormalize(cutouts[idx].data, interval=PercentileInterval(99), stretch=AsinhStretch()) + ax.imshow(cutouts[idx].data, cmap='gray', origin='lower', norm=norm) + ax.set_xlabel('RA') + ax.set_ylabel('Dec') + ax.text(0.95, 0.05, filters[idx], color='white', fontsize=14, transform=ax.transAxes, va='bottom', ha='right') + +plt.tight_layout() +``` + +## Find objects for the coordinates of our interest + ++++ + +### Locate MER catalogs in the bucket + +```{code-cell} ipython3 +s3.ls(f'{BUCKET_NAME}/q1/catalogs') +``` + +```{code-cell} ipython3 +s3.ls(f'{BUCKET_NAME}/q1/catalogs/MER_FINAL_CATALOG') +``` + +```{code-cell} ipython3 +s3.ls(f'{BUCKET_NAME}/q1/catalogs/MER_FINAL_CATALOG/102018211') +``` + +As per doc specification, we need `catalogs/MER_FINAL_CATALOG/{tile_id}/EUC_MER_FINAL-CAT*.fits` for listing the objects catalogued. But we only need to find objects in our coordinates of interest so we will use astroquery to do a spatial search in MER catalog (combined for all tiles). + ++++ + +### Get object IDs for the coordinates of our interest + +```{code-cell} ipython3 +tbl_catalogs = Irsa.list_catalogs(full=True).to_table() +len(tbl_catalogs) +``` + +```{code-cell} ipython3 +tbl_catalogs[['euclid' in v for v in tbl_catalogs['schema_name']]] +``` + +From this table, we can extract the MER catalog name. We also see several other interesting catalogs, let's also extract spectral file association catalog for retrieving spectra later. + +```{code-cell} ipython3 +euclid_mer_catalog = 'euclid_q1_mer_catalogue' +euclid_spec_association_catalog = 'euclid.objectid_spectrafile_association_q1' +``` + +Now, we do a TAP search with spatial constraints for our coordinates. We use cone of 5 arcsec around our source to pinpoint its object ID in Euclid catalog. + +```{code-cell} ipython3 +search_radius = (5 * u.arcsec).to('deg') + +adql_query = f"SELECT * \ + FROM {euclid_mer_catalog} \ + WHERE CONTAINS(POINT('ICRS', ra, dec), \ + CIRCLE('ICRS', {ra.value}, {dec.value}, {search_radius.value})) = 1" + +mer_catalog_tbl = Irsa.query_tap(query=adql_query).to_table() +mer_catalog_tbl +``` + +```{code-cell} ipython3 +object_id = int(mer_catalog_tbl['object_id'][0]) +object_id +``` + +## Find spectra for the coordinates of our interest +Using the object ID(s) we extracted above, we can narrow down the spectral file association catalog to identify spectra file path(s). + +```{code-cell} ipython3 +adql_query = f"SELECT * FROM {euclid_spec_association_catalog} \ + WHERE objectid = {object_id}" + +spec_association_tbl = Irsa.query_tap(adql_query).to_table() +spec_association_tbl +``` + +We can see the `uri` column that gives us location of spectra file on IBE, we can map it to S3 bucket key to retrieve spectra file from the cloud. This is a very big FITS spectra file with multiple extensions where each extension contains spectrum of one object. The `hdu` column gives us the extension number for our object. So let's extract both of these. + +```{code-cell} ipython3 +spec_fpath_key = spec_association_tbl['uri'][0].replace('ibe/data/euclid/', '') +spec_fpath_key +``` + +```{code-cell} ipython3 +object_hdu_idx = int(spec_association_tbl['hdu'][0]) +object_hdu_idx +``` + +## Retrieve spectrum from the cloud +Again we use astropy's lazy-loading capability of FITS to only retrieve the spectrum table of our object from the S3 bucket. + +```{code-cell} ipython3 +with fits.open(f's3://{BUCKET_NAME}/{spec_fpath_key}', fsspec_kwargs={'anon': True}) as hdul: + spec_hdu = hdul[object_hdu_idx] + spec_tbl = Table.read(spec_hdu) +``` + +```{code-cell} ipython3 +spec_tbl +``` + +```{code-cell} ipython3 +plt.plot(spec_tbl['WAVELENGTH'], spec_tbl['SIGNAL']) +plt.xlabel(spec_tbl['WAVELENGTH'].unit.to_string('latex_inline')) +plt.ylabel(spec_tbl['SIGNAL'].unit.to_string('latex_inline')) + +plt.title(f'Euclid Object ID: {object_id}'); +``` + +## About this Notebook +Author: Jaladh Singhal (IRSA Developer) in conjunction with Tiffany Meshkat, Vandana Desai, Brigitta Sipőcz, and the IPAC Science Platform team + +Updated: 2025-03-13 + +Contact: the [IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems. From 91c8d38f644e2c2a2a31d950111f698123c613e6 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Fri, 14 Mar 2025 15:18:59 -0700 Subject: [PATCH 2/4] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Brigitta Sipőcz --- tutorials/cloud_access/euclid-cloud-access.md | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/tutorials/cloud_access/euclid-cloud-access.md b/tutorials/cloud_access/euclid-cloud-access.md index 6d627163..6225fc95 100644 --- a/tutorials/cloud_access/euclid-cloud-access.md +++ b/tutorials/cloud_access/euclid-cloud-access.md @@ -87,20 +87,14 @@ As per doc specification, we need `MER/{tile_id}/{instrument}/EUC_MER_BGSUB-MOSA ### Get image file paths for a coordinate search of interest ```{code-cell} ipython3 -# Star TYC 4429-1677-1 -ra = 273.11730230 * u.deg -dec = 68.20761349 * u.deg -``` - -```{code-cell} ipython3 -coord = SkyCoord(ra=ra, dec=dec) +coord = SkyCoord.from_name("TYC 4429-1677-1") search_radius = 10 * u.arcsec ``` List all Simple Image Access (SIA) collections for IRSA. ```{code-cell} ipython3 -tbl = Irsa.list_collections(servicetype='SIA') +collections = Irsa.list_collections(servicetype='SIA') len(tbl) ``` @@ -296,8 +290,9 @@ plt.title(f'Euclid Object ID: {object_id}'); ``` ## About this Notebook -Author: Jaladh Singhal (IRSA Developer) in conjunction with Tiffany Meshkat, Vandana Desai, Brigitta Sipőcz, and the IPAC Science Platform team -Updated: 2025-03-13 +**Author:** Jaladh Singhal (IRSA Developer) in conjunction with Tiffany Meshkat, Vandana Desai, Brigitta Sipőcz, and the IPAC Science Platform team + +**Updated:** 2025-03-13 -Contact: the [IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems. +**Contact:** the [IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems. From 41b8c5f9e31ae66c474ab5c1a895646a59527227 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Fri, 14 Mar 2025 18:52:05 -0700 Subject: [PATCH 3/4] Update some markdown and fix code to address comments --- tutorials/cloud_access/euclid-cloud-access.md | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tutorials/cloud_access/euclid-cloud-access.md b/tutorials/cloud_access/euclid-cloud-access.md index 6225fc95..a359a0a1 100644 --- a/tutorials/cloud_access/euclid-cloud-access.md +++ b/tutorials/cloud_access/euclid-cloud-access.md @@ -6,7 +6,7 @@ jupytext: format_version: 0.13 jupytext_version: 1.16.3 kernelspec: - display_name: irsa-tutorials + display_name: Python 3 (ipykernel) language: python name: python3 --- @@ -16,18 +16,24 @@ kernelspec: +++ ## Learning Goals -- Learn where Euclid QR1 data is present in the cloud -- Find an image and retireve its cutout from cloud -- Find an object and retrieve its spectrum from cloud +By the end of this tutorial, you will: +- Learn where Euclid Q1 data are stored in the cloud. +- Retrieve an image cutout from the cloud. +- Retrieve a spectrum from the cloud. +++ ## Introduction -TODO: fill with links of docs/specs? +Euclid launched in July 2023 as a European Space Agency (ESA) mission with involvement by NASA. The primary science goals of Euclid are to better understand the composition and evolution of the dark Universe. The Euclid mission is providing space-based imaging and spectroscopy as well as supporting ground-based imaging to achieve these primary goals. These data will be archived by multiple global repositories, including IRSA, where they will support transformational work in many areas of astrophysics. + +Euclid Quick Release 1 (Q1) consists of consists of ~30 TB of imaging, spectroscopy, and catalogs covering four non-contiguous fields: Euclid Deep Field North (22.9 sq deg), Euclid Deep Field Fornax (12.1 sq deg), Euclid Deep Field South (28.1 sq deg), and LDN1641. + +Euclid Q1 data were released on-premises at IPAC and in the cloud via Amazon Web Services' Open Data Repository. This notebook introduces users to accessing Euclid Q1 data from the cloud. Additional Euclid-specific notebooks can be found at ["Accessing Euclid data" section](https://caltech-ipac.github.io/irsa-tutorials/#accessing-euclid-data) and additional notebooks about how to access IRSA-curated data from the AWS ODR can be found at ["Accessing IRSA's cloud holdings" section](https://caltech-ipac.github.io/irsa-tutorials/#accessing-irsa-s-cloud-holdings). +++ ## Imports +- TODO: fill the imports explaination ```{code-cell} ipython3 # Uncomment the next line to install dependencies if needed. @@ -58,6 +64,8 @@ BUCKET_NAME='nasa-irsa-euclid-q1' # internal to IPAC until public release (use L s3 = s3fs.S3FileSystem(anon=True) ``` +TODO: link s3fs docs + ```{code-cell} ipython3 s3.ls(f'{BUCKET_NAME}/q1') ``` @@ -95,16 +103,16 @@ List all Simple Image Access (SIA) collections for IRSA. ```{code-cell} ipython3 collections = Irsa.list_collections(servicetype='SIA') -len(tbl) +len(collections) ``` Filter to only those containing "euclid": ```{code-cell} ipython3 -tbl[['euclid' in v for v in tbl['collection']]] +collections[['euclid' in v for v in collections['collection']]] ``` -We identify the collection we need for MER images: +As per "Data Products Overview" in [user guide](https://irsa.ipac.caltech.edu/data/Euclid/docs/euclid_archive_at_irsa_user_guide.pdf) and the above table, we identify that MER Mosiacs are available as follows: ```{code-cell} ipython3 img_collection = 'euclid_DpdMerBksMosaic' @@ -234,7 +242,7 @@ search_radius = (5 * u.arcsec).to('deg') adql_query = f"SELECT * \ FROM {euclid_mer_catalog} \ WHERE CONTAINS(POINT('ICRS', ra, dec), \ - CIRCLE('ICRS', {ra.value}, {dec.value}, {search_radius.value})) = 1" + CIRCLE('ICRS', {coord.ra.deg}, {coord.dec.deg}, {search_radius.value})) = 1" mer_catalog_tbl = Irsa.query_tap(query=adql_query).to_table() mer_catalog_tbl From 91619c92f5a82831ec6032c84a20cbd1ef120bd6 Mon Sep 17 00:00:00 2001 From: Jaladh Singhal Date: Sat, 15 Mar 2025 19:21:01 -0700 Subject: [PATCH 4/4] Incorporate Vandana's feedback about narrative --- tutorials/cloud_access/euclid-cloud-access.md | 113 ++++++++++-------- 1 file changed, 62 insertions(+), 51 deletions(-) diff --git a/tutorials/cloud_access/euclid-cloud-access.md b/tutorials/cloud_access/euclid-cloud-access.md index a359a0a1..e13fd157 100644 --- a/tutorials/cloud_access/euclid-cloud-access.md +++ b/tutorials/cloud_access/euclid-cloud-access.md @@ -23,7 +23,7 @@ By the end of this tutorial, you will: +++ -## Introduction +## 1. Introduction Euclid launched in July 2023 as a European Space Agency (ESA) mission with involvement by NASA. The primary science goals of Euclid are to better understand the composition and evolution of the dark Universe. The Euclid mission is providing space-based imaging and spectroscopy as well as supporting ground-based imaging to achieve these primary goals. These data will be archived by multiple global repositories, including IRSA, where they will support transformational work in many areas of astrophysics. Euclid Quick Release 1 (Q1) consists of consists of ~30 TB of imaging, spectroscopy, and catalogs covering four non-contiguous fields: Euclid Deep Field North (22.9 sq deg), Euclid Deep Field Fornax (12.1 sq deg), Euclid Deep Field South (28.1 sq deg), and LDN1641. @@ -32,8 +32,12 @@ Euclid Q1 data were released on-premises at IPAC and in the cloud via Amazon Web +++ -## Imports -- TODO: fill the imports explaination +## 2. Imports +- `s3fs` for browsing S3 buckets +- `astropy` for handling coordinates, units, FITS I/O, tables, images, etc. +- `astroquery` for querying Euclid data products from IRSA +- `matplotlib` for visualization +- `json` for decoding JSON strings ```{code-cell} ipython3 # Uncomment the next line to install dependencies if needed. @@ -54,52 +58,53 @@ from matplotlib import pyplot as plt import json ``` -## Browse Euclid QR1 Bucket +## 3. Browse Euclid Q1 cloud-hosted data ```{code-cell} ipython3 -BUCKET_NAME='nasa-irsa-euclid-q1' # internal to IPAC until public release (use LAN or VPN w/ Tunnel-all) +BUCKET_NAME = 'nasa-irsa-euclid-q1' ``` +[s3fs](https://s3fs.readthedocs.io/en/latest/) provides a filesystem-like python interface for AWS S3 buckets. First we create a s3 client: + ```{code-cell} ipython3 s3 = s3fs.S3FileSystem(anon=True) ``` -TODO: link s3fs docs +Then we list the `q1` directory that contains Euclid Q1 data products: ```{code-cell} ipython3 s3.ls(f'{BUCKET_NAME}/q1') ``` -## Find images for a coordinate search - -+++ - -### Locate MER images in the bucket +Let's navigate to MER images (available as FITS files): ```{code-cell} ipython3 -s3.ls(f'{BUCKET_NAME}/q1/MER') +s3.ls(f'{BUCKET_NAME}/q1/MER')[:10] # ls only top 10 to limit the long output ``` ```{code-cell} ipython3 -s3.ls(f'{BUCKET_NAME}/q1/MER/102018211') +s3.ls(f'{BUCKET_NAME}/q1/MER/102018211') # pick any tile ID from above ``` ```{code-cell} ipython3 -s3.ls(f'{BUCKET_NAME}/q1/MER/102018211/VIS') +s3.ls(f'{BUCKET_NAME}/q1/MER/102018211/VIS') # pick any instrument from above ``` -As per doc specification, we need `MER/{tile_id}/{instrument}/EUC_MER_BGSUB-MOSAIC*.fits` for displaying background-subtracted mosiac images. But these images are stored under TILE IDs so first we need to find TILE ID for a coordinate search we are interested in. We will use astroquery (in next section) to retrieve FITS file paths for our coordinates. +As per "Browsable Directories" section in [user guide](https://irsa.ipac.caltech.edu/data/Euclid/docs/euclid_archive_at_irsa_user_guide.pdf), we need `MER/{tile_id}/{instrument}/EUC_MER_BGSUB-MOSAIC*.fits` for displaying background-subtracted mosiac images. But these images are stored under TILE IDs so first we need to find TILE ID for a coordinate search we are interested in. We will use astroquery (in next section) to retrieve FITS file paths for our coordinates by doing spatial search. +++ -### Get image file paths for a coordinate search of interest +## 4. Do a spatial search for MER mosaics + +Pick a target and search radius: ```{code-cell} ipython3 -coord = SkyCoord.from_name("TYC 4429-1677-1") +target_name = 'TYC 4429-1677-1' +coord = SkyCoord.from_name(target_name) search_radius = 10 * u.arcsec ``` -List all Simple Image Access (SIA) collections for IRSA. +List all Simple Image Access (SIA) collections for IRSA: ```{code-cell} ipython3 collections = Irsa.list_collections(servicetype='SIA') @@ -112,25 +117,29 @@ Filter to only those containing "euclid": collections[['euclid' in v for v in collections['collection']]] ``` -As per "Data Products Overview" in [user guide](https://irsa.ipac.caltech.edu/data/Euclid/docs/euclid_archive_at_irsa_user_guide.pdf) and the above table, we identify that MER Mosiacs are available as follows: +As per "Data Products Overview" in [user guide](https://irsa.ipac.caltech.edu/data/Euclid/docs/euclid_archive_at_irsa_user_guide.pdf) and above table, we identify that MER Mosiacs are available as the following collection: ```{code-cell} ipython3 img_collection = 'euclid_DpdMerBksMosaic' ``` +Now query this collection for our target's coordinates and search radius: + ```{code-cell} ipython3 img_tbl = Irsa.query_sia(pos=(coord, search_radius), collection=img_collection).to_table() img_tbl ``` -Now we narrow it down to the images with science dataproduct subtype and Euclid facility: +Let's narrow it down to the images with science dataproduct subtype and Euclid facility: ```{code-cell} ipython3 -euclid_sci_img_tbl = img_tbl[[row['facility_name']=='Euclid' and row['dataproduct_subtype']=='science' for row in img_tbl]] +euclid_sci_img_tbl = img_tbl[[row['facility_name']=='Euclid' + and row['dataproduct_subtype']=='science' + for row in img_tbl]] euclid_sci_img_tbl ``` -We can see there's a `cloud_access` column that gives us the location info of the image files we are interested in. So let's extract the S3 bucket file path from it. +We can see there's a `cloud_access` column that gives us the location info of the image files we are interested in. So let's extract the S3 bucket file path from it: ```{code-cell} ipython3 def get_s3_fpath(cloud_access): @@ -156,7 +165,7 @@ def get_filter_name(instrument, bandpass): [get_filter_name(row['instrument_name'], row['energy_bandpassname']) for row in euclid_sci_img_tbl] ``` -## Retrieve image cutouts from the cloud +## 5. Efficiently retrieve mosaic cutouts These image files are very big (~1.4GB), so we use astropy's lazy-loading capability of FITS for better performance. (See [Obtaining subsets from cloud-hosted FITS files](https://docs.astropy.org/en/stable/io/fits/usage/cloud.html#fits-io-cloud).) ```{code-cell} ipython3 @@ -194,37 +203,40 @@ for idx, ax in enumerate(axes.flat): plt.tight_layout() ``` -## Find objects for the coordinates of our interest - -+++ - -### Locate MER catalogs in the bucket +## 6. Find the MER catalog for a given tile +Let's navigate to MER catalog in the Euclid Q1 bucket: ```{code-cell} ipython3 s3.ls(f'{BUCKET_NAME}/q1/catalogs') ``` ```{code-cell} ipython3 -s3.ls(f'{BUCKET_NAME}/q1/catalogs/MER_FINAL_CATALOG') +s3.ls(f'{BUCKET_NAME}/q1/catalogs/MER_FINAL_CATALOG')[:10] # ls only top 10 to limit the long output ``` ```{code-cell} ipython3 -s3.ls(f'{BUCKET_NAME}/q1/catalogs/MER_FINAL_CATALOG/102018211') +mer_tile_id = 102160339 # from the image paths for the target we picked +s3.ls(f'{BUCKET_NAME}/q1/catalogs/MER_FINAL_CATALOG/{mer_tile_id}') ``` -As per doc specification, we need `catalogs/MER_FINAL_CATALOG/{tile_id}/EUC_MER_FINAL-CAT*.fits` for listing the objects catalogued. But we only need to find objects in our coordinates of interest so we will use astroquery to do a spatial search in MER catalog (combined for all tiles). +As per "Browsable Directiories" section in [user guide](https://irsa.ipac.caltech.edu/data/Euclid/docs/euclid_archive_at_irsa_user_guide.pdf), we can use `catalogs/MER_FINAL_CATALOG/{tile_id}/EUC_MER_FINAL-CAT*.fits` for listing the objects catalogued. We can read the identified FITS file as table and do filtering on ra, dec columns to find object ID(s) only for the target we picked. But it will be an expensive operation so we will instead use astroquery (in next section) to do a spatial search in the MER catalog provided by IRSA. + +```{note} +Once the catalogs are available as Parquet files in the cloud, we can efficiently do spatial filtering directly on the cloud-hosted file to identify object ID(s) for our target. But for the time being, we can use catalog VO services through astroquery to do the same. +``` +++ -### Get object IDs for the coordinates of our interest +## 7. Find the MER Object ID for our target +First, list the Euclid catalogs provided by IRSA: ```{code-cell} ipython3 -tbl_catalogs = Irsa.list_catalogs(full=True).to_table() -len(tbl_catalogs) +catalogs = Irsa.list_catalogs(full=True).to_table() +len(catalogs) ``` ```{code-cell} ipython3 -tbl_catalogs[['euclid' in v for v in tbl_catalogs['schema_name']]] +catalogs[['euclid' in v for v in catalogs['schema_name']]] ``` From this table, we can extract the MER catalog name. We also see several other interesting catalogs, let's also extract spectral file association catalog for retrieving spectra later. @@ -234,17 +246,13 @@ euclid_mer_catalog = 'euclid_q1_mer_catalogue' euclid_spec_association_catalog = 'euclid.objectid_spectrafile_association_q1' ``` -Now, we do a TAP search with spatial constraints for our coordinates. We use cone of 5 arcsec around our source to pinpoint its object ID in Euclid catalog. +Now, we do a region search within a cone of 5 arcsec around our target to pinpoint its object ID in Euclid catalog: ```{code-cell} ipython3 -search_radius = (5 * u.arcsec).to('deg') +search_radius = 5 * u.arcsec -adql_query = f"SELECT * \ - FROM {euclid_mer_catalog} \ - WHERE CONTAINS(POINT('ICRS', ra, dec), \ - CIRCLE('ICRS', {coord.ra.deg}, {coord.dec.deg}, {search_radius.value})) = 1" - -mer_catalog_tbl = Irsa.query_tap(query=adql_query).to_table() +mer_catalog_tbl = Irsa.query_region(coordinates=coord, spatial='Cone', + catalog=euclid_mer_catalog, radius=search_radius) mer_catalog_tbl ``` @@ -253,8 +261,8 @@ object_id = int(mer_catalog_tbl['object_id'][0]) object_id ``` -## Find spectra for the coordinates of our interest -Using the object ID(s) we extracted above, we can narrow down the spectral file association catalog to identify spectra file path(s). +## 8. Find the spectrum of an object in the MER catalog +Using the object ID(s) we extracted above, we can narrow down the spectral file association catalog to identify spectra file path(s). So we do the following TAP search: ```{code-cell} ipython3 adql_query = f"SELECT * FROM {euclid_spec_association_catalog} \ @@ -264,7 +272,11 @@ spec_association_tbl = Irsa.query_tap(adql_query).to_table() spec_association_tbl ``` -We can see the `uri` column that gives us location of spectra file on IBE, we can map it to S3 bucket key to retrieve spectra file from the cloud. This is a very big FITS spectra file with multiple extensions where each extension contains spectrum of one object. The `hdu` column gives us the extension number for our object. So let's extract both of these. +```{warning} +If you picked a target other than what this notebook uses, it's possible that there is no spectrum associated for your target's object ID. In that case, `spec_association_tbl` will contain 0 rows. +``` + +In above table, we can see that the `uri` column gives us location of spectra file on IBE. We can map it to S3 bucket key to retrieve spectra file from the cloud. This is a very big FITS spectra file with multiple extensions where each extension contains spectrum of one object. The `hdu` column gives us the extension number for our object. So let's extract both of these. ```{code-cell} ipython3 spec_fpath_key = spec_association_tbl['uri'][0].replace('ibe/data/euclid/', '') @@ -276,8 +288,7 @@ object_hdu_idx = int(spec_association_tbl['hdu'][0]) object_hdu_idx ``` -## Retrieve spectrum from the cloud -Again we use astropy's lazy-loading capability of FITS to only retrieve the spectrum table of our object from the S3 bucket. +Again, we use astropy's lazy-loading capability of FITS to only retrieve the spectrum table of our object from the S3 bucket. ```{code-cell} ipython3 with fits.open(f's3://{BUCKET_NAME}/{spec_fpath_key}', fsspec_kwargs={'anon': True}) as hdul: @@ -294,13 +305,13 @@ plt.plot(spec_tbl['WAVELENGTH'], spec_tbl['SIGNAL']) plt.xlabel(spec_tbl['WAVELENGTH'].unit.to_string('latex_inline')) plt.ylabel(spec_tbl['SIGNAL'].unit.to_string('latex_inline')) -plt.title(f'Euclid Object ID: {object_id}'); +plt.title(f'Spectrum of Target: {target_name}\n(Euclid Object ID: {object_id})'); ``` ## About this Notebook -**Author:** Jaladh Singhal (IRSA Developer) in conjunction with Tiffany Meshkat, Vandana Desai, Brigitta Sipőcz, and the IPAC Science Platform team +**Author:** Jaladh Singhal (IRSA Developer) in conjunction with Vandana Desai, Brigitta Sipőcz, Tiffany Meshkat and the IPAC Science Platform team -**Updated:** 2025-03-13 +**Updated:** 2025-03-17 **Contact:** the [IRSA Helpdesk](https://irsa.ipac.caltech.edu/docs/help_desk.html) with questions or reporting problems.