diff --git a/notebooks/NIRISS/NIRISS_WFSS_advanced/00_niriss_mast_query_data_setup.ipynb b/notebooks/NIRISS/NIRISS_WFSS_advanced/00_niriss_mast_query_data_setup.ipynb index 01f5e6afa..fec84380e 100644 --- a/notebooks/NIRISS/NIRISS_WFSS_advanced/00_niriss_mast_query_data_setup.ipynb +++ b/notebooks/NIRISS/NIRISS_WFSS_advanced/00_niriss_mast_query_data_setup.ipynb @@ -25,6 +25,7 @@ "\n", "\n", "**Author**: Camilla Pacifici (cpacifici@stsci.edu) & Rachel Plesha (rplesha@stsci.edu) & Jo Taylor (jotaylor@stsci.edu)
\n", + "**Last edited**: October 2025
\n", "**First Published**: May 2024\n", "\n", "This notebook was inspired by the [JWebbinar session about MAST](https://github.com/spacetelescope/jwebbinar_prep/blob/main/mast_session/Crowded_Field/Crowded_Field.ipynb)." @@ -342,7 +343,9 @@ "cell_type": "code", "execution_count": null, "id": "616ff15c", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# Now let's get the products for each batch of observations, and filter down to only the products of interest.\n", @@ -381,7 +384,7 @@ "id": "31ff8769-1240-452d-b84c-eca69d53a13f", "metadata": {}, "source": [ - "If continuing on with the WFSS notebooks, let's double check that we've downloaded all of the files that we need for the remaining notebooks. There should be 149 files downloaded. " + "If continuing on with the WFSS notebooks, let's double check that we've downloaded all of the files that we need for the remaining notebooks. There should be 155 files downloaded. " ] }, { diff --git a/notebooks/NIRISS/NIRISS_WFSS_advanced/01_niriss_wfss_image2_image3.ipynb b/notebooks/NIRISS/NIRISS_WFSS_advanced/01_niriss_wfss_image2_image3.ipynb index 97066bb35..6b001dfe7 100644 --- a/notebooks/NIRISS/NIRISS_WFSS_advanced/01_niriss_wfss_image2_image3.ipynb +++ b/notebooks/NIRISS/NIRISS_WFSS_advanced/01_niriss_wfss_image2_image3.ipynb @@ -29,7 +29,8 @@ "\n", "**Author**: Rachel Plesha (rplesha@stsci.edu), Camilla Pacifici (cpacifici@stsci.edu), JWebbinar notebooks.
\n", "**First Published**: May 2024
\n", - "**Last tested**: This notebook was last tested with JWST pipeline version 1.12.5 and the CRDS context jwst_1225.pmap." + "**Last edited**: October 2025
\n", + "**Last tested**: This notebook was last tested with JWST pipeline version 1.20.2 and the CRDS context jwst_1464.pmap." ] }, { @@ -72,10 +73,10 @@ "import glob\n", "import json\n", "import warnings\n", - "import urllib\n", - "import zipfile\n", + "#import urllib\n", + "#import zipfile\n", "import numpy as np\n", - "import pandas as pd\n", + "#import pandas as pd\n", "\n", "import astropy.units as u\n", "from astropy.io import fits\n", @@ -128,6 +129,8 @@ "outputs": [], "source": [ "data_dir = 'data'\n", + "os.makedirs(data_dir, exist_ok=True)\n", + " \n", "default_run_image3 = 'default_image3_calibrated' # where the results of the default image3 run will be saved (inside of data_dir)\n", "custom_run_image3 = 'custom_image3_calibrated'# where the results of the custom image3 run will be saved (inside of data_dir)" ] @@ -143,14 +146,74 @@ { "cell_type": "code", "execution_count": null, - "id": "39fbaa8a-5062-4a2d-b8a9-f3764086aa91", + "id": "f8a25a53-c1b0-4c23-8018-6d8793e9e675", + "metadata": {}, + "outputs": [], + "source": [ + "from astroquery.mast import Observations\n", + "\n", + "Observations.get_metadata(\"observations\")\n", + "\n", + "# Select the proposal ID, instrument, and some useful keywords (filters in this case).\n", + "obs_table = Observations.query_criteria(obs_collection=[\"JWST\"], \n", + " instrument_name=[\"NIRISS/IMAGE\"],\n", + " provenance_name=[\"CALJWST\"], # Executed observations\n", + " filters=['F115W', 'F150W', 'F200W'],\n", + " proposal_id=[2079],\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cd6e5d18-41a5-476f-8afe-b6b1c14260ff", "metadata": {}, "outputs": [], + "source": [ + "batch_size = 5 # 5 files at a time maximizes the download speed.\n", + "\n", + "# Let's split up our list of files, ``obs_id_table``, into batches according to our batch size.\n", + "obs_batches = [obs_table[i:i+batch_size] for i in range(0, len(obs_table), batch_size)]\n", + "\n", + "for index, batch in 
enumerate(obs_batches):\n", + " \n", + " # Progress indicator...\n", + " print('\\n'+f'Batch #{index+1} / {len(obs_batches)}')\n", + " \n", + " # Make a list of the `obsid` identifiers from our Astropy table of observations to get products for.\n", + " obsids = batch['obsid']\n", + " print('Working with the following obsids:')\n", + " for number, obs_text in zip(obsids, batch['obs_id']):\n", + " print(f\"{number} : {obs_text}\")\n", + " \n", + " # Get list of products \n", + " products = Observations.get_product_list(obsids)\n", + " \n", + " # Filter the products to only get only the products of interest\n", + " filtered_products = Observations.filter_products(products, \n", + " productType=[\"SCIENCE\", \"INFO\"],\n", + " productSubGroupDescription=[\"RATE\", \"ASN\"], # Not using \"UNCAL\" here since we can start with the rate files\n", + " calib_level=[2, 3] # not using 1 here since not getting the UNCAL files\n", + " )\n", + " \n", + " # Download products for these records.\n", + " # The `flat=True` option stores all files in a single directory specified by `download_dir`.\n", + " manifest = Observations.download_products(filtered_products,\n", + " download_dir=data_dir,\n", + " flat=True, # astroquery v0.4.7 or later only\n", + " ) \n", + " print('Products downloaded:\\n', filtered_products['productFilename'])" + ] + }, + { + "cell_type": "raw", + "id": "0a579aff-61a5-49fc-acca-d61b562fa334", + "metadata": {}, "source": [ "# if you have not downloaded the data from notebook 00, run this cell. 
Otherwise, feel free to skip it.\n", "\n", "# Download uncalibrated data from Box into the data directory:\n", - "boxlink = 'https://data.science.stsci.edu/redirect/JWST/jwst-data_analysis_tools/niriss_wfss_advanced/niriss_wfss_advanced_01_input.zip'\n", + "boxlink = 'https://data.science.stsci.edu/redirect/JWST/jwst-data_analysis_tools/niriss_wfss_advanced/2025_niriss_wfss_advanced_01_input.zip'\n", "boxfile = os.path.basename(boxlink)\n", "urllib.request.urlretrieve(boxlink, boxfile)\n", "\n", @@ -180,8 +243,8 @@ "outputs": [], "source": [ "# From the csv file we created earlier, find a list of all of the grism observations we will want to calibrate with spec2\n", - "listrate_file = 'list_ngdeep_rate.csv'\n", - "rate_df = pd.read_csv(listrate_file)" + "#listrate_file = 'list_ngdeep_rate.csv'\n", + "#rate_df = pd.read_csv(listrate_file)" ] }, { @@ -207,9 +270,9 @@ "metadata": {}, "outputs": [], "source": [ + "# make the directories inside of the directory where the data are\n", "for temp_dir in [default_run_image3, custom_run_image3]:\n", - " if not os.path.exists(temp_dir):\n", - " os.mkdir(temp_dir)" + " os.makedirs(temp_dir, exist_ok=True)" ] }, { @@ -228,9 +291,7 @@ "metadata": {}, "source": [ "\n", - "### Run Default Image2\n", - "\n", - "Image2 is run on the direct image rate files. While your program should have valid association files to download from MAST, if for any reason you need to make your own association file, see [Creating Custom ASN Files](#customasn)." 
+ "### Run Default Image2" ] }, { @@ -255,7 +316,7 @@ "print(len(image2_asns), 'Image2 ASN files') # there should be 40 asn files for image2\n", "\n", "# the number of association files should match the number of rate files\n", - "print(len(rate_df[rate_df['FILTER'] == 'CLEAR']), 'Direct Image rate files')" + "#print(len(rate_df[rate_df['FILTER'] == 'CLEAR']), 'Direct Image rate files')" ] }, { @@ -362,15 +423,17 @@ "print(len(image3_asns), 'Image3 ASN files') # there should be 3 image3 association files\n", "\n", "# the number of image3 association files should match the number of unique blocking filters used\n", - "uniq_filters = np.unique(rate_df[rate_df['FILTER'] == 'CLEAR']['PUPIL'])\n", - "print(f\"{len(uniq_filters)} unique filters used: {uniq_filters}\")" + "#uniq_filters = np.unique(rate_df[rate_df['FILTER'] == 'CLEAR']['PUPIL'])\n", + "#print(f\"{len(uniq_filters)} unique filters used: {uniq_filters}\")" ] }, { "cell_type": "code", "execution_count": null, "id": "add4839e", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# look at one of the association files\n", @@ -383,7 +446,9 @@ "cell_type": "code", "execution_count": null, "id": "2831ae47-5b1b-4d92-b02a-8ce1ee359710", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# in particular, take a closer look at the product filenames with the association file:\n", @@ -424,7 +489,9 @@ "cell_type": "code", "execution_count": null, "id": "6cf900af", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "for img3_asn in image3_asns:\n", @@ -434,22 +501,10 @@ " if os.path.exists(cal_file):\n", " print(cal_file, 'cal file already exists.')\n", " continue\n", - " # if not, calibrated with image3\n", + " # if not, calibrate with image3\n", " img3 = Image3Pipeline.call(img3_asn, save_results=True, output_dir=default_run_image3)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"eb5b3614-cfc3-42b7-b5ce-e243eb680ae7", - "metadata": {}, - "outputs": [], - "source": [ - "# remove unnecessary files to save disk space\n", - "for crf in glob.glob(os.path.join(default_run_image3, '*crf.fits')):\n", - " os.remove(crf)" - ] - }, { "cell_type": "markdown", "id": "33f0dbeb-4caf-4d97-8b2c-3255f906e76b", @@ -467,9 +522,9 @@ "outputs": [], "source": [ "# These are all resuts from the Image3 pipeline\n", - "image3_i2d = np.sort(glob.glob(os.path.join(default_run_image3, '*i2d.fits'))) # combined image over multiple dithers/mosaic\n", - "image3_segm = np.sort(glob.glob(os.path.join(default_run_image3, '*segm.fits'))) # segmentation map that defines the extent of a source\n", - "image3_cat = np.sort(glob.glob(os.path.join(default_run_image3, '*cat.ecsv'))) # Source catalog that defines the RA/Dec of a source at a particular pixel" + "image3_i2d = sorted(glob.glob(os.path.join(default_run_image3, '*i2d.fits'))) # combined image over multiple dithers/mosaic\n", + "image3_segm = sorted(glob.glob(os.path.join(default_run_image3, '*segm.fits'))) # segmentation map that defines the extent of a source\n", + "image3_cat = sorted(glob.glob(os.path.join(default_run_image3, '*cat.ecsv'))) # Source catalog that defines the RA/Dec of a source at a particular pixel" ] }, { @@ -478,9 +533,9 @@ "metadata": {}, "source": [ "#### Matplotlib\n", - "Matplotlib has limitations where ImViz might better suite your needs -- especially if you like to look at things in WCS coordinates. For the notebook purposes, we are highlighting a few key areas using the matplotlib package instead.\n", + "Matplotlib has limitations where ImViz might better suite your needs -- especially for looking at fields using the WCS coordinates. 
For the notebook purposes, we are highlighting here a few key areas using the matplotlib package.\n", "\n", - "Using the `i2d` combined image and the source catalog produced by Image3, we can visually inspect if we're happy with where the pipeline found the sources. In the following figures, what has been defined as an extended source by the pipeline is shown in orange-red, and what has been defined as a point source by the pipeline is shown in grey. This definition affects the extraction box in the WFSS images." + "Using the `i2d` combined image and the source catalog produced by Image3, we can visually inspect the pipeline found the sources. In the following figures, what has been defined as an extended source by the pipeline is shown in orange-red, and what has been defined as a point source by the pipeline is shown in grey. This definition affects the extraction box in the WFSS images." ] }, { @@ -525,7 +580,7 @@ "id": "d157e3a8-2f2a-4487-8329-f6fa56713212", "metadata": {}, "source": [ - "The segmentation maps are also a product of the Image3 pipeline, and they are used the help determine the source catalog. Let's take a look at those to ensure we are happy with what it is defining as a source.\n", + "The segmentation maps are also a product of the Image3 pipeline, and they are used the help determine the source catalog and in the contamination step. Here we look at those files to see the what the pipeline is defining as a source.\n", "\n", "In the segmentation map, each blue blob should correspond to a physical target. There are cases where sources can be blended, in which case the parameters for making the semgentation map and source catalog should be changed. An example of this can be seen below in the observation 004 F200W filter image where two galaxies at ~(1600, 1300) have been blended into one source. This is discussed in more detail below in [Custom Imaging Pipeline Run](#custom). 
\n", "\n", @@ -547,7 +602,7 @@ "\n", " fig = plt.figure(figsize=(10, 10*(rows/2)))\n", "\n", - " for plt_num, img in enumerate(np.sort(np.concatenate([segm_images, i2d_images]))):\n", + " for plt_num, img in enumerate(sorted(np.concatenate([segm_images, i2d_images]))):\n", " \n", " # determine where the subplot should be\n", " xpos = (plt_num % 40) % cols\n", @@ -642,8 +697,7 @@ "\n", "# this aligns the image to use the WCS coordinates; \n", "# the images need to be loaded first, but before adding markers\n", - "linking = imviz.plugins['Orientation']\n", - "linking.align_by = 'WCS'\n", + "imviz.link_data(align_by='wcs') # \"align_by\" in later versions of jdaviz; \"link_type\" in earlier versions\n", "\n", "# also plot the associated catalog\n", "# this needs to be a separate loop due to linking in imviz when using sky coordinates\n", @@ -691,67 +745,62 @@ "source": [ "Try editing a few parameters and compare the outcomes to the default run above, at first for a single file.\n", "\n", - "When we call the image3 pipeline, we can add modifications to a specific step of the pipeline. In this case we're going to edit the `source_catalog` and `tweakreg` steps of the pipeline. An explanation of the different parameters to tweak can be found in the further information below, while the default values are a combination of both the default pipeline values listed in there, and the parameter reference files that are supplied.\n", - "- [source_catalog Further Information](https://jwst-pipeline.readthedocs.io/en/latest/api/jwst.source_catalog.SourceCatalogStep.html)\n", - "- [tweakreg Futher Information](https://jwst-pipeline.readthedocs.io/en/latest/api/jwst.tweakreg.tweakreg_step.TweakRegStep.html)" + "When we call the image3 pipeline, we can add modifications to a specific step of the pipeline. In this case we're going to edit the `source_catalog` and `tweakreg` steps of the pipeline. 
An explanation of the different parameters to modify can be found in the \"further information\" links below, while the default values are a combination of both the default pipeline values listed in there, and the parameter reference files that are supplied.\n", + "- [source_catalog Further Information](https://jwst-pipeline.readthedocs.io/en/latest/jwst/source_catalog/arguments.html)\n", + "- [tweakreg Futher Information](https://jwst-pipeline.readthedocs.io/en/latest/jwst/tweakreg/README.html#step-arguments)" ] }, { "cell_type": "code", "execution_count": null, "id": "3f2f7f0f", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "image3_asns = np.sort(glob.glob('*image3*asn*.json'))\n", - "test_asn = image3_asns[1]\n", + "image3_asns = sorted(glob.glob('*image3*asn*.json'))\n", + "test_asn = image3_asns[0]\n", "\n", "# check if the calibrated file already exists\n", "asn_data = json.load(open(test_asn))\n", "i2d_file = os.path.join(custom_run_image3, f\"{asn_data['products'][0]['name']}_i2d.fits\")\n", "\n", - "if os.path.exists(i2d_file):\n", - " print(i2d_file, 'i2d file already exists.')\n", - "else:\n", - " # call the image3 pipeline in the same way as before, but add a few new modifications\n", - " cust_img3 = Image3Pipeline.call(test_asn,\n", - " steps={\n", - " 'source_catalog': {'kernel_fwhm': 5.0,\n", - " 'snr_threshold': 10.0,\n", - " 'npixels': 50,\n", - " 'deblend': True,\n", - " },\n", - " 'tweakreg': {'snr_threshold': 20,\n", - " 'abs_refcat': 'GAIADR3',\n", - " 'save_catalogs': True,\n", - " 'searchrad': 3.0,\n", - " 'kernel_fwhm': 2.302,\n", - " 'fitgeometry': 'shift',\n", - " },\n", - " },\n", - " save_results=True,\n", - " output_dir=custom_run_image3)" + "# call the image3 pipeline in the same way as before, but add a few new modifications\n", + "cust_img3 = Image3Pipeline.call(test_asn,\n", + " steps={\n", + " 'source_catalog': {'kernel_fwhm': 5.0,\n", + " 'snr_threshold': 10.0,\n", + " 'npixels': 50,\n", + 
" 'deblend': True,\n", + " },\n", + " 'tweakreg': {'snr_threshold': 20,\n", + " 'abs_refcat': 'GAIADR3',\n", + " 'save_catalogs': True,\n", + " 'searchrad': 3.0,\n", + " 'kernel_fwhm': 2.302,\n", + " 'fitgeometry': 'shift',\n", + " },\n", + " },\n", + " save_results=True,\n", + " output_dir=custom_run_image3)" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "403345ea-1f26-4ead-b680-ab6a54f73421", + "cell_type": "markdown", + "id": "17d4cdae-0cad-4385-b93b-6976ecf90898", "metadata": {}, - "outputs": [], "source": [ - "# remove unnecessary files to save disk space\n", - "for crf in glob.glob(os.path.join(custom_run_image3, '*crf.fits')):\n", - " os.remove(crf)" + "\n", + "### Inspecting Custom Results" ] }, { "cell_type": "markdown", - "id": "17d4cdae-0cad-4385-b93b-6976ecf90898", + "id": "ba4ea91a-fedd-4ad1-87c6-ea3330d175a6", "metadata": {}, "source": [ - "\n", - "### Inspecting Custom Results" + "If we compare the custom and default i2d values, we can see what affect the custom parameters above had. We encourage you to change the values and see how that affects the data below. Of particular importance to WFSS data, we want to ensure that the segmentation map properly identifies a single source." ] }, { @@ -765,7 +814,8 @@ "compare_i2ds = [i2d_file, default_i2d]\n", "compare_segm = [i2d_file.replace('i2d.fits', 'segm.fits'), default_i2d.replace('i2d.fits', 'segm.fits')]\n", "\n", - "compare_fig = plot_image_and_segmentation_map(compare_i2ds, compare_segm)" + "compare_fig = plot_image_and_segmentation_map(compare_i2ds, compare_segm)\n", + "compare_fig.suptitle('Custom Image3 (top) vs. 
Default Image3 (bottom)', y=1)" ] }, { @@ -794,8 +844,7 @@ " imviz.load_data(img, data_label=title)\n", "\n", " # this aligns the image to use the WCS coordinates\n", - " linking = imviz.plugins['Orientation']\n", - " linking.align_by = 'WCS'\n", + " imviz.link_data(align_by='wcs') # \"align_by\" in later versions of jdaviz; \"link_type\" in earlier versions\n", "\n", " # also plot the associated catalog\n", " cat = Table.read(img.replace('i2d.fits', 'cat.ecsv'))\n", @@ -818,17 +867,19 @@ "id": "48ae7dc1-45ce-41a6-a5a3-c07226efc912", "metadata": {}, "source": [ - "Calibrate the remaining images if you are happy with the above results" + "Calibrate the remaining images once satisfied with the above results" ] }, { "cell_type": "code", "execution_count": null, "id": "4787d1bb-57a7-4056-b93e-c5e7497d14cf", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "image3_asns = np.sort(glob.glob('*image3*asn*.json'))\n", + "image3_asns = sorted(glob.glob('*image3*asn*.json'))\n", "\n", "for img3_asn in image3_asns:\n", " # check if the calibrated file already exists\n", @@ -857,18 +908,6 @@ " output_dir=custom_run_image3)" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "828566bf-fb5f-4ba5-8ef7-0de09a73f0fb", - "metadata": {}, - "outputs": [], - "source": [ - "# remove unnecessary files to save disk space\n", - "for crf in glob.glob(os.path.join(custom_run_image3, '*crf.fits')):\n", - " os.remove(crf)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -877,8 +916,8 @@ "outputs": [], "source": [ "# These are all resuts from the Image3 pipeline\n", - "cust_image3_i2d = np.sort(glob.glob(os.path.join(custom_run_image3, '*i2d.fits'))) # combined image over multiple dithers/mosaic\n", - "cust_image3_segm = np.sort(glob.glob(os.path.join(custom_run_image3, '*segm.fits'))) # segmentation map that defines the extent of a source\n", + "cust_image3_i2d = sorted(glob.glob(os.path.join(custom_run_image3, '*i2d.fits'))) # 
combined image over multiple dithers/mosaic\n", + "cust_image3_segm = sorted(glob.glob(os.path.join(custom_run_image3, '*segm.fits'))) # segmentation map that defines the extent of a source\n", "\n", "custom_fig = plot_image_and_segmentation_map(cust_image3_i2d, cust_image3_segm)" ] @@ -926,7 +965,7 @@ "metadata": {}, "outputs": [], "source": [ - "custom_cats = np.sort(glob.glob(os.path.join(custom_run_image3, '*niriss_clear-f????_cat.ecsv'))) # cat filename format from image3 results\n", + "custom_cats = sorted(glob.glob(os.path.join(custom_run_image3, '*niriss_clear-f????_cat.ecsv'))) # cat filename format from image3 results\n", "print(\"All image3 catalogs:\\n\", custom_cats)\n", "\n", "base_cat = Table.read(custom_cats[0])\n", @@ -1003,7 +1042,7 @@ "\n", "#### Manually Editing the Source Catalog\n", "\n", - "Looking ahead to the WFSS extraction, it might be that we only really care about one or two sources at any particular moment. In this case, it's helpful to pair down the source catalog to just that source to make it easier to look at the extracted 1-D spectrum in the WFSS data.\n", + "Looking ahead to the WFSS spectral extraction, it might be that we want to look at one or two sources at any particular moment. 
In this case, it's helpful to pair down the source catalog to just that source to make it easier to look at the extracted 1-D spectrum in the WFSS data.\n", "\n", "- [Source Catalog Column Information](https://jwst-pipeline.readthedocs.io/en/latest/jwst/source_catalog/main.html#output-products)" ] @@ -1024,7 +1063,7 @@ "outputs": [], "source": [ "# first, look at the current, custom source catalog for the F200W filter\n", - "cat_name = np.sort(glob.glob(os.path.join(custom_run_image3, '*source-match_cat.ecsv')))[2]\n", + "cat_name = sorted(glob.glob(os.path.join(custom_run_image3, '*source-match_cat.ecsv')))[2]\n", "cat = Table.read(cat_name)\n", "\n", "print(cat_name)\n", @@ -1127,7 +1166,9 @@ "id": "9d2a8566-eb1c-4df0-b4eb-f24c6edc8e56", "metadata": {}, "source": [ - "Once we have an updated source catalog that we are content with, we can move on to the spec2 step of the pipeline. It likely will be necessary to come back to this step after running spec2. Let's take a quick look at the source that we will be extracting in the following notebook with spec2." + "Once we have an updated source catalog that we are content with, we can move on to the spec2 step of the pipeline. It may be necessary to come back to this step after running spec2.\n", + "\n", + "Here were first look at the source that we will be extracting in the following notebook with spec2." 
] }, { @@ -1198,7 +1239,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.9" + "version": "3.11.7" } }, "nbformat": 4, diff --git a/notebooks/NIRISS/NIRISS_WFSS_advanced/02_niriss_wfss_spec2.ipynb b/notebooks/NIRISS/NIRISS_WFSS_advanced/02_niriss_wfss_spec2.ipynb index b9bc86878..ba952f99c 100644 --- a/notebooks/NIRISS/NIRISS_WFSS_advanced/02_niriss_wfss_spec2.ipynb +++ b/notebooks/NIRISS/NIRISS_WFSS_advanced/02_niriss_wfss_spec2.ipynb @@ -27,8 +27,8 @@ "\n", "**Author**: Rachel Plesha (rplesha@stsci.edu), Camilla Pacifici (cpacifici@stsci.edu), JWebbinar notebooks.
\n", "**First Published**: May 2024
\n", - "**Last edited**: August 2024
\n", - "**Last tested**: This notebook was last tested with JWST pipeline version 1.12.5 and the CRDS context jwst_1225.pmap." + "**Last edited**: October 2025
\n", + "**Last tested**: This notebook was last tested with JWST pipeline version 1.20.1 and the CRDS context jwst_1464.pmap." ] }, { @@ -161,7 +161,7 @@ "# if you have not downloaded the data from notebook 00 or have not run notebook 01, run this cell. Otherwise, feel free to skip it.\n", "\n", "# Download uncalibrated data from Box into the data directory:\n", - "boxlink = 'https://data.science.stsci.edu/redirect/JWST/jwst-data_analysis_tools/niriss_wfss_advanced/niriss_wfss_advanced_02_input.zip'\n", + "boxlink = 'https://data.science.stsci.edu/redirect/JWST/jwst-data_analysis_tools/niriss_wfss_advanced/2025_niriss_wfss_advanced_02_input.zip'\n", "boxfile = os.path.basename(boxlink)\n", "urllib.request.urlretrieve(boxlink, boxfile)\n", "\n", @@ -303,7 +303,7 @@ "\n", "## Custom Spec2 Pipeline Run\n", "\n", - "Because this is a custom spec2 pipeline run, the first step is to make sure the correct source catalog is being used. This will define the spectral trace cutouts, and therefore inform the pipeline on where to extract the spectrum." + "Because this is a custom spec2 pipeline run, the first step is to make sure the correct source catalog is being used. This is used to define the spectral trace cutouts, and therefore inform the pipeline on where to extract the spectrum." 
] }, { @@ -332,7 +332,7 @@ "metadata": {}, "outputs": [], "source": [ - "spec2_asns = np.sort(glob.glob('*spec2*asn*.json'))\n", + "spec2_asns = sorted(glob.glob('*spec2*asn*.json'))\n", "print(len(spec2_asns), 'Spec2 ASN files')\n", "# number of asn files should match the number of dispersed images -- 30 for obs 004\n", "print(len(rate_df[(rate_df['FILTER'] == 'GR150R') | (rate_df['FILTER'] == 'GR150C')]), 'Dispersed image rate files')" @@ -524,8 +524,9 @@ "# first look at how many sources we expect from the catalog\n", "print(f'There are {len(cat)} sources identified in the current catalog.\\n')\n", "\n", - "# then look at how long the cal and x1d files are for comparison\n", - "print(f'The x1d file has {len(x1d_hdu)} extensions & the cal file has {len(cal_hdu)} extensions')" + "# then look at how many sources are in the cal and x1d files are for comparison\n", + "print(f'The x1d file has {len(x1d_hdu[1].data)} science arrays in the first data extension')\n", + "print(f'The cal file has {len(cal_hdu)} extensions')" ] }, { @@ -533,11 +534,9 @@ "id": "a0f6c961-dfba-4b12-a7ee-cb61623308dd", "metadata": {}, "source": [ - "Note that the 0th and final extension in each file do not contain science data, but the remaining extensions correspond to each source. The `x1d` file contains the extracted spectrum for each source, while the `cal` file contains the 2D cutout information in seven extensions for each source (SCI, DQ, ERR, WAVELENGTH, VAR_POISSON, VAR_RNOISE, VAR_FLAT).\n", + "As of jwst pipeline version 1.19.0, the `x1d` files now follow a different format than the `cal` files. The `cal` files contain information in seven separate data extensions for each extracted source: SCI, DQ, ERR, WAVELENGTH, VAR_POISSON, VAR_RNOISE, VAR_FLAT. The `x1d` files instead contain the information for all sources in the science extension, with order 1 being in the first data extension and order 2 (for F090W only) in the second data extension. 
Note that for both filetypes, the 0th and final extension in each file do not contain science data,\n", "\n", - "This in part is why it is so important to have a refined source catalog. If there are sources that are not useful for your research, there is no reason to create a cutout and extract them.\n", - "\n", - "Notice that there are more sources than there are extensions in the files. This is because the pipeline defaults to only extracting the 100 brightest sources. To change this behavior, supply the pipeline with the paramter `wfss_nbright`, which we do [below](#limit_source)." + "Only the 100 brightest sources are extracted by default for NIRISS WFSS data. To change this behavior, supply the pipeline with the paramter `wfss_nbright`, which we do [below](#limit_source)." ] }, { @@ -565,7 +564,7 @@ "id": "52473e1d-c5b4-490d-b2e9-e0d049897c7b", "metadata": {}, "source": [ - "The `x1d` file is a BinTable, so there are additional columns contained inside each of the data extensions. [x1d further reading](https://jwst-pipeline.readthedocs.io/en/latest/jwst/data_products/science_products.html#extracted-1-d-spectroscopic-data-x1d-and-x1dints) goes through what is contained in each column, but we can also take a quick look by looking at one of the data columns." + "The `x1d` file is a BinTable, so there are columns contained inside the data extensions, with each source in a separate array entry inside each of these columns. [x1d further reading](https://jwst-pipeline.readthedocs.io/en/latest/jwst/data_products/science_products.html#extracted-1-d-spectroscopic-data-x1d-and-x1dints) details each column." ] }, { @@ -579,6 +578,24 @@ "print(x1d_hdu[1].data.columns)" ] }, + { + "cell_type": "markdown", + "id": "b105f895-59a1-4332-a727-912ddf71ce7f", + "metadata": {}, + "source": [ + "The format of the file requires that all data arrays inside a column (i.e. WAVELENGTH, FLUX, etc.) are the same length for every entry. 
Therefore, every array is as long as the longest extracted trace. The values are padded with nans at the end of the trace, and can be seen best in the \"wavelength\" array where nans are not typically a valid entry." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e4570186-531d-46ef-8df6-58b550f2d0a5", + "metadata": {}, + "outputs": [], + "source": [ + "print(x1d_hdu[1].data['WAVELENGTH'][0])" + ] + }, { "cell_type": "markdown", "id": "2e1a31a6-fbba-4ca5-920a-ac59014c8495", @@ -596,7 +613,7 @@ "\n", "### Find a Source in the Spec2 Data\n", "\n", - "Each extension of the cal and x1d files has a source ID in the header. These values should match with the values in the source catalog." + "Each science extension of the cal files has a source ID in the header. These values should match with the values in the source catalog." ] }, { @@ -608,8 +625,8 @@ "source": [ "def find_source_ext(x1d_hdu, cal_hdu, source_id, info=True):\n", " # x1d extension first\n", - " x1d_source_ids = np.array([x1d_hdu[ext].header['SOURCEID'] for ext in range(len(x1d_hdu))[1:-1]]) # cut off the 0th and last data extensions\n", - " wh_x1d = np.where(x1d_source_ids == source_id)[0][0] + 1 # need to add 1 for the primary header\n", + " x1d_source_ids = np.array(x1d_hdu[1].data['SOURCE_ID'])\n", + " wh_x1d = np.where(x1d_source_ids == source_id)[0][0]\n", " \n", " # look for cal extension, too, but only in the SCI extension; \n", " # fill in with a source ID of -999 for all other extensions to get the right extension value\n", @@ -619,7 +636,7 @@ "\n", " if info:\n", " print(f\"All source IDs in x1d file:\\n{x1d_source_ids}\\n\")\n", - " print(f\"Extension {wh_x1d} in {x1d_hdu[0].header['FILENAME']} contains the data for source {source_id} from our catalog\")\n", + " print(f\"Index {wh_x1d} of extension 1 in {x1d_hdu[0].header['FILENAME']} contains the data for source {source_id} from our catalog\")\n", " print(f\"Extension {wh_cal} in {cal_hdu[0].header['FILENAME']} contains 
the data for source {source_id} from our catalog\")\n", "\n", " return wh_x1d, wh_cal" @@ -636,7 +653,7 @@ "source_id = 118\n", "wh_x1d_118, wh_cal_118 = find_source_ext(x1d_hdu, cal_hdu, source_id)\n", "\n", - "x1d_data_118 = x1d_hdu[wh_x1d_118].data \n", + "x1d_data_118 = x1d_hdu[1].data[wh_x1d_118] \n", "cal_data_118 = cal_hdu[wh_cal_118].data" ] }, @@ -645,9 +662,7 @@ "id": "a362fa9f-ae33-4dcc-954c-471a80b41d37", "metadata": {}, "source": [ - "First, let's look at the extraction box as it appears on the rate image. This will help us get a global feel for how much we can trust the pipeline's extraction. \n", - "\n", - "*Note: In this example, the extraction box isn't fully cenetered around the spectrum. There is an ongoing calibration effort to better account for difference in the spectral trace shape across the NIRISS detector. Updates about the status of this calibration can be found on the [NIRISS jdox](https://jwst-docs.stsci.edu/jwst-calibration-pipeline-caveats/jwst-wide-field-slitless-spectroscopy-pipeline-caveats#JWSTWideFieldSlitlessSpectroscopyPipelineCaveats-NIRISSWFSS)*" + "First, let's look at the extraction box as it appears on the rate image to ensure that it captures all of the source flux and does not need to be adjusted." 
] }, { @@ -672,8 +687,9 @@ "\n", "# plot the rate file and the extraction box\n", "fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))\n", - "ax1.imshow(rate_data, origin='lower', vmin=0.2, vmax=1, aspect='auto') # the scaling may need some adjustment\n", - "ax2.imshow(rate_data, origin='lower', vmin=0.2, vmax=1, aspect='auto') # the scaling may need some adjustment\n", + "display_vals = [np.nanpercentile(rate_data, 1), np.nanpercentile(rate_data, 99)] \n", + "ax1.imshow(rate_data, origin='lower', vmin=display_vals[0], vmax=display_vals[1], aspect='auto') # the scaling may need some adjustment\n", + "ax2.imshow(rate_data, origin='lower', vmin=display_vals[0], vmax=display_vals[1], aspect='auto') # the scaling may need some adjustment\n", "\n", "rectangle = patches.Rectangle((sx_left, sy_bottom), swidth, sheight, edgecolor='darkorange', facecolor=\"None\", linewidth=1)\n", "ax1.add_patch(rectangle)\n", @@ -693,9 +709,9 @@ "id": "22a7c98b-0a1b-4d1f-9323-25eb08696e4d", "metadata": {}, "source": [ - "We can then take a look at the extracted spectrum in this box both in the cal file and the x1d file. In the extracted spectrum below you can see the [OII] and H$\beta$ emission lines from the galaxy.\n", + "We can then take a look at the cal file cutout from this extraction box and the resulting extracted spectrum in the x1d file. In the extracted spectrum below you can see the [OII] and H$\beta$ emission lines from the galaxy.\n", "\n", - "*Note: The upturned edge effects seen in the 1-D spectrum are due to interpolation at the edges of the extraction box for the current flux calibration. This is also part of an ongoing calibration effort.*\n", + "*Note: The upturned edge effects seen in the 1-D spectrum are due to interpolation at the edges of the extraction box for the current flux calibration. This is part of an ongoing calibration effort.*\n", "\n", "*Additional note: The default units of flux from the pipeline are in Jansky. 
However, in these extracted spectra we show units of erg/s/cm^2/Angstrom. To turn this off, set `convert=False` in `plot_cutout_and_spectrum`*" ] @@ -740,9 +756,8 @@ " ax2.plot(wave, flux)\n", " ax2.set_title(os.path.basename(x1d_file))\n", "\n", - " edge_buffer = int(len(flux) * .25)\n", - " max_flux = np.nanmax(flux[edge_buffer:edge_buffer*-1])\n", - " ax2.set_ylim(0, max_flux+(max_flux*0.1)) # cutting the flux of the edges & adding 10% buffer to the limits\n", + " x1d_display_vals = [np.nanpercentile(flux, 5), np.nanpercentile(flux, 95)]\n", + " ax2.set_ylim(x1d_display_vals[0], x1d_display_vals[1])\n", " ax2.set_xlabel('Wavelength (Microns)')\n", " ax2.set_ylabel(f'Flux ({fluxunit})')\n", "\n", @@ -774,7 +789,7 @@ "\n", "### Limit the Number of Extracted Sources\n", "\n", - "Because it takes so long to extract so many sources, let's see if we can pair down the number of sources being extracted. We'll do this with parameter inputs (for bright point sources) and with a further refined source catalog." + "Because it can take a long time to extract so many sources, we show here how to reduce the number of sources being extracted. We'll do this with parameter inputs (for bright point sources) and with a further refined source catalog." 
] }, { @@ -793,7 +808,9 @@ "cell_type": "code", "execution_count": null, "id": "9fb2d1af-e529-497d-92db-390d17905eb9", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# check if the calibrated file already exists\n", @@ -819,7 +836,9 @@ "# open the x1d to examine how many sources were extracted\n", "x1ds = glob.glob(os.path.join(param_outdir, '*x1d.fits*'))\n", "with fits.open(x1ds[0]) as temp_x1d:\n", - " print(f'The x1d file has {len(temp_x1d)-2} extracted sources')" + " print(f'The x1d file has {len(temp_x1d[1].data[\"SOURCE_ID\"])} extracted sources:')\n", + " for source_data in temp_x1d[1].data:\n", + " print(f\" Source {source_data['SOURCE_ID']} -- ({source_data['SOURCE_RA']}, {source_data['SOURCE_DEC']})\")" ] }, { @@ -838,7 +857,7 @@ "metadata": {}, "outputs": [], "source": [ - "source_match_cats = np.sort(glob.glob('*source-match_cat.ecsv'))\n", + "source_match_cats = sorted(glob.glob('*source-match_cat.ecsv'))\n", "source_match_cat = Table.read(source_cat)\n", "\n", "# look at the possible magnitude ranges to look at\n", @@ -993,7 +1012,9 @@ "cell_type": "code", "execution_count": null, "id": "6b7dad07-7d44-4ebe-841d-3f686b9df016", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# calibrate with the new source catalog\n", @@ -1033,14 +1054,15 @@ "cell_type": "code", "execution_count": null, "id": "94aec550-ea1f-4215-8965-bcd2ac408bfd", - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "# Explore the new Data\n", - "x1ds = np.sort(glob.glob(os.path.join(source_outdir, \"*x1d.fits\")))\n", + "x1ds = sorted(glob.glob(os.path.join(source_outdir, \"*x1d.fits\")))\n", "\n", "# get a list of all of the source IDs from the first file to look at for this example\n", - "sources = [fits.getval(x1ds[0], 'SOURCEID', ext=ext) for ext in range(len(fits.open(x1ds[0])))[1:-1]]\n", "source_id = 118\n", "\n", "# plot each x1d/cal file\n", @@ -1054,7 +1076,7 @@ " # 
this means the source isn't in this observation\n", " continue\n", "\n", - " x1d_data = x1d_hdu[wh_x1d].data \n", + " x1d_data = x1d_hdu[1].data[wh_x1d]\n", " cal_data = cal_hdu[wh_cal].data\n", "\n", " plot_cutout_and_spectrum(cal_data, x1d_data, cal_file, x1d_file, source_id)" @@ -1076,10 +1098,8 @@ "outputs": [], "source": [ "# Overplot the different grisms on top of each other for the same source\n", - "x1ds = np.sort(glob.glob(os.path.join(source_outdir, \"*x1d.fits\")))\n", + "x1ds = sorted(glob.glob(os.path.join(source_outdir, \"*x1d.fits\")))\n", "\n", - "# get a list of all of the source IDs from the first file to look at for this example\n", - "sources = [fits.getval(x1ds[0], 'SOURCEID', ext=ext) for ext in range(len(fits.open(x1ds[0])))[1:-1]]\n", "source_id = 118\n", "\n", "# create a figure with three panels\n", @@ -1105,9 +1125,9 @@ " # this means the source isn't in this observation\n", " continue\n", "\n", - " x1d_data = x1d_hdu[wh_x1d].data \n", + " x1d_data = x1d_hdu[1].data[wh_x1d]\n", " grism = x1d_hdu[0].header['FILTER']\n", - " filter = x1d_hdu[0].header['PUPIL']\n", + " filt = x1d_hdu[0].header['PUPIL']\n", "\n", " wave = x1d_data['WAVELENGTH'].ravel()\n", " flux = x1d_data['FLUX'].ravel()\n", @@ -1115,11 +1135,11 @@ " flux = Fnu_to_Flam(wave, flux)\n", " fluxunits = 'erg/s/cm^2/Angstrom'\n", "\n", - " ax1.plot(wave, flux, color=color_dict[filter], ls=ls_dict[grism], alpha=0.7)\n", + " ax1.plot(wave, flux, color=color_dict[filt], ls=ls_dict[grism], alpha=0.7)\n", " if grism == 'GR150C':\n", - " ax2.plot(wave, flux, color=color_dict[filter], ls=ls_dict[grism], alpha=0.7)\n", + " ax2.plot(wave, flux, color=color_dict[filt], ls=ls_dict[grism], alpha=0.7)\n", " else:\n", - " ax3.plot(wave, flux, color=color_dict[filter], ls=ls_dict[grism], alpha=0.7)\n", + " ax3.plot(wave, flux, color=color_dict[filt], ls=ls_dict[grism], alpha=0.7)\n", "\n", " # save the maximum fluxes for plotting, removing any edge effects\n", " edge_buffer = int(len(flux) * 
.25)\n", @@ -1128,7 +1148,7 @@ "\n", "# plot limits & labels\n", "ax1.set_ylim(0, np.max(max_fluxes))\n", - "ax1.set_xlim(np.min(all_waves), np.max(all_waves))\n", + "ax1.set_xlim(np.nanmin(all_waves), np.nanmax(all_waves))\n", "\n", "src118_fig.suptitle(f'Source {source_id}')\n", "ax1.set_title('GR150R & GR150C')\n", @@ -1152,7 +1172,7 @@ "id": "123375a9-4660-4205-9128-4b077bfc06dc", "metadata": {}, "source": [ - "##### Look at all of the sources for a single file\n", + "##### Here we look at all of the sources for a single file\n", "\n", "Note that some sources might not actually be extracting anything interesting. If this is the case, go back to your source catalog and images to ensure that you have the correct source identified and the target is centered in the cutout. This notebook uses simple examples to create and limit the source catalog, so it might not always show the most scientifically interesting sources." ] @@ -1170,9 +1190,9 @@ "cal_file = x1d_file.replace('x1d.fits', 'cal.fits')\n", "with fits.open(x1d_file) as x1d_hdu, fits.open(cal_file) as cal_hdu:\n", "\n", - " for ext in range(len(x1d_hdu))[1:-1]:\n", + " for x1d_data in x1d_hdu[1].data:\n", "\n", - " source_id = x1d_hdu[ext].header['SOURCEID']\n", + " source_id = x1d_data['SOURCE_ID']\n", "\n", " try:\n", " wh_x1d, wh_cal = find_source_ext(x1d_hdu, cal_hdu, source_id, info=False)\n", @@ -1180,7 +1200,6 @@ " # this means the source isn't in this observation\n", " continue\n", " \n", - " x1d_data = x1d_hdu[wh_x1d].data \n", " cal_data = cal_hdu[wh_cal].data\n", " \n", " plot_cutout_and_spectrum(cal_data, x1d_data, cal_file, x1d_file, source_id)" @@ -1191,7 +1210,7 @@ "id": "13a0285f-edb3-4796-8377-774a65d02ecb", "metadata": {}, "source": [ - "##### Visualize where the extracted sources are on the dispersed image, and how that compares to the direct image" + "##### Here we visualize where the extracted sources are on the dispersed image, and how that compares to the direct image" ] }, { @@ 
-1220,9 +1239,9 @@ "\n", "with fits.open(x1d_file) as x1d_hdu, fits.open(cal_file) as cal_hdu:\n", "\n", - " for ext in range(len(x1d_hdu))[1:-1]:\n", + " for x1d_data in x1d_hdu[1].data:\n", "\n", - " source_id = x1d_hdu[ext].header['SOURCEID']\n", + " source_id = x1d_data['SOURCE_ID']\n", "\n", " try:\n", " wh_x1d, wh_cal = find_source_ext(x1d_hdu, cal_hdu, source_id, info=False)\n", @@ -1230,7 +1249,6 @@ " # this means the source isn't in this observation\n", " continue\n", " \n", - " x1d_data = x1d_hdu[wh_x1d].data \n", " cal_data = cal_hdu[wh_cal].data\n", " \n", " # extraction box parameters from the header of the cal data:\n", @@ -1280,7 +1298,7 @@ "id": "b115e0b7-459b-4688-a27e-b8a0afb22da3", "metadata": {}, "source": [ - "Continue to explore further, including using the [spec3 stage](https://jwst-pipeline.readthedocs.io/en/latest/jwst/pipeline/calwebb_spec3.html) of the pipeline!" + "Continue to explore further, including using the [spec3 stage](https://jwst-pipeline.readthedocs.io/en/latest/jwst/pipeline/calwebb_spec3.html) of the pipeline which combines the multiple dither positions. Syntax for running the Spec3 stage can be found in the [NIRISS WFSS JWST Pipeline Notebook](https://spacetelescope.github.io/jwst-pipeline-notebooks/notebooks/NIRISS/WFSS/JWPipeNB-niriss-wfss.html)." ] }, { diff --git a/notebooks/NIRISS/NIRISS_WFSS_advanced/requirements.txt b/notebooks/NIRISS/NIRISS_WFSS_advanced/requirements.txt index 78be09038..5ff865da3 100644 --- a/notebooks/NIRISS/NIRISS_WFSS_advanced/requirements.txt +++ b/notebooks/NIRISS/NIRISS_WFSS_advanced/requirements.txt @@ -2,8 +2,8 @@ astropy >= 4.3.1 astroquery >= 0.4.7 crds glob2 -jdaviz >= 4.2.1 -jwst >= 1.12.5 +jdaviz >= 4.4.1 +jwst >= 1.19.0 matplotlib numpy pandas