Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add script to apply ESA quality mask #15

Merged
merged 6 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

## Usage
```bash
$ apply_s2_quality_mask INPUTS2DIR
```
```bash
$ check_solar_zenith_sentinel INPUTXML
```
```bash
Expand Down
Empty file.
188 changes: 188 additions & 0 deletions apply_s2_quality_mask/apply_s2_quality_mask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
from typing import List, Optional, Tuple, TypeVar
from pathlib import Path

import click
import rasterio
from lxml import etree


# MSI band number definitions for bands used by HLS v2 product
# (coastal, blue/green/red, NIR, SWIR1, SWIR2)
SPECTRAL_BANDS = frozenset(
{
"B01",
"B02",
"B03",
"B04",
"B8A",
"B11",
"B12",
}
)

# ensure compression is lossless to avoid changing values,
# https://gdal.org/en/stable/drivers/raster/jp2openjpeg.html#lossless-compression
JPEG2000_NO_COMPRESSION_OPTIONS = {
"QUALITY": "100",
"REVERSIBLE": "YES",
"YCBCR420": "NO",
}


T = TypeVar("T")


def _one_or_none(seq: List[T]) -> Optional[T]:
if len(seq) == 1:
return seq[0]


def find_affected_bands(granule_dir: Path) -> List[str]:
"""Check the granule GENERAL_QUALITY.xml for affected bands, if any"""
report_path = _one_or_none(list(granule_dir.glob("**/QI_DATA/GENERAL_QUALITY.xml")))

# We couldn't find the report, so check all quality masks
if report_path is None:
return list(SPECTRAL_BANDS)

quality_report = etree.parse(str(report_path))
root = quality_report.getroot()
# give default namespace a name for use in xpath
nsmap = {"qa": root.nsmap[None]}

# We have a XML structure like,
# <check>
# <inspection ... id="Data_Loss" status="FAILED">
# <message>There is data loss in this tile</message>
# <extraValues>
# <value name="Affected_Bands">B01 B02</value>
# </extraValues>
# </check>
#
# We want to grab the text from the "Affected_Bands" when the message
# indicates there is data loss in the tile.
data_loss_bands_txt = _one_or_none(
root.xpath(
(
".//qa:check[qa:message/text() = 'There is data loss in this tile']/"
"qa:extraValues/qa:value[@name = 'Affected_Bands']/text()"
),
namespaces=nsmap,
)
)
if data_loss_bands_txt is not None:
return data_loss_bands_txt.split(" ")
return []


def find_image_mask_pairs(
granule_dir: Path, bands: List[str]
) -> List[Tuple[Path, Path]]:
"""Search granule directory for image + mask pairs

The quality masks were produced in an imagery format since baseline 04.00
(~Jan 25, 2022 and onward), so this function might return nothing
if run on granules created with older baselines. These older baselines
encoded the mask quality information in the GML (vector) format.

The relevant parts of an unzipped granule looks like,
```
${GRANULE_ID}.SAFE
${GRANULE_ID}.SAFE/GRANULE
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/IMG_DATA
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/IMG_DATA/T45TXF_20221121T050121_B01.jp2
... (*B*.jp2)
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/QI_DATA
${GRANULE_ID}.SAFE/GRANULE/L1C_T45TXF_A038726_20221121T050115/QI_DATA/MSK_QUALIT_B01.jp2
... (MSK_QUALIT_B*.jp2)
```

References
----------
https://sentinels.copernicus.eu/web/sentinel/-/copernicus-sentinel-2-major-products-upgrade-upcoming
"""
pairs = []
for band in bands:
image = _one_or_none(list(granule_dir.glob(f"**/IMG_DATA/*{band}.jp2")))
mask = _one_or_none(list(granule_dir.glob(f"**/QI_DATA/MSK_QUALIT_{band}.jp2")))
if image and mask:
pairs.append((image, mask))

# Find all bands, or none
if len(pairs) == len(SPECTRAL_BANDS):
return pairs
return []


def apply_quality_mask(image: Path, mask: Path):
"""Apply Sentinel-2 image quality mask

Each spectral band (`IMG_DATA/B*.jp2`) has a corresponding quality mask
image (`QI_DATA/MSK_QUALIT_B*.jp2`) of the same spatial resolution. The mask
image has 8 bands, with each band encoding a boolean for the presence/absence
of a quality issue. The bands indicate,

1: lost ancillary packets
2: degraded ancillary packets
3: lost MSI packets
4: degraded MSI packets
5: defective pixels
6: no data
7: partially corrected cross talk
8: saturated pixels

We have decided to mask 3 (lost MSI packets) and 4 (degraded MSI packets) only.
So far, 5 and 7 are present in B10-B11, because interpolation is used to fill 5
and the cross talk is partially corrected.
Saturated pixels are still useful for the magnitude of the reflectance; keep it.

We apply the mask by updating the spectral data images in-place rather than
creating another file.

References
----------
https://sentinels.copernicus.eu/web/sentinel/technical-guides/sentinel-2-msi/data-quality-reports
"""
with rasterio.open(mask) as mask_src:
qa_bands = mask_src.read(indexes=[3, 4])
lost_degraded_mask = (qa_bands == 1).any(axis=0)

# only update imagery if mask shows it has any bad pixels
if lost_degraded_mask.any():
click.echo(f"Masking lost or degraded pixel values in {image}")
with rasterio.open(image, "r+", **JPEG2000_NO_COMPRESSION_OPTIONS) as img_src:
img = img_src.read(1)
# L1C images don't define the nodata value on file so we can't update the
# mask (e.g., via `write_mask`) but 0 is used as the nodata value
# (see SPECIAL_VALUE_INDEX for NODATA in metadata)
img[lost_degraded_mask] = 0
img_src.write(img, 1)


@click.command()
@click.argument(
"granule_dir",
type=click.Path(file_okay=False, dir_okay=True, exists=True),
)
@click.pass_context
def main(ctx, granule_dir: str):
"""Update Sentinel-2 imagery by masking lost or degraded pixels"""
granule_dir = Path(granule_dir)

affected_bands = find_affected_bands(granule_dir)
affected_hls_bands = SPECTRAL_BANDS.intersection(affected_bands)
if not affected_hls_bands:
click.echo(f"No bands are affected by data loss in {granule_dir}")
ctx.exit()

click.echo(f"Applying Sentinel-2 QAQC mask to granule_dir={granule_dir}")
image_mask_pairs = find_image_mask_pairs(granule_dir, affected_hls_bands)
for (image, mask) in image_mask_pairs:
apply_quality_mask(image, mask)

click.echo("Complete")


if __name__ == "__main__":
main()
5 changes: 4 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
"click~=7.1.0",
"lxml",
"boto3~=1.17.91",
"espa-python-library @ git+https://github.com/USGS-EROS/[email protected]#egg=espa-python-library"
"espa-python-library @ git+https://github.com/USGS-EROS/[email protected]#egg=espa-python-library",
"rasterio~=1.2",
"numpy~=1.16",
],
include_package_data=True,
extras_require={
"dev": ["flake8", "black"],
"test": ["flake8", "pytest", "Jinja2==2.10.1", "moto[s3]~=2.0.8"]
},
entry_points={"console_scripts": [
"apply_s2_quality_mask=apply_s2_quality_mask.apply_s2_quality_mask:main",
"parse_fmask=parse_fmask.parse_fmask:main",
"check_solar_zenith_sentinel=check_solar_zenith_sentinel.check_solar_zenith_sentinel:main",
"check_solar_zenith_landsat=check_solar_zenith_landsat.check_solar_zenith_landsat:main",
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport">
<Earth_Explorer_Header>
<Fixed_Header>
<File_Name>GRANULE/L1C_T12QXM_A048143_20240909T175112/QI_DATA/GENERAL_QUALITY.xml</File_Name>
<File_Description>Report produced by OLQC-SC</File_Description>
<Notes/>
<Mission>S2A</Mission>
<File_Class>OPER</File_Class>
<File_Type>REP_OLQCPA</File_Type>
<Validity_Period>
<Validity_Start>UTC=2024-07-23T03:00:00</Validity_Start>
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop>
</Validity_Period>
<File_Version>2</File_Version>
<Source>
<System>OLQC-SC</System>
<Creator>OLQC-SC</Creator>
<Creator_Version>06.03.00</Creator_Version>
<Creation_Date>UTC=2024-09-10T00:44:41</Creation_Date>
</Source>
</Fixed_Header>
</Earth_Explorer_Header>
<Data_Block type="xml">
<report date="2024-09-10T00:44:41.058Z" gippVersion="1.0" globalStatus="PASSED">
<checkList>
<parentID>S2A_OPER_GIP_OLQCPA_MPC__20240717T000043_V20240723T030000</parentID>
<name>GENERAL_QUALITY</name>
<version>1.0</version>
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" url="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/"/>
<check>
<inspection creation="2024-09-10T00:44:40.633Z" duration="357" execution="2024-09-10T00:44:40.672Z" id="Solar_Angle_Zenith" item="S2A_OPER_MTD_L1C_TL_2APS_20240909T231016_A048143_T12QXM.xml" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/S2A_OPER_MTD_L1C_TL_2APS_20240909T231016_A048143_T12QXM.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Zenith solar angle (26.6116855171579) is within the threshold limit (82.0) </message>
<extraValues>
<value name="EXPECTED">82.0</value>
<value name="SOLAR_ZENITH_ANGLE">26.6116855171579</value>
</extraValues>
</check>
<check>
<inspection creation="2024-09-10T00:44:41.909Z" duration="16" execution="2024-09-10T00:44:41.914Z" id="checkMetadata" item="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Metadata file is present.</message>
</check>
<check>
<inspection creation="2024-09-10T00:44:43.339Z" duration="11692" execution="2024-09-10T00:44:43.350Z" id="Data_Loss" item="S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11" itemURL="/mnt/nfs-l1-03/l1-processing/processing-id-65140-2024-09-09-23-34-02/TaskTable_20240909T233403/FORMAT_METADATA_TILE_L1C_ADAPT/output/PDI_DS_TILE_LIST/S2A_OPER_MSI_L1C_TL_2APS_20240909T231016_A048143_T12QXM_N05.11/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">No data loss in this tile.</message>
</check>
</checkList>
</report>
</Data_Block>
</Earth_Explorer_File>
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
<?xml version="1.0" encoding="UTF-8"?><Earth_Explorer_File xmlns="http://gs2.esa.int/DATA_STRUCTURE/olqcReport">
<Earth_Explorer_Header>
<Fixed_Header>
<File_Name>GRANULE/L1C_T43QDG_A031305_20230305T053523/QI_DATA/GENERAL_QUALITY.xml</File_Name>
<File_Description>Report produced by OLQC-SC</File_Description>
<Notes/>
<Mission>S2B</Mission>
<File_Class>OPER</File_Class>
<File_Type>REP_OLQCPA</File_Type>
<Validity_Period>
<Validity_Start>UTC=2022-08-30T00:25:00</Validity_Start>
<Validity_Stop>UTC=2100-01-01T00:00:00</Validity_Stop>
</Validity_Period>
<File_Version>2</File_Version>
<Source>
<System>OLQC-SC</System>
<Creator>OLQC-SC</Creator>
<Creator_Version>06.01.00</Creator_Version>
<Creation_Date>UTC=2023-03-05T08:36:44</Creation_Date>
</Source>
</Fixed_Header>
</Earth_Explorer_Header>
<Data_Block type="xml">
<report date="2023-03-05T08:36:44.399Z" gippVersion="1.0" globalStatus="FAILED">
<checkList>
<parentID>S2B_OPER_GIP_OLQCPA_MPC__20220715T000042_V20220830T002500</parentID>
<name>GENERAL_QUALITY</name>
<version>1.0</version>
<item class="http://www.esa.int/s2#pdi_level_1c_tile_container" className="PDI Level 1C Tile Folder" name="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" url="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/"/>
<check>
<inspection creation="2023-03-05T08:36:44.100Z" duration="246" execution="2023-03-05T08:36:44.145Z" id="checkMetadata" item="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/" name="Metadata file check. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Metadata file is present.</message>
</check>
<check>
<inspection creation="2023-03-05T08:36:45.387Z" duration="81" execution="2023-03-05T08:36:45.397Z" id="Solar_Angle_Zenith" item="S2B_OPER_MTD_L1C_TL_2BPS_20230305T072429_A031305_T43QDG.xml" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/S2B_OPER_MTD_L1C_TL_2BPS_20230305T072429_A031305_T43QDG.xml" name="Check the zenith solar angle. " priority="5" processingStatus="done" status="PASSED"/>
<message contentType="text/plain">Zenith solar angle (37.349671777297) is within the threshold limit (82.0) </message>
<extraValues>
<value name="EXPECTED">82.0</value>
<value name="SOLAR_ZENITH_ANGLE">37.349671777297</value>
</extraValues>
</check>
<check>
<inspection creation="2023-03-05T08:36:45.870Z" duration="23651" execution="2023-03-05T08:36:45.877Z" id="Data_Loss" item="S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09" itemURL="/mnt/local/l1ctile/work/3be743a1-28c3-4010-84c9-8ccca1586310/ipf_output_L1CTile_20230305082123/L1CTile/TaskTable_20230305T082127/FORMAT_METADATA_TILE_L1C/output/PDI_DS_TILE_LIST/S2B_OPER_MSI_L1C_TL_2BPS_20230305T072429_A031305_T43QDG_N05.09/" name="Check TECQUA for data loss " priority="5" processingStatus="done" status="FAILED"/>
<message contentType="text/plain">There is data loss in this tile</message>
<extraValues>
<value name="Affected_Bands">B01 B02 B03 B09 B10 B11 B12 B8A</value>
</extraValues>
</check>
</checkList>
</report>
</Data_Block>
</Earth_Explorer_File>
Loading