diff --git a/sentinel2_feedback/.gitignore b/sentinel2_feedback/.gitignore new file mode 100644 index 00000000..061aec96 --- /dev/null +++ b/sentinel2_feedback/.gitignore @@ -0,0 +1,4 @@ +rslearn_data +chips +cache +.DS_Store diff --git a/sentinel2_feedback/README.md b/sentinel2_feedback/README.md new file mode 100644 index 00000000..32500d23 --- /dev/null +++ b/sentinel2_feedback/README.md @@ -0,0 +1,41 @@ +# Sentinel2 Feedback + +This project trains a model to determine if a given Sentinel-2 chip is valid or not. + +## Setup + +#### Dependencies + +```bash +python3 -m venv venv && source venv/bin/activate +pip install -r rslearn_projects/requirements.txt +``` + +#### Auth + +The `retrieve_dataset.py` script expects `--token`, which can be [accessed like this](https://api-int.skylight.earth/docs/#introduction-item-0). + +## Source data + +This model is trained on a dataset of Sentinel-2 chips that have been labeled as GOOD or BAD. +The feedback.csv file is an export from the Skylight In-App Feedback tool, filtered for +Sentinel-2 events. Each row in the feedback file has an event_id, a label, and a link to the +event in the Skylight app. + +A good way to generate a set of events to be labeled is to use the EAI [sample-events script](https://github.com/VulcanSkylight/eai/blob/master/ais/data/sample_events/sample-events.py#L1-L1) ([readme](https://github.com/VulcanSkylight/eai/blob/master/ais/data/sample_events/README.md#L1-L1)). + +## Dataset Pre-processing + +The `retrieve_dataset.py` script fetches event metadata from the Skylight API to identify the chip URL, and downloads the chip locally. +It outputs a csv file with the event_id, label, and local path to the chip, which is input into `create_rslearn_data.py`. 
+ +``` +rslearn_projects/sentinel2_feedback $> +python retrieve_dataset.py --token $token --feedback_csv feedback.sample.csv --chips_dir chips --output_csv dataset.sample.csv +``` + +The `create_rslearn_data.py` script creates an rslearn dataset from the chips and labels. + +``` +python create_rslearn_data.py --dataset_csv dataset.2rows-sample.csv --out_dir rslearn_data +``` diff --git a/sentinel2_feedback/config.json b/sentinel2_feedback/config.json new file mode 100644 index 00000000..8122e909 --- /dev/null +++ b/sentinel2_feedback/config.json @@ -0,0 +1,37 @@ +{ + "layers": { + "sentinel2": { + "type": "raster", + "band_sets": [ + { + "dtype": "uint8", + "bands": [ + "R", + "G", + "B" + ], + "format": { + "name": "geotiff" + } + } + ], + "data_source": { + "name": "rslearn.data_sources.gcp_public_data.Sentinel2", + "index_cache_dir": "/home/mikej/rslearn_projects/sentinel2_feedback/cache/", + "use_rtree_index": false, + "max_time_delta": "1d", + "sort_by": "cloud_cover", + "query_config": { + "space_mode": "CONTAINS" + } + } + }, + "label": { + "type": "vector" + } + }, + "tile_store": { + "name": "file", + "root_dir": "tiles" + } +} \ No newline at end of file diff --git a/sentinel2_feedback/create_rslearn_data.py b/sentinel2_feedback/create_rslearn_data.py new file mode 100644 index 00000000..823d16b1 --- /dev/null +++ b/sentinel2_feedback/create_rslearn_data.py @@ -0,0 +1,158 @@ +import argparse +import csv +import json +import math +import os +import shutil +from datetime import datetime, timedelta +from pathlib import Path + +import shapely +from pydantic import BaseModel +from pyproj import Transformer +from rslearn.const import WGS84_PROJECTION +from rslearn.dataset.window import Window +from rslearn.utils import get_utm_ups_crs +from rslearn.utils.geometry import Projection, STGeometry +from upath import UPath + +point_geojson = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": 
[[32, 32]], + }, + "properties": { + "label": None, + }, + } + ], + "properties": None, +} + + +class ArgsModel(BaseModel): + dataset_csv: str + out_dir: str + + +class Record(BaseModel): + event_id: str + label: str + lat: float + lon: float + time: str + + +def latlon_to_utm_zone(lat, lon): + """Determine the UTM zone for a given latitude and longitude.""" + zone_number = math.floor((lon + 180) / 6) + 1 + if lat >= 0: + epsg_code = 32600 + zone_number # Northern Hemisphere + else: + epsg_code = 32700 + zone_number # Southern Hemisphere + return epsg_code + + +def calculate_bounds( + record: Record, projection: Projection +) -> tuple[int, int, int, int]: + window_size = 128 + point = shapely.Point(record.lon, record.lat) + stgeometry = STGeometry(WGS84_PROJECTION, point, None) + geometry = stgeometry.to_projection(projection) + + bounds = [ + int(geometry.shp.x) - window_size // 2, + int(geometry.shp.y) - window_size // 2, + int(geometry.shp.x) + window_size // 2, + int(geometry.shp.y) + window_size // 2, + ] + return bounds + + +def get_label_data(record: Record, window: Window): + return { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [[32, 32]], + }, + "properties": { + "label": record.label, + }, + } + ], + "properties": window.projection.serialize(), + } + + +def create_rslearn_data(args: ArgsModel): + with open(args.dataset_csv, mode="r") as file: + reader = csv.DictReader(file) + for row in reader: + record = Record(**row) + pixel_size = 10 # 10 meters per pixel for Sentinel-2 + crs = get_utm_ups_crs(record.lat, record.lon) + projection = Projection( + crs=crs, x_resolution=pixel_size, y_resolution=-pixel_size + ) + + bounds = calculate_bounds(record, projection) + timestamp = datetime.fromisoformat(record.time) + window_root = UPath(f"{args.out_dir}/windows/sentinel2/{record.event_id}") + os.makedirs(window_root, exist_ok=True) + + # Create the Window object + window = Window( + 
path=window_root, + group=record.label, + name=record.event_id, + projection=projection, + bounds=bounds, + time_range=( + timestamp - timedelta(minutes=20), + timestamp + timedelta(minutes=20), + ), + ) + window.save() + + # Populate the label layer + label_layer_dir = os.path.join(window_root, "layers", "label") + os.makedirs(label_layer_dir, exist_ok=True) + with open(os.path.join(label_layer_dir, "data.geojson"), "w") as f: + json.dump(get_label_data(record, window), f) + Path(f"{label_layer_dir}/completed").touch() + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Creates rslearn data from a CSV of events." + ) + parser.add_argument( + "--dataset_csv", + type=str, + required=True, + help="Dataset CSV file which was the --output_csv from retrieve_dataset.py.", + ) + parser.add_argument( + "--out_dir", + type=str, + required=True, + help="Location of the rslearn dataset.", + ) + parsed_args = parser.parse_args() + args = ArgsModel(**vars(parsed_args)) # convert parsed args to pydantic model + os.makedirs(args.out_dir, exist_ok=True) + + # Copy the model architecture definition to the window directory + shutil.copyfile("config.json", os.path.join(args.out_dir, "config.json")) + shutil.copyfile("model_config.yaml", os.path.join(args.out_dir, "config.yaml")) + + create_rslearn_data(args) diff --git a/sentinel2_feedback/dataset.2rows-sample.csv b/sentinel2_feedback/dataset.2rows-sample.csv new file mode 100644 index 00000000..28ce2323 --- /dev/null +++ b/sentinel2_feedback/dataset.2rows-sample.csv @@ -0,0 +1,3 @@ +event_id,label,lat,lon,chip_path,time +S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1,GOOD,25.017882990705132,60.23224195097599,chips/S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1.png,2024-08-24T06:42:33.239990+00:00 
+S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0,BAD,27.078108098808176,35.52532781443999,chips/S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0.png,2024-08-24T08:22:45.575754+00:00 \ No newline at end of file diff --git a/sentinel2_feedback/dataset.sample.csv b/sentinel2_feedback/dataset.sample.csv new file mode 100644 index 00000000..3065ec83 --- /dev/null +++ b/sentinel2_feedback/dataset.sample.csv @@ -0,0 +1,48 @@ +event_id,label,lat,lon,chip_path,time +S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1,GOOD,25.017882990705132,60.23224195097599,chips/S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1.png,2024-08-24T06:42:33.239990+00:00 +S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0,BAD,27.078108098808176,35.52532781443999,chips/S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0.png,2024-08-24T08:22:45.575754+00:00 +S2B_MSIL1C_20240824T012659_N0511_R074_T54TYT_20240824T031649.SAFE_0,GOOD,47.19227985752899,143.99787657896204,chips/S2B_MSIL1C_20240824T012659_N0511_R074_T54TYT_20240824T031649.SAFE_0.png,2024-08-24T01:33:59.727425+00:00 +S2A_MSIL1C_20240825T082601_N0511_R021_T37TDM_20240825T105755.SAFE_1,GOOD,46.936012943130834,38.03339685817958,chips/S2A_MSIL1C_20240825T082601_N0511_R021_T37TDM_20240825T105755.SAFE_1.png,2024-08-25T08:37:08.124548+00:00 +S2A_MSIL1C_20240824T104021_N0511_R008_T31SBC_20240824T142025.SAFE_43,BAD,38.76248092887349,0.2297392844560888,chips/S2A_MSIL1C_20240824T104021_N0511_R008_T31SBC_20240824T142025.SAFE_43.png,2024-08-24T10:50:29.694076+00:00 +S2B_MSIL1C_20240824T145729_N0511_R082_T21TWN_20240824T182803.SAFE_9,BAD,47.53486254599003,-56.82706102831171,chips/S2B_MSIL1C_20240824T145729_N0511_R082_T21TWN_20240824T182803.SAFE_9.png,2024-08-24T14:59:32.287809+00:00 
+S2A_MSIL1C_20240824T153931_N0511_R011_T19TCJ_20240824T205913.SAFE_150,BAD,43.77709323764632,-71.4833862111625,chips/S2A_MSIL1C_20240824T153931_N0511_R011_T19TCJ_20240824T205913.SAFE_150.png,2024-08-24T15:50:54.484061+00:00 +S2A_MSIL1C_20240824T155521_N0511_R011_T17QRE_20240824T205926.SAFE_1,GOOD,21.97476628808408,-77.29573256204905,chips/S2A_MSIL1C_20240824T155521_N0511_R011_T17QRE_20240824T205926.SAFE_1.png,2024-08-24T15:57:04.172389+00:00 +S2A_MSIL1C_20240826T213901_N0511_R043_T03QWF_20240827T002829.SAFE_0,BAD,23.35888197008924,-164.21252767350208,chips/S2A_MSIL1C_20240826T213901_N0511_R043_T03QWF_20240827T002829.SAFE_0.png,2024-08-26T21:39:12.423209+00:00 +S2B_MSIL1C_20240824T030519_N0511_R075_T48QZD_20240824T064532.SAFE_0,BAD,17.13641949330121,108.1284971882313,chips/S2B_MSIL1C_20240824T030519_N0511_R075_T48QZD_20240824T064532.SAFE_0.png,2024-08-24T03:23:28.858875+00:00 +S2A_MSIL1C_20240829T213531_N0511_R086_T05VMC_20240829T232900.SAFE_0,GOOD,56.78536939555178,-154.14466193690345,chips/S2A_MSIL1C_20240829T213531_N0511_R086_T05VMC_20240829T232900.SAFE_0.png,2024-08-29T21:39:46.826253+00:00 +S2A_MSIL1C_20240824T141711_N0511_R010_T21PXS_20240824T174028.SAFE_2,GOOD,14.632537026098486,-55.1752874982515,chips/S2A_MSIL1C_20240824T141711_N0511_R010_T21PXS_20240824T174028.SAFE_2.png,2024-08-24T14:18:28.819872+00:00 +S2B_MSIL1C_20240824T012659_N0511_R074_T54SVE_20240824T031649.SAFE_15,GOOD,35.60967346027558,139.96927073911502,chips/S2B_MSIL1C_20240824T012659_N0511_R074_T54SVE_20240824T031649.SAFE_15.png,2024-08-24T01:37:17.948576+00:00 +S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_3,BAD,-6.126673229908011,-33.62482615288371,chips/S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_3.png,2024-08-24T12:43:31.653440+00:00 +S2A_MSIL1C_20240824T071621_N0511_R006_T37LFJ_20240824T110124.SAFE_1,BAD,-10.31850763736735,40.43420590451193,chips/S2A_MSIL1C_20240824T071621_N0511_R006_T37LFJ_20240824T110124.SAFE_1.png,2024-08-24T07:42:44.986989+00:00 
+S2A_MSIL1C_20240824T005701_N0511_R002_T54MWC_20240824T033538.SAFE_4,GOOD,-2.6807108599634453,141.36923533444912,chips/S2A_MSIL1C_20240824T005701_N0511_R002_T54MWC_20240824T033538.SAFE_4.png,2024-08-24T00:57:44.324339+00:00 +S2B_MSIL1C_20240824T180919_N0511_R084_T12RTT_20240824T214239.SAFE_1,BAD,29.55758252425047,-113.5569669261962,chips/S2B_MSIL1C_20240824T180919_N0511_R084_T12RTT_20240824T214239.SAFE_1.png,2024-08-24T18:26:07.702165+00:00 +S2B_MSIL1C_20240824T000729_N0511_R073_T56LPN_20240824T014112.SAFE_0,GOOD,-10.964371849111295,154.03240231072866,chips/S2B_MSIL1C_20240824T000729_N0511_R073_T56LPN_20240824T014112.SAFE_0.png,2024-08-24T00:09:49.809478+00:00 +S2B_MSIL1C_20240824T030519_N0511_R075_T48MWD_20240824T073032.SAFE_2,BAD,-1.2951197791084286,105.13955252791277,chips/S2B_MSIL1C_20240824T030519_N0511_R075_T48MWD_20240824T073032.SAFE_2.png,2024-08-24T03:28:29.119102+00:00 +S2A_MSIL1C_20240824T085551_N0511_R007_T33LUH_20240824T130629.SAFE_2,BAD,-11.072755196925801,13.689995669703219,chips/S2A_MSIL1C_20240824T085551_N0511_R007_T33LUH_20240824T130629.SAFE_2.png,2024-08-24T09:23:46.860376+00:00 +S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_13,BAD,-32.775232438717744,-71.51863454934579,chips/S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_13.png,2024-08-25T14:52:08.565835+00:00 +S2B_MSIL1C_20240824T133149_N0511_R081_T23NMA_20240824T151028.SAFE_1,GOOD,0.16036775089994446,-45.38292269098038,chips/S2B_MSIL1C_20240824T133149_N0511_R081_T23NMA_20240824T151028.SAFE_1.png,2024-08-24T13:32:11.587972+00:00 +S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_21,BAD,-17.615640826869498,-149.6886734533582,chips/S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_21.png,2024-08-24T20:20:01.269311+00:00 
+S2A_MSIL1C_20240830T194651_N0511_R099_T07LCH_20240831T000725.SAFE_0,BAD,-11.3521054062511,-141.99423322739145,chips/S2A_MSIL1C_20240830T194651_N0511_R099_T07LCH_20240831T000725.SAFE_0.png,2024-08-30T19:48:08.143433+00:00 +S2B_MSIL1C_20240824T220619_N0511_R086_T60HWB_20240824T233423.SAFE_3,GOOD,-39.265515444133214,177.8776425311039,chips/S2B_MSIL1C_20240824T220619_N0511_R086_T60HWB_20240824T233423.SAFE_3.png,2024-08-24T22:06:58.656794+00:00 +S2B_MSIL1C_20240826T004709_N0511_R102_T53HQA_20240826T021236.SAFE_1,GOOD,-35.657133680475866,138.0251110845672,chips/S2B_MSIL1C_20240826T004709_N0511_R102_T53HQA_20240826T021236.SAFE_1.png,2024-08-26T00:57:02.434192+00:00 +S2B_MSIL1C_20240830T104039_N0511_R022_T29HNS_20240830T153355.SAFE_0,BAD,-39.36279991790451,-8.621806245141176,chips/S2B_MSIL1C_20240830T104039_N0511_R022_T29HNS_20240830T153355.SAFE_0.png,2024-08-30T10:42:06.639789+00:00 +S2B_MSIL1C_20240824T120639_N0511_R080_T24FXE_20240824T134405.SAFE_0,GOOD,-54.198397052803706,-36.572196387687825,chips/S2B_MSIL1C_20240824T120639_N0511_R080_T24FXE_20240824T134405.SAFE_0.png,2024-08-24T12:07:07.092793+00:00 +S2B_MSIL1C_20240825T143749_N0511_R096_T18HXE_20240825T175837.SAFE_1,GOOD,-36.72280065608185,-73.0085908356438,chips/S2B_MSIL1C_20240825T143749_N0511_R096_T18HXE_20240825T175837.SAFE_1.png,2024-08-25T14:53:12.199095+00:00 +S2B_MSIL1C_20240830T172349_N0511_R026_T12HXF_20240830T220544.SAFE_0,BAD,-35.70119107403259,-108.99844949829743,chips/S2B_MSIL1C_20240830T172349_N0511_R026_T12HXF_20240830T220544.SAFE_0.png,2024-08-30T17:23:57.099301+00:00 +S2B_MSIL1C_20240824T001109_N0511_R073_T55HDS_20240824T025203.SAFE_2,GOOD,-39.19865767307543,146.32244630315722,chips/S2B_MSIL1C_20240824T001109_N0511_R073_T55HDS_20240824T025203.SAFE_2.png,2024-08-24T00:17:47.583229+00:00 
+S2A_MSIL1C_20240825T002701_N0511_R016_T54HVC_20240825T030743.SAFE_1,GOOD,-38.075509695491235,140.73563030466045,chips/S2A_MSIL1C_20240825T002701_N0511_R016_T54HVC_20240825T030743.SAFE_1.png,2024-08-25T00:37:41.533281+00:00 +S2A_MSIL1C_20240825T072241_N0511_R020_T36GYP_20240825T104922.SAFE_0,GOOD,-46.44868128536897,36.29774599839882,chips/S2A_MSIL1C_20240825T072241_N0511_R020_T36GYP_20240825T104922.SAFE_0.png,2024-08-25T07:22:51.441690+00:00 +S2B_MSIL1C_20240824T030519_N0511_R075_T48MVC_20240824T073032.SAFE_2,GOOD,-1.9397016415551755,104.84391409855048,chips/S2B_MSIL1C_20240824T030519_N0511_R075_T48MVC_20240824T073032.SAFE_2.png,2024-08-24T03:28:46.966992+00:00 +S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_1,GOOD,-5.735759242662593,-33.56706209953899,chips/S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_1.png,2024-08-24T12:43:31.653440+00:00 +S2B_MSIL1C_20240824T000729_N0511_R073_T56LNN_20240824T014112.SAFE_2,BAD,-11.41799391835017,153.19190545897223,chips/S2B_MSIL1C_20240824T000729_N0511_R073_T56LNN_20240824T014112.SAFE_2.png,2024-08-24T00:09:53.581970+00:00 +S2B_MSIL1C_20240824T120639_N0511_R080_T25FDU_20240824T134405.SAFE_0,GOOD,-55.09049854365485,-34.49673066246471,chips/S2B_MSIL1C_20240824T120639_N0511_R080_T25FDU_20240824T134405.SAFE_0.png,2024-08-24T12:07:11.025822+00:00 +S2B_MSIL1C_20240825T143749_N0511_R096_T18HXD_20240825T175837.SAFE_21,BAD,-37.63275425031095,-73.68884068128601,chips/S2B_MSIL1C_20240825T143749_N0511_R096_T18HXD_20240825T175837.SAFE_21.png,2024-08-25T14:53:26.460918+00:00 +S2B_MSIL1C_20240824T080609_N0511_R078_T34HFG_20240824T115109.SAFE_7,GOOD,-34.74716804694033,22.461706637069028,chips/S2B_MSIL1C_20240824T080609_N0511_R078_T34HFG_20240824T115109.SAFE_7.png,2024-08-24T08:39:57.987175+00:00 
+S2A_MSIL1C_20240825T134701_N0511_R024_T20HPB_20240825T202404.SAFE_6,GOOD,-39.32373305869609,-61.552803953235156,chips/S2A_MSIL1C_20240825T134701_N0511_R024_T20HPB_20240825T202404.SAFE_6.png,2024-08-25T14:03:33.282910+00:00 +S2A_MSIL1C_20240824T071621_N0511_R006_T38MKV_20240824T110124.SAFE_0,GOOD,-5.052219890483417,42.48985424182737,chips/S2A_MSIL1C_20240824T071621_N0511_R006_T38MKV_20240824T110124.SAFE_0.png,2024-08-24T07:41:08.965341+00:00 +S2B_MSIL1C_20240824T133149_N0511_R081_T23MMU_20240824T151028.SAFE_1,BAD,-1.2374785264111514,-45.59247068956832,chips/S2B_MSIL1C_20240824T133149_N0511_R081_T23MMU_20240824T151028.SAFE_1.png,2024-08-24T13:32:40.574802+00:00 +S2B_MSIL1C_20240830T172349_N0511_R026_T12HXC_20240830T220544.SAFE_0,BAD,-38.561280265472774,-109.43749331355173,chips/S2B_MSIL1C_20240830T172349_N0511_R026_T12HXC_20240830T220544.SAFE_0.png,2024-08-30T17:24:39.985774+00:00 +S2A_MSIL1C_20240824T005701_N0511_R002_T54LUQ_20240824T033538.SAFE_3,GOOD,-9.851853944223212,139.88511336824504,chips/S2A_MSIL1C_20240824T005701_N0511_R002_T54LUQ_20240824T033538.SAFE_3.png,2024-08-24T00:59:46.987789+00:00 +S2A_MSIL1C_20240830T181401_N0511_R098_T10HFF_20240830T230705.SAFE_0,GOOD,-35.47986424941379,-121.83074968445467,chips/S2A_MSIL1C_20240830T181401_N0511_R098_T10HFF_20240830T230705.SAFE_0.png,2024-08-30T18:14:12.299208+00:00 +S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_11,GOOD,-32.7733560898778,-71.51485799905284,chips/S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_11.png,2024-08-25T14:52:08.565835+00:00 +S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_8,GOOD,-17.617113327485605,-149.78059812071663,chips/S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_8.png,2024-08-24T20:20:01.269311+00:00 diff --git a/sentinel2_feedback/feedback.sample.csv b/sentinel2_feedback/feedback.sample.csv new file mode 100644 index 00000000..a5248872 --- /dev/null +++ b/sentinel2_feedback/feedback.sample.csv @@ -0,0 
+1,48 @@ +event_id,event_type,username,value,timestamp,comments,additional_context,event_url +S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:58:03.663261Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:57:31.244421Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T012659_N0511_R074_T54TYT_20240824T031649.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:57:21.001311Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T012659_N0511_R074_T54TYT_20240824T031649.SAFE_0?notification_type=event-history +S2A_MSIL1C_20240825T082601_N0511_R021_T37TDM_20240825T105755.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:57:09.784459Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240825T082601_N0511_R021_T37TDM_20240825T105755.SAFE_1?notification_type=event-history +S2A_MSIL1C_20240824T104021_N0511_R008_T31SBC_20240824T142025.SAFE_43,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:56:56.478010Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T104021_N0511_R008_T31SBC_20240824T142025.SAFE_43?notification_type=event-history +S2B_MSIL1C_20240824T145729_N0511_R082_T21TWN_20240824T182803.SAFE_9,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:56:43.655799Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T145729_N0511_R082_T21TWN_20240824T182803.SAFE_9?notification_type=event-history 
+S2A_MSIL1C_20240824T153931_N0511_R011_T19TCJ_20240824T205913.SAFE_150,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:56:28.330937Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T153931_N0511_R011_T19TCJ_20240824T205913.SAFE_150?notification_type=event-history +S2A_MSIL1C_20240824T155521_N0511_R011_T17QRE_20240824T205926.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:56:14.747778Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T155521_N0511_R011_T17QRE_20240824T205926.SAFE_1?notification_type=event-history +S2A_MSIL1C_20240826T213901_N0511_R043_T03QWF_20240827T002829.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:55:57.390492Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240826T213901_N0511_R043_T03QWF_20240827T002829.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T030519_N0511_R075_T48QZD_20240824T064532.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:55:40.642555Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T030519_N0511_R075_T48QZD_20240824T064532.SAFE_0?notification_type=event-history +S2A_MSIL1C_20240829T213531_N0511_R086_T05VMC_20240829T232900.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:55:28.134031Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240829T213531_N0511_R086_T05VMC_20240829T232900.SAFE_0?notification_type=event-history +S2A_MSIL1C_20240824T141711_N0511_R010_T21PXS_20240824T174028.SAFE_2,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:55:17.256447Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T141711_N0511_R010_T21PXS_20240824T174028.SAFE_2?notification_type=event-history +S2B_MSIL1C_20240824T012659_N0511_R074_T54SVE_20240824T031649.SAFE_15,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:55:04.223047Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T012659_N0511_R074_T54SVE_20240824T031649.SAFE_15?notification_type=event-history 
+S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_3,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:54:40.315874Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_3?notification_type=event-history +S2A_MSIL1C_20240824T071621_N0511_R006_T37LFJ_20240824T110124.SAFE_1,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:54:29.778703Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T071621_N0511_R006_T37LFJ_20240824T110124.SAFE_1?notification_type=event-history +S2A_MSIL1C_20240824T005701_N0511_R002_T54MWC_20240824T033538.SAFE_4,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:54:16.611818Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T005701_N0511_R002_T54MWC_20240824T033538.SAFE_4?notification_type=event-history +S2B_MSIL1C_20240824T180919_N0511_R084_T12RTT_20240824T214239.SAFE_1,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:53:54.069775Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T180919_N0511_R084_T12RTT_20240824T214239.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240824T000729_N0511_R073_T56LPN_20240824T014112.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:53:40.258803Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T000729_N0511_R073_T56LPN_20240824T014112.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T030519_N0511_R075_T48MWD_20240824T073032.SAFE_2,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:53:32.331938Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T030519_N0511_R075_T48MWD_20240824T073032.SAFE_2?notification_type=event-history +S2A_MSIL1C_20240824T085551_N0511_R007_T33LUH_20240824T130629.SAFE_2,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:53:18.874314Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T085551_N0511_R007_T33LUH_20240824T130629.SAFE_2?notification_type=event-history 
+S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_13,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:53:07.738325Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_13?notification_type=event-history +S2B_MSIL1C_20240824T133149_N0511_R081_T23NMA_20240824T151028.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:51:49.940125Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T133149_N0511_R081_T23NMA_20240824T151028.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_21,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:51:37.024503Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_21?notification_type=event-history +S2A_MSIL1C_20240830T194651_N0511_R099_T07LCH_20240831T000725.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:47:19.224923Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240830T194651_N0511_R099_T07LCH_20240831T000725.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T220619_N0511_R086_T60HWB_20240824T233423.SAFE_3,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:47:06.242762Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T220619_N0511_R086_T60HWB_20240824T233423.SAFE_3?notification_type=event-history +S2B_MSIL1C_20240826T004709_N0511_R102_T53HQA_20240826T021236.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:46:40.077309Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240826T004709_N0511_R102_T53HQA_20240826T021236.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240830T104039_N0511_R022_T29HNS_20240830T153355.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:46:01.658667Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240830T104039_N0511_R022_T29HNS_20240830T153355.SAFE_0?notification_type=event-history 
+S2B_MSIL1C_20240824T120639_N0511_R080_T24FXE_20240824T134405.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:45:49.925992Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T120639_N0511_R080_T24FXE_20240824T134405.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240825T143749_N0511_R096_T18HXE_20240825T175837.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:45:40.293322Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240825T143749_N0511_R096_T18HXE_20240825T175837.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240830T172349_N0511_R026_T12HXF_20240830T220544.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:45:27.933699Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240830T172349_N0511_R026_T12HXF_20240830T220544.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T001109_N0511_R073_T55HDS_20240824T025203.SAFE_2,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:45:19.163767Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T001109_N0511_R073_T55HDS_20240824T025203.SAFE_2?notification_type=event-history +S2A_MSIL1C_20240825T002701_N0511_R016_T54HVC_20240825T030743.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:44:55.572755Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240825T002701_N0511_R016_T54HVC_20240825T030743.SAFE_1?notification_type=event-history +S2A_MSIL1C_20240825T072241_N0511_R020_T36GYP_20240825T104922.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:44:44.245745Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240825T072241_N0511_R020_T36GYP_20240825T104922.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T030519_N0511_R075_T48MVC_20240824T073032.SAFE_2,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:44:14.873989Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T030519_N0511_R075_T48MVC_20240824T073032.SAFE_2?notification_type=event-history 
+S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:44:00.749130Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240824T000729_N0511_R073_T56LNN_20240824T014112.SAFE_2,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:43:47.404446Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T000729_N0511_R073_T56LNN_20240824T014112.SAFE_2?notification_type=event-history +S2B_MSIL1C_20240824T120639_N0511_R080_T25FDU_20240824T134405.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:43:33.677807Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T120639_N0511_R080_T25FDU_20240824T134405.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240825T143749_N0511_R096_T18HXD_20240825T175837.SAFE_21,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:43:20.830347Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240825T143749_N0511_R096_T18HXD_20240825T175837.SAFE_21?notification_type=event-history +S2B_MSIL1C_20240824T080609_N0511_R078_T34HFG_20240824T115109.SAFE_7,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:43:11.249676Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T080609_N0511_R078_T34HFG_20240824T115109.SAFE_7?notification_type=event-history +S2A_MSIL1C_20240825T134701_N0511_R024_T20HPB_20240825T202404.SAFE_6,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:42:48.199323Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240825T134701_N0511_R024_T20HPB_20240825T202404.SAFE_6?notification_type=event-history +S2A_MSIL1C_20240824T071621_N0511_R006_T38MKV_20240824T110124.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:42:37.420031Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T071621_N0511_R006_T38MKV_20240824T110124.SAFE_0?notification_type=event-history 
+S2B_MSIL1C_20240824T133149_N0511_R081_T23MMU_20240824T151028.SAFE_1,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:42:25.231095Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T133149_N0511_R081_T23MMU_20240824T151028.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240830T172349_N0511_R026_T12HXC_20240830T220544.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:42:15.848664Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240830T172349_N0511_R026_T12HXC_20240830T220544.SAFE_0?notification_type=event-history +S2A_MSIL1C_20240824T005701_N0511_R002_T54LUQ_20240824T033538.SAFE_3,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:42:04.889143Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T005701_N0511_R002_T54LUQ_20240824T033538.SAFE_3?notification_type=event-history +S2A_MSIL1C_20240830T181401_N0511_R098_T10HFF_20240830T230705.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:41:51.342104Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240830T181401_N0511_R098_T10HFF_20240830T230705.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_11,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:41:42.374140Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_11?notification_type=event-history +S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_8,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:41:15.095936Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_8?notification_type=event-history \ No newline at end of file diff --git a/sentinel2_feedback/model_config.yaml b/sentinel2_feedback/model_config.yaml new file mode 100644 index 00000000..647bd274 --- /dev/null +++ b/sentinel2_feedback/model_config.yaml @@ -0,0 +1,67 @@ +model: + class_path: 
rslearn.train.lightning_module.RslearnLightningModule + init_args: + model: + class_path: rslearn.models.multitask.MultiTaskModel + init_args: + encoder: + - class_path: rslearn.models.swin.Swin + init_args: + input_channels: 3 + output_layers: [1, 3, 5, 7] + pretrained: true + decoders: + class: + - class_path: rslearn.models.pooling_decoder.PoolingDecoder + init_args: + in_channels: 1024 + out_channels: 2 + - class_path: rslearn.train.tasks.classification.ClassificationHead + lr: 0.0001 + plateau_factor: 0.1 + plateau_patience: 10 + plateau_min_lr: 0 + plateau_cooldown: 0 +data: + class_path: rslearn.train.data_module.RslearnDataModule + init_args: + path: /home/mikej/rslearn_projects/sentinel2_feedback/joe_rslearn_data + inputs: + image: + data_type: "raster" + layers: ["sentinel2"] + bands: ["R", "G", "B"] + passthrough: true + label: + data_type: "vector" + layers: ["label"] + task: + class_path: rslearn.train.tasks.multi_task.MultiTask + init_args: + tasks: + class: + class_path: rslearn.train.tasks.classification.ClassificationTask + init_args: + property_name: "label" + classes: ["GOOD", "BAD"] + metric_kwargs: + average: "micro" # makes the wandb accuracy metric more readable + input_mapping: + class: + label: "targets" + batch_size: 64 + num_workers: 32 + default_config: + transforms: + - class_path: rslearn.train.transforms.normalize.Normalize + init_args: + mean: 0 + std: 255 +trainer: + max_epochs: 50 + callbacks: + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: "epoch" +rslp_project: sentinel2_feedback +rslp_experiment: sentinel2_20241006_joe_data diff --git a/sentinel2_feedback/retrieve_dataset.py b/sentinel2_feedback/retrieve_dataset.py new file mode 100644 index 00000000..79ba6234 --- /dev/null +++ b/sentinel2_feedback/retrieve_dataset.py @@ -0,0 +1,148 @@ +import argparse +import csv +import json +import logging +import os +import sys + +import requests +from pydantic import BaseModel +
# GraphQL endpoint; overridable through the SKYLIGHT_GRAPHQL_API environment
# variable, otherwise the integration API is used.
SKYLIGHT_GRAPHQL_API = os.getenv(
    "SKYLIGHT_GRAPHQL_API", "https://api-int.skylight.earth/graphql"
)

# Seconds to wait for the GraphQL API before abandoning a request.
API_TIMEOUT_SECONDS = 5
# Seconds to wait for the chip image host; without a timeout `requests`
# would block forever on an unresponsive server.
CHIP_TIMEOUT_SECONDS = 30

logger = logging.getLogger(__name__)


class ArgsModel(BaseModel):
    """Validated command-line arguments for this script."""

    # Bearer token sent in the Authorization header of every API request.
    token: str
    # Path of the input CSV; rows are read by their "event_id" and "value"
    # columns.
    feedback_csv: str
    # Path of the CSV written by `process_events`.
    output_csv: str
    # Directory that `download_chip` writes chips into. It has a default so
    # that callers which never download chips do not have to supply it.
    chips_dir: str = "chips"


def query_event_by_id(
    args: ArgsModel, session: requests.Session, event_id: str
) -> dict:
    """Fetch a single event from the Skylight GraphQL API.

    Args:
        args: Parsed arguments; only ``args.token`` is read.
        session: Session used to issue the POST request.
        event_id: Identifier passed as the ``eventId`` query variable.

    Returns:
        The value of ``data.event`` in the JSON response. It carries the
        fields requested by the query (``event_id``, ``event_type``,
        ``event_details.image_url``, ``start.time``, ``start.point``).

    Raises:
        requests.exceptions.HTTPError: On a non-2xx status, or when the
            response body contains a top-level ``errors`` entry.
    """
    headers = {
        "Authorization": f"Bearer {args.token}",
        "Content-Type": "application/json",
    }
    query = {
        "query": """
            query Event($eventId: ID!) {
                event(eventId: $eventId) {
                    event_id
                    event_type
                    event_details {
                        image_url
                    }
                    start {
                        time
                        point { lat lon }
                    }
                }
            }
        """,
        "variables": {
            "eventId": event_id,
        },
    }

    response = session.post(
        SKYLIGHT_GRAPHQL_API,
        headers=headers,
        data=json.dumps(query),
        timeout=API_TIMEOUT_SECONDS,
    )
    try:
        response.raise_for_status()
        # Decode the body once and reuse it below.
        payload = response.json()
        # GraphQL reports query failures in the body, so a 2xx status alone
        # is not proof of success.
        if "errors" in payload:
            raise requests.exceptions.HTTPError(payload["errors"])
    except requests.exceptions.HTTPError:
        logger.error(response.text)
        raise
    return payload["data"]["event"]


def download_chip(args: ArgsModel, event_data: dict) -> str:
    """Download the image chip of an event into ``args.chips_dir``.

    Args:
        args: Parsed arguments; only ``args.chips_dir`` is read.
        event_data: Event mapping providing ``event_id`` and
            ``event_details.image_url``.

    Returns:
        Path of the written file, ``<chips_dir>/<event_id>.png``.

    Raises:
        requests.exceptions.HTTPError: If the image host answers with a
            non-2xx status.
    """
    event_id = event_data["event_id"]
    chip_url = event_data["event_details"]["image_url"]

    os.makedirs(args.chips_dir, exist_ok=True)
    # NOTE(review): the ".png" suffix is fixed; the actual image format
    # served at image_url is not checked here.
    output_path = os.path.join(args.chips_dir, f"{event_id}.png")

    # The context manager releases the streamed connection even when the
    # status check or the file write fails.
    with requests.get(
        chip_url, stream=True, timeout=CHIP_TIMEOUT_SECONDS
    ) as response:
        response.raise_for_status()
        with open(output_path, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=8192):
                out_file.write(chunk)

    return output_path


def process_events(args: ArgsModel, session: requests.Session) -> None:
    """Write one output row per feedback row that resolves to an event.

    Reads ``args.feedback_csv``, looks every ``event_id`` up through the
    API and writes ``event_id``, ``label``, ``lat``, ``lon`` and ``time`` to
    ``args.output_csv``. Rows that fail for any reason are reported on
    stdout and skipped, so one bad event does not abort the whole export.

    Args:
        args: Parsed arguments (token and the two CSV paths).
        session: Session reused for all API requests.
    """
    fieldnames = ["event_id", "label", "lat", "lon", "time"]

    # newline="" lets the csv module do its own newline handling, as its
    # documentation requires for both reading and writing.
    with open(args.feedback_csv, mode="r", newline="") as feedback_file, open(
        args.output_csv, mode="w", newline=""
    ) as output_file:
        reader = csv.DictReader(feedback_file)
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            event_id = row["event_id"]
            print(f"Processing event {event_id}")

            # Deliberately broad: this is the per-row boundary of a
            # best-effort batch job.
            try:
                event_data = query_event_by_id(args, session, event_id)
                if not event_data:
                    raise LookupError(f"No data found for event {event_id}")

                # NOTE: chip download is currently disabled; call
                # download_chip(args, event_data) here to re-enable it.
                point = event_data["start"]["point"]
                writer.writerow(
                    {
                        "event_id": event_id,
                        "label": row["value"],
                        "lat": point["lat"],
                        "lon": point["lon"],
                        "time": event_data["start"]["time"],
                    }
                )
            except Exception as e:
                print(f"Failed to process event {event_id}: {e}")
                continue


def main() -> None:
    """Parse the command line and run the export."""
    parser = argparse.ArgumentParser(
        description="Retrieves chips for events from the Skylight API."
    )
    parser.add_argument(
        "--token", type=str, required=True, help="Authorization token for the API."
    )
    parser.add_argument(
        "--feedback_csv",
        type=str,
        required=True,
        help="CSV file containing event eo_sentinel2 event ids and feedback labels.",
    )
    parser.add_argument(
        "--output_csv",
        type=str,
        required=True,
        help="Output CSV file to store the dataset information.",
    )
    parser.add_argument(
        "--chips_dir",
        type=str,
        default="chips",
        help="Directory to store downloaded chips in.",
    )
    parsed_args = parser.parse_args()
    args = ArgsModel(**vars(parsed_args))  # convert parsed args to pydantic model

    # Close the session's pooled connections when the export is done.
    with requests.Session() as session:
        process_events(args, session)


if __name__ == "__main__":
    main()