From b91da314eb8d033ffae74a9b0863d131abda230c Mon Sep 17 00:00:00 2001 From: Mike Jacobi Date: Sun, 1 Sep 2024 10:22:09 -0700 Subject: [PATCH 1/7] created retrieve_dataset.py; started create_rslearn_data.py --- sentinel2_feedback/.gitignore | 3 + sentinel2_feedback/README.md | 41 ++++++ sentinel2_feedback/config.json | 52 ++++++++ sentinel2_feedback/create_rslearn_data.py | 151 +++++++++++++++++++++ sentinel2_feedback/dataset.sample.csv | 48 +++++++ sentinel2_feedback/feedback.sample.csv | 48 +++++++ sentinel2_feedback/retrieve_dataset.py | 154 ++++++++++++++++++++++ 7 files changed, 497 insertions(+) create mode 100644 sentinel2_feedback/.gitignore create mode 100644 sentinel2_feedback/README.md create mode 100644 sentinel2_feedback/config.json create mode 100644 sentinel2_feedback/create_rslearn_data.py create mode 100644 sentinel2_feedback/dataset.sample.csv create mode 100644 sentinel2_feedback/feedback.sample.csv create mode 100644 sentinel2_feedback/retrieve_dataset.py diff --git a/sentinel2_feedback/.gitignore b/sentinel2_feedback/.gitignore new file mode 100644 index 000000000..d19f7febf --- /dev/null +++ b/sentinel2_feedback/.gitignore @@ -0,0 +1,3 @@ +rslearn_data +chips +.DS_Store diff --git a/sentinel2_feedback/README.md b/sentinel2_feedback/README.md new file mode 100644 index 000000000..538e2a8c3 --- /dev/null +++ b/sentinel2_feedback/README.md @@ -0,0 +1,41 @@ +# Sentinel2 Feedback + +This project trains a model to determine if a given Sentinel-2 chip is valid or not. + +## Setup + +#### Dependencies + +```bash +python3 -m venv venv && source venv/bin/activate +pip install -r rslearn_projects/requirements.txt +``` + +#### Auth + +The `retrieve_dataset.py` script expects `--token`, which can be [accessed like this](https://api-int.skylight.earth/docs/#introduction-item-0). + +## Source data + +This model is trained on a dataset of Sentinel-2 chips that have been labeled as GOOD or BAD. 
+The feedback.csv file is an export from the Skylight In-App Feedback tool, filtered for +Sentinel-2 events. Each row in the feedback file has an event_id, a label, and a link to the +event in the Skylight app. + +A good way to generate a set of events to be labeled is to use the EAI [sample-events script](https://github.com/VulcanSkylight/eai/blob/master/ais/data/sample_events/sample-events.py#L1-L1) ([readme](https://github.com/VulcanSkylight/eai/blob/master/ais/data/sample_events/README.md#L1-L1)). + +## Dataset Pre-processing + +The `retrieve_dataset.py` script fetches event metadata from the Skylight API to identify the chip URL, and downloads the chip locally. +It outputs a csv file with the event_id, label, and local path to the chip, which is input into `create_rslearn_data.py`. + +``` +rslearn_projects/sentinel2_feedback $> +python retrieve_dataset.py --token $token --feedback_csv feedback.sample.csv --chips_dir chips --output_csv dataset.sample.csv +``` + +The `create_rslearn_data.py` script creates an rslearn dataset from the chips and labels. 
+ +``` +python create_rslearn_data.py --dataset_csv dataset.sample.csv --out_dir rslearn_dataset +``` diff --git a/sentinel2_feedback/config.json b/sentinel2_feedback/config.json new file mode 100644 index 000000000..97a1b28c2 --- /dev/null +++ b/sentinel2_feedback/config.json @@ -0,0 +1,52 @@ +{ + "layers": { + "chips": { + "type": "raster", + "band_sets": [ + { + "dtype": "uint8", + "bands": [ + "R", + "G", + "B" + ], + "format": { + "name": "png" + } + } + ] + }, + "label": { + "type": "raster", + "band_sets": [ + { + "dtype": "uint8", + "bands": [ + "label" + ], + "format": { + "name": "geotiff" + } + } + ] + }, + "output": { + "type": "raster", + "band_sets": [ + { + "dtype": "uint8", + "bands": [ + "output" + ], + "format": { + "name": "geotiff" + } + } + ] + } + }, + "tile_store": { + "name": "file", + "root_dir": "tiles" + } +} \ No newline at end of file diff --git a/sentinel2_feedback/create_rslearn_data.py b/sentinel2_feedback/create_rslearn_data.py new file mode 100644 index 000000000..4e465d9bd --- /dev/null +++ b/sentinel2_feedback/create_rslearn_data.py @@ -0,0 +1,151 @@ +import argparse +import csv +import math +import os +import shutil +from datetime import datetime, timedelta + +from pydantic import BaseModel +from pyproj import CRS, Transformer +from rslearn.dataset import Window +from rslearn.utils import LocalFileAPI, Projection +from rslearn.utils.raster_format import GeotiffRasterFormat + + +class ArgsModel(BaseModel): + dataset_csv: str + out_dir: str + + +class Record(BaseModel): + event_id: str + label: str + lat: float + lon: float + chip_path: str + time: str + + +def latlon_to_utm_zone(lat, lon): + """Determine the UTM zone for a given latitude and longitude.""" + zone_number = math.floor((lon + 180) / 6) + 1 + if lat >= 0: + epsg_code = 32600 + zone_number # Northern Hemisphere + else: + epsg_code = 32700 + zone_number # Southern Hemisphere + return epsg_code + + +def create_projection(lat, lon, pixel_size=10): + """Creates a 
Projection object based on the center latitude and longitude.""" + epsg_code = latlon_to_utm_zone(lat, lon) + crs = CRS.from_epsg(epsg_code) + return Projection(crs=crs, x_resolution=pixel_size, y_resolution=pixel_size) + + +def calculate_bounds( + lat, lon, pixel_width, pixel_height, pixel_size, projection +) -> tuple[int, int, int, int]: + """Calculate the bounds of the image in the projected coordinates.""" + transformer = Transformer.from_crs("epsg:4326", projection.crs, always_xy=True) + center_x, center_y = transformer.transform(lon, lat) + + half_width = (pixel_width / 2) * pixel_size + half_height = (pixel_height / 2) * pixel_size + + min_x = center_x - half_width + max_x = center_x + half_width + min_y = center_y - half_height + max_y = center_y + half_height + + return (min_x, min_y, max_x, max_y) + + +def create_rslearn_data(args: ArgsModel): + with open(args.dataset_csv, mode="r") as file: + reader = csv.DictReader(file) + for row in reader: + record = Record(**row) + projection = create_projection(record.lat, record.lon) + + # Define pixel size and image dimensions (adjust if necessary) + pixel_size = 10 # 10 meters per pixel for Sentinel-2 + chip_size = 128 # 128x128 pixel image + + # Calculate the geographic bounds of the PNG image + bounds = calculate_bounds( + record.lat, + record.lon, + chip_size, + chip_size, + pixel_size, + projection, + ) + + timestamp = datetime.fromisoformat(record.time) + window_root = os.path.join( + args.out_dir, "windows", record.event_id, record.label + ) + os.makedirs(window_root, exist_ok=True) + + # Create the Window object + window = Window( + file_api=LocalFileAPI(window_root), + group="images", + name=record.event_id, + projection=projection, + bounds=bounds, + time_range=( + timestamp - timedelta(minutes=1), + timestamp + timedelta(minutes=1), + ), + ) + window.save() + + """ + populate the chip layer + + this works by copying the chip image to the chips directory. 
+ """ + crop = None + file_api = window.file_api.get_folder("layers", "chips", "R_G_B") + GeotiffRasterFormat().encode_raster(file_api, projection, bounds, crop) + dst_chip_path = os.path.join(file_api.to_str(), "chip.png") + shutil.copyfile(record.chip_path, dst_chip_path) + complete_path = os.path.join(window_root, "layers", "chips", "completed") + os.system(f"touch {complete_path}") + + """ + populate the label layer + + this works by creating a dummy geotiff raster with the same bounds as the chip image + and a property to denote the label. + """ + file_api = window.file_api.get_folder("layers", "label", "label") + GeotiffRasterFormat().encode_raster(file_api, projection, bounds, crop) + # TODO write labels.json with the label + complete_path = os.path.join(window_root, "layers", "label", "completed") + os.system(f"touch {complete_path}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser( + description="Creates rslearn data from a CSV of events." + ) + parser.add_argument( + "--dataset_csv", + type=str, + required=True, + help="Dataset CSV file which was the --output_csv from retrieve_dataset.py.", + ) + parser.add_argument( + "--out_dir", + type=str, + required=True, + help="Location of the rslearn dataset.", + ) + parsed_args = parser.parse_args() + args = ArgsModel(**vars(parsed_args)) # convert parsed args to pydantic model + os.makedirs(args.out_dir, exist_ok=True) + shutil.copyfile("config.json", os.path.join(args.out_dir, "config.json")) + create_rslearn_data(args) diff --git a/sentinel2_feedback/dataset.sample.csv b/sentinel2_feedback/dataset.sample.csv new file mode 100644 index 000000000..3065ec832 --- /dev/null +++ b/sentinel2_feedback/dataset.sample.csv @@ -0,0 +1,48 @@ +event_id,label,lat,lon,chip_path,time +S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1,GOOD,25.017882990705132,60.23224195097599,chips/S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1.png,2024-08-24T06:42:33.239990+00:00 
+S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0,BAD,27.078108098808176,35.52532781443999,chips/S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0.png,2024-08-24T08:22:45.575754+00:00 +S2B_MSIL1C_20240824T012659_N0511_R074_T54TYT_20240824T031649.SAFE_0,GOOD,47.19227985752899,143.99787657896204,chips/S2B_MSIL1C_20240824T012659_N0511_R074_T54TYT_20240824T031649.SAFE_0.png,2024-08-24T01:33:59.727425+00:00 +S2A_MSIL1C_20240825T082601_N0511_R021_T37TDM_20240825T105755.SAFE_1,GOOD,46.936012943130834,38.03339685817958,chips/S2A_MSIL1C_20240825T082601_N0511_R021_T37TDM_20240825T105755.SAFE_1.png,2024-08-25T08:37:08.124548+00:00 +S2A_MSIL1C_20240824T104021_N0511_R008_T31SBC_20240824T142025.SAFE_43,BAD,38.76248092887349,0.2297392844560888,chips/S2A_MSIL1C_20240824T104021_N0511_R008_T31SBC_20240824T142025.SAFE_43.png,2024-08-24T10:50:29.694076+00:00 +S2B_MSIL1C_20240824T145729_N0511_R082_T21TWN_20240824T182803.SAFE_9,BAD,47.53486254599003,-56.82706102831171,chips/S2B_MSIL1C_20240824T145729_N0511_R082_T21TWN_20240824T182803.SAFE_9.png,2024-08-24T14:59:32.287809+00:00 +S2A_MSIL1C_20240824T153931_N0511_R011_T19TCJ_20240824T205913.SAFE_150,BAD,43.77709323764632,-71.4833862111625,chips/S2A_MSIL1C_20240824T153931_N0511_R011_T19TCJ_20240824T205913.SAFE_150.png,2024-08-24T15:50:54.484061+00:00 +S2A_MSIL1C_20240824T155521_N0511_R011_T17QRE_20240824T205926.SAFE_1,GOOD,21.97476628808408,-77.29573256204905,chips/S2A_MSIL1C_20240824T155521_N0511_R011_T17QRE_20240824T205926.SAFE_1.png,2024-08-24T15:57:04.172389+00:00 +S2A_MSIL1C_20240826T213901_N0511_R043_T03QWF_20240827T002829.SAFE_0,BAD,23.35888197008924,-164.21252767350208,chips/S2A_MSIL1C_20240826T213901_N0511_R043_T03QWF_20240827T002829.SAFE_0.png,2024-08-26T21:39:12.423209+00:00 +S2B_MSIL1C_20240824T030519_N0511_R075_T48QZD_20240824T064532.SAFE_0,BAD,17.13641949330121,108.1284971882313,chips/S2B_MSIL1C_20240824T030519_N0511_R075_T48QZD_20240824T064532.SAFE_0.png,2024-08-24T03:23:28.858875+00:00 
+S2A_MSIL1C_20240829T213531_N0511_R086_T05VMC_20240829T232900.SAFE_0,GOOD,56.78536939555178,-154.14466193690345,chips/S2A_MSIL1C_20240829T213531_N0511_R086_T05VMC_20240829T232900.SAFE_0.png,2024-08-29T21:39:46.826253+00:00 +S2A_MSIL1C_20240824T141711_N0511_R010_T21PXS_20240824T174028.SAFE_2,GOOD,14.632537026098486,-55.1752874982515,chips/S2A_MSIL1C_20240824T141711_N0511_R010_T21PXS_20240824T174028.SAFE_2.png,2024-08-24T14:18:28.819872+00:00 +S2B_MSIL1C_20240824T012659_N0511_R074_T54SVE_20240824T031649.SAFE_15,GOOD,35.60967346027558,139.96927073911502,chips/S2B_MSIL1C_20240824T012659_N0511_R074_T54SVE_20240824T031649.SAFE_15.png,2024-08-24T01:37:17.948576+00:00 +S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_3,BAD,-6.126673229908011,-33.62482615288371,chips/S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_3.png,2024-08-24T12:43:31.653440+00:00 +S2A_MSIL1C_20240824T071621_N0511_R006_T37LFJ_20240824T110124.SAFE_1,BAD,-10.31850763736735,40.43420590451193,chips/S2A_MSIL1C_20240824T071621_N0511_R006_T37LFJ_20240824T110124.SAFE_1.png,2024-08-24T07:42:44.986989+00:00 +S2A_MSIL1C_20240824T005701_N0511_R002_T54MWC_20240824T033538.SAFE_4,GOOD,-2.6807108599634453,141.36923533444912,chips/S2A_MSIL1C_20240824T005701_N0511_R002_T54MWC_20240824T033538.SAFE_4.png,2024-08-24T00:57:44.324339+00:00 +S2B_MSIL1C_20240824T180919_N0511_R084_T12RTT_20240824T214239.SAFE_1,BAD,29.55758252425047,-113.5569669261962,chips/S2B_MSIL1C_20240824T180919_N0511_R084_T12RTT_20240824T214239.SAFE_1.png,2024-08-24T18:26:07.702165+00:00 +S2B_MSIL1C_20240824T000729_N0511_R073_T56LPN_20240824T014112.SAFE_0,GOOD,-10.964371849111295,154.03240231072866,chips/S2B_MSIL1C_20240824T000729_N0511_R073_T56LPN_20240824T014112.SAFE_0.png,2024-08-24T00:09:49.809478+00:00 
+S2B_MSIL1C_20240824T030519_N0511_R075_T48MWD_20240824T073032.SAFE_2,BAD,-1.2951197791084286,105.13955252791277,chips/S2B_MSIL1C_20240824T030519_N0511_R075_T48MWD_20240824T073032.SAFE_2.png,2024-08-24T03:28:29.119102+00:00 +S2A_MSIL1C_20240824T085551_N0511_R007_T33LUH_20240824T130629.SAFE_2,BAD,-11.072755196925801,13.689995669703219,chips/S2A_MSIL1C_20240824T085551_N0511_R007_T33LUH_20240824T130629.SAFE_2.png,2024-08-24T09:23:46.860376+00:00 +S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_13,BAD,-32.775232438717744,-71.51863454934579,chips/S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_13.png,2024-08-25T14:52:08.565835+00:00 +S2B_MSIL1C_20240824T133149_N0511_R081_T23NMA_20240824T151028.SAFE_1,GOOD,0.16036775089994446,-45.38292269098038,chips/S2B_MSIL1C_20240824T133149_N0511_R081_T23NMA_20240824T151028.SAFE_1.png,2024-08-24T13:32:11.587972+00:00 +S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_21,BAD,-17.615640826869498,-149.6886734533582,chips/S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_21.png,2024-08-24T20:20:01.269311+00:00 +S2A_MSIL1C_20240830T194651_N0511_R099_T07LCH_20240831T000725.SAFE_0,BAD,-11.3521054062511,-141.99423322739145,chips/S2A_MSIL1C_20240830T194651_N0511_R099_T07LCH_20240831T000725.SAFE_0.png,2024-08-30T19:48:08.143433+00:00 +S2B_MSIL1C_20240824T220619_N0511_R086_T60HWB_20240824T233423.SAFE_3,GOOD,-39.265515444133214,177.8776425311039,chips/S2B_MSIL1C_20240824T220619_N0511_R086_T60HWB_20240824T233423.SAFE_3.png,2024-08-24T22:06:58.656794+00:00 +S2B_MSIL1C_20240826T004709_N0511_R102_T53HQA_20240826T021236.SAFE_1,GOOD,-35.657133680475866,138.0251110845672,chips/S2B_MSIL1C_20240826T004709_N0511_R102_T53HQA_20240826T021236.SAFE_1.png,2024-08-26T00:57:02.434192+00:00 
+S2B_MSIL1C_20240830T104039_N0511_R022_T29HNS_20240830T153355.SAFE_0,BAD,-39.36279991790451,-8.621806245141176,chips/S2B_MSIL1C_20240830T104039_N0511_R022_T29HNS_20240830T153355.SAFE_0.png,2024-08-30T10:42:06.639789+00:00 +S2B_MSIL1C_20240824T120639_N0511_R080_T24FXE_20240824T134405.SAFE_0,GOOD,-54.198397052803706,-36.572196387687825,chips/S2B_MSIL1C_20240824T120639_N0511_R080_T24FXE_20240824T134405.SAFE_0.png,2024-08-24T12:07:07.092793+00:00 +S2B_MSIL1C_20240825T143749_N0511_R096_T18HXE_20240825T175837.SAFE_1,GOOD,-36.72280065608185,-73.0085908356438,chips/S2B_MSIL1C_20240825T143749_N0511_R096_T18HXE_20240825T175837.SAFE_1.png,2024-08-25T14:53:12.199095+00:00 +S2B_MSIL1C_20240830T172349_N0511_R026_T12HXF_20240830T220544.SAFE_0,BAD,-35.70119107403259,-108.99844949829743,chips/S2B_MSIL1C_20240830T172349_N0511_R026_T12HXF_20240830T220544.SAFE_0.png,2024-08-30T17:23:57.099301+00:00 +S2B_MSIL1C_20240824T001109_N0511_R073_T55HDS_20240824T025203.SAFE_2,GOOD,-39.19865767307543,146.32244630315722,chips/S2B_MSIL1C_20240824T001109_N0511_R073_T55HDS_20240824T025203.SAFE_2.png,2024-08-24T00:17:47.583229+00:00 +S2A_MSIL1C_20240825T002701_N0511_R016_T54HVC_20240825T030743.SAFE_1,GOOD,-38.075509695491235,140.73563030466045,chips/S2A_MSIL1C_20240825T002701_N0511_R016_T54HVC_20240825T030743.SAFE_1.png,2024-08-25T00:37:41.533281+00:00 +S2A_MSIL1C_20240825T072241_N0511_R020_T36GYP_20240825T104922.SAFE_0,GOOD,-46.44868128536897,36.29774599839882,chips/S2A_MSIL1C_20240825T072241_N0511_R020_T36GYP_20240825T104922.SAFE_0.png,2024-08-25T07:22:51.441690+00:00 +S2B_MSIL1C_20240824T030519_N0511_R075_T48MVC_20240824T073032.SAFE_2,GOOD,-1.9397016415551755,104.84391409855048,chips/S2B_MSIL1C_20240824T030519_N0511_R075_T48MVC_20240824T073032.SAFE_2.png,2024-08-24T03:28:46.966992+00:00 
+S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_1,GOOD,-5.735759242662593,-33.56706209953899,chips/S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_1.png,2024-08-24T12:43:31.653440+00:00 +S2B_MSIL1C_20240824T000729_N0511_R073_T56LNN_20240824T014112.SAFE_2,BAD,-11.41799391835017,153.19190545897223,chips/S2B_MSIL1C_20240824T000729_N0511_R073_T56LNN_20240824T014112.SAFE_2.png,2024-08-24T00:09:53.581970+00:00 +S2B_MSIL1C_20240824T120639_N0511_R080_T25FDU_20240824T134405.SAFE_0,GOOD,-55.09049854365485,-34.49673066246471,chips/S2B_MSIL1C_20240824T120639_N0511_R080_T25FDU_20240824T134405.SAFE_0.png,2024-08-24T12:07:11.025822+00:00 +S2B_MSIL1C_20240825T143749_N0511_R096_T18HXD_20240825T175837.SAFE_21,BAD,-37.63275425031095,-73.68884068128601,chips/S2B_MSIL1C_20240825T143749_N0511_R096_T18HXD_20240825T175837.SAFE_21.png,2024-08-25T14:53:26.460918+00:00 +S2B_MSIL1C_20240824T080609_N0511_R078_T34HFG_20240824T115109.SAFE_7,GOOD,-34.74716804694033,22.461706637069028,chips/S2B_MSIL1C_20240824T080609_N0511_R078_T34HFG_20240824T115109.SAFE_7.png,2024-08-24T08:39:57.987175+00:00 +S2A_MSIL1C_20240825T134701_N0511_R024_T20HPB_20240825T202404.SAFE_6,GOOD,-39.32373305869609,-61.552803953235156,chips/S2A_MSIL1C_20240825T134701_N0511_R024_T20HPB_20240825T202404.SAFE_6.png,2024-08-25T14:03:33.282910+00:00 +S2A_MSIL1C_20240824T071621_N0511_R006_T38MKV_20240824T110124.SAFE_0,GOOD,-5.052219890483417,42.48985424182737,chips/S2A_MSIL1C_20240824T071621_N0511_R006_T38MKV_20240824T110124.SAFE_0.png,2024-08-24T07:41:08.965341+00:00 +S2B_MSIL1C_20240824T133149_N0511_R081_T23MMU_20240824T151028.SAFE_1,BAD,-1.2374785264111514,-45.59247068956832,chips/S2B_MSIL1C_20240824T133149_N0511_R081_T23MMU_20240824T151028.SAFE_1.png,2024-08-24T13:32:40.574802+00:00 
+S2B_MSIL1C_20240830T172349_N0511_R026_T12HXC_20240830T220544.SAFE_0,BAD,-38.561280265472774,-109.43749331355173,chips/S2B_MSIL1C_20240830T172349_N0511_R026_T12HXC_20240830T220544.SAFE_0.png,2024-08-30T17:24:39.985774+00:00 +S2A_MSIL1C_20240824T005701_N0511_R002_T54LUQ_20240824T033538.SAFE_3,GOOD,-9.851853944223212,139.88511336824504,chips/S2A_MSIL1C_20240824T005701_N0511_R002_T54LUQ_20240824T033538.SAFE_3.png,2024-08-24T00:59:46.987789+00:00 +S2A_MSIL1C_20240830T181401_N0511_R098_T10HFF_20240830T230705.SAFE_0,GOOD,-35.47986424941379,-121.83074968445467,chips/S2A_MSIL1C_20240830T181401_N0511_R098_T10HFF_20240830T230705.SAFE_0.png,2024-08-30T18:14:12.299208+00:00 +S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_11,GOOD,-32.7733560898778,-71.51485799905284,chips/S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_11.png,2024-08-25T14:52:08.565835+00:00 +S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_8,GOOD,-17.617113327485605,-149.78059812071663,chips/S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_8.png,2024-08-24T20:20:01.269311+00:00 diff --git a/sentinel2_feedback/feedback.sample.csv b/sentinel2_feedback/feedback.sample.csv new file mode 100644 index 000000000..a5248872d --- /dev/null +++ b/sentinel2_feedback/feedback.sample.csv @@ -0,0 +1,48 @@ +event_id,event_type,username,value,timestamp,comments,additional_context,event_url +S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:58:03.663261Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:57:31.244421Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0?notification_type=event-history 
+S2B_MSIL1C_20240824T012659_N0511_R074_T54TYT_20240824T031649.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:57:21.001311Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T012659_N0511_R074_T54TYT_20240824T031649.SAFE_0?notification_type=event-history +S2A_MSIL1C_20240825T082601_N0511_R021_T37TDM_20240825T105755.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:57:09.784459Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240825T082601_N0511_R021_T37TDM_20240825T105755.SAFE_1?notification_type=event-history +S2A_MSIL1C_20240824T104021_N0511_R008_T31SBC_20240824T142025.SAFE_43,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:56:56.478010Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T104021_N0511_R008_T31SBC_20240824T142025.SAFE_43?notification_type=event-history +S2B_MSIL1C_20240824T145729_N0511_R082_T21TWN_20240824T182803.SAFE_9,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:56:43.655799Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T145729_N0511_R082_T21TWN_20240824T182803.SAFE_9?notification_type=event-history +S2A_MSIL1C_20240824T153931_N0511_R011_T19TCJ_20240824T205913.SAFE_150,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:56:28.330937Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T153931_N0511_R011_T19TCJ_20240824T205913.SAFE_150?notification_type=event-history +S2A_MSIL1C_20240824T155521_N0511_R011_T17QRE_20240824T205926.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:56:14.747778Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T155521_N0511_R011_T17QRE_20240824T205926.SAFE_1?notification_type=event-history +S2A_MSIL1C_20240826T213901_N0511_R043_T03QWF_20240827T002829.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:55:57.390492Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240826T213901_N0511_R043_T03QWF_20240827T002829.SAFE_0?notification_type=event-history 
+S2B_MSIL1C_20240824T030519_N0511_R075_T48QZD_20240824T064532.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:55:40.642555Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T030519_N0511_R075_T48QZD_20240824T064532.SAFE_0?notification_type=event-history +S2A_MSIL1C_20240829T213531_N0511_R086_T05VMC_20240829T232900.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:55:28.134031Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240829T213531_N0511_R086_T05VMC_20240829T232900.SAFE_0?notification_type=event-history +S2A_MSIL1C_20240824T141711_N0511_R010_T21PXS_20240824T174028.SAFE_2,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:55:17.256447Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T141711_N0511_R010_T21PXS_20240824T174028.SAFE_2?notification_type=event-history +S2B_MSIL1C_20240824T012659_N0511_R074_T54SVE_20240824T031649.SAFE_15,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:55:04.223047Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T012659_N0511_R074_T54SVE_20240824T031649.SAFE_15?notification_type=event-history +S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_3,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:54:40.315874Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_3?notification_type=event-history +S2A_MSIL1C_20240824T071621_N0511_R006_T37LFJ_20240824T110124.SAFE_1,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:54:29.778703Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T071621_N0511_R006_T37LFJ_20240824T110124.SAFE_1?notification_type=event-history +S2A_MSIL1C_20240824T005701_N0511_R002_T54MWC_20240824T033538.SAFE_4,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:54:16.611818Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T005701_N0511_R002_T54MWC_20240824T033538.SAFE_4?notification_type=event-history 
+S2B_MSIL1C_20240824T180919_N0511_R084_T12RTT_20240824T214239.SAFE_1,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:53:54.069775Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T180919_N0511_R084_T12RTT_20240824T214239.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240824T000729_N0511_R073_T56LPN_20240824T014112.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:53:40.258803Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T000729_N0511_R073_T56LPN_20240824T014112.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T030519_N0511_R075_T48MWD_20240824T073032.SAFE_2,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:53:32.331938Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T030519_N0511_R075_T48MWD_20240824T073032.SAFE_2?notification_type=event-history +S2A_MSIL1C_20240824T085551_N0511_R007_T33LUH_20240824T130629.SAFE_2,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:53:18.874314Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T085551_N0511_R007_T33LUH_20240824T130629.SAFE_2?notification_type=event-history +S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_13,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:53:07.738325Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_13?notification_type=event-history +S2B_MSIL1C_20240824T133149_N0511_R081_T23NMA_20240824T151028.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:51:49.940125Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T133149_N0511_R081_T23NMA_20240824T151028.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_21,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:51:37.024503Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_21?notification_type=event-history 
+S2A_MSIL1C_20240830T194651_N0511_R099_T07LCH_20240831T000725.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:47:19.224923Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240830T194651_N0511_R099_T07LCH_20240831T000725.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T220619_N0511_R086_T60HWB_20240824T233423.SAFE_3,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:47:06.242762Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T220619_N0511_R086_T60HWB_20240824T233423.SAFE_3?notification_type=event-history +S2B_MSIL1C_20240826T004709_N0511_R102_T53HQA_20240826T021236.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:46:40.077309Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240826T004709_N0511_R102_T53HQA_20240826T021236.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240830T104039_N0511_R022_T29HNS_20240830T153355.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:46:01.658667Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240830T104039_N0511_R022_T29HNS_20240830T153355.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T120639_N0511_R080_T24FXE_20240824T134405.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:45:49.925992Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T120639_N0511_R080_T24FXE_20240824T134405.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240825T143749_N0511_R096_T18HXE_20240825T175837.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:45:40.293322Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240825T143749_N0511_R096_T18HXE_20240825T175837.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240830T172349_N0511_R026_T12HXF_20240830T220544.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:45:27.933699Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240830T172349_N0511_R026_T12HXF_20240830T220544.SAFE_0?notification_type=event-history 
+S2B_MSIL1C_20240824T001109_N0511_R073_T55HDS_20240824T025203.SAFE_2,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:45:19.163767Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T001109_N0511_R073_T55HDS_20240824T025203.SAFE_2?notification_type=event-history +S2A_MSIL1C_20240825T002701_N0511_R016_T54HVC_20240825T030743.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:44:55.572755Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240825T002701_N0511_R016_T54HVC_20240825T030743.SAFE_1?notification_type=event-history +S2A_MSIL1C_20240825T072241_N0511_R020_T36GYP_20240825T104922.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:44:44.245745Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240825T072241_N0511_R020_T36GYP_20240825T104922.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T030519_N0511_R075_T48MVC_20240824T073032.SAFE_2,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:44:14.873989Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T030519_N0511_R075_T48MVC_20240824T073032.SAFE_2?notification_type=event-history +S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_1,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:44:00.749130Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T124311_N0511_R009_T25MDP_20240824T173954.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240824T000729_N0511_R073_T56LNN_20240824T014112.SAFE_2,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:43:47.404446Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T000729_N0511_R073_T56LNN_20240824T014112.SAFE_2?notification_type=event-history +S2B_MSIL1C_20240824T120639_N0511_R080_T25FDU_20240824T134405.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:43:33.677807Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T120639_N0511_R080_T25FDU_20240824T134405.SAFE_0?notification_type=event-history 
+S2B_MSIL1C_20240825T143749_N0511_R096_T18HXD_20240825T175837.SAFE_21,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:43:20.830347Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240825T143749_N0511_R096_T18HXD_20240825T175837.SAFE_21?notification_type=event-history +S2B_MSIL1C_20240824T080609_N0511_R078_T34HFG_20240824T115109.SAFE_7,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:43:11.249676Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T080609_N0511_R078_T34HFG_20240824T115109.SAFE_7?notification_type=event-history +S2A_MSIL1C_20240825T134701_N0511_R024_T20HPB_20240825T202404.SAFE_6,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:42:48.199323Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240825T134701_N0511_R024_T20HPB_20240825T202404.SAFE_6?notification_type=event-history +S2A_MSIL1C_20240824T071621_N0511_R006_T38MKV_20240824T110124.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:42:37.420031Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T071621_N0511_R006_T38MKV_20240824T110124.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240824T133149_N0511_R081_T23MMU_20240824T151028.SAFE_1,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:42:25.231095Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T133149_N0511_R081_T23MMU_20240824T151028.SAFE_1?notification_type=event-history +S2B_MSIL1C_20240830T172349_N0511_R026_T12HXC_20240830T220544.SAFE_0,eo_sentinel2,mikej@allenai.org,BAD,2024-08-31T16:42:15.848664Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240830T172349_N0511_R026_T12HXC_20240830T220544.SAFE_0?notification_type=event-history +S2A_MSIL1C_20240824T005701_N0511_R002_T54LUQ_20240824T033538.SAFE_3,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:42:04.889143Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240824T005701_N0511_R002_T54LUQ_20240824T033538.SAFE_3?notification_type=event-history 
+S2A_MSIL1C_20240830T181401_N0511_R098_T10HFF_20240830T230705.SAFE_0,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:41:51.342104Z,,,https://sc-integration.skylight.earth/event_id/S2A_MSIL1C_20240830T181401_N0511_R098_T10HFF_20240830T230705.SAFE_0?notification_type=event-history +S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_11,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:41:42.374140Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240825T143749_N0511_R096_T19HBD_20240825T175837.SAFE_11?notification_type=event-history +S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_8,eo_sentinel2,mikej@allenai.org,GOOD,2024-08-31T16:41:15.095936Z,,,https://sc-integration.skylight.earth/event_id/S2B_MSIL1C_20240824T201909_N0511_R085_T06KTF_20240824T230604.SAFE_8?notification_type=event-history \ No newline at end of file diff --git a/sentinel2_feedback/retrieve_dataset.py b/sentinel2_feedback/retrieve_dataset.py new file mode 100644 index 000000000..488dcf8a5 --- /dev/null +++ b/sentinel2_feedback/retrieve_dataset.py @@ -0,0 +1,154 @@ +import argparse +import csv +import json +import logging +import os +import sys + +import requests +from pydantic import BaseModel + +SKYLIGHT_GRAPHQL_API = os.getenv( + "SKYLIGHT_GRAPHQL_API", "https://api-int.skylight.earth/graphql" +) + +logger = logging.getLogger(__name__) + + +class ArgsModel(BaseModel): + token: str + feedback_csv: str + chips_dir: str + output_csv: str + + +def query_event_by_id( + args: ArgsModel, session: requests.Session, event_id: str +) -> dict: + headers = { + "Authorization": f"Bearer {args.token}", + "Content-Type": "application/json", + } + query = { + "query": """ + query Event($eventId: ID!) 
{ + event(eventId: $eventId) { + event_id + event_type + event_details { + image_url + } + start { + time + point { lat lon } + } + } + } + """, + "variables": { + "eventId": event_id, + }, + } + + response = session.post( + SKYLIGHT_GRAPHQL_API, + headers=headers, + data=json.dumps(query), + timeout=5, + ) + try: + response.raise_for_status() + if "errors" in response.json(): + raise requests.exceptions.HTTPError(response.json()["errors"]) + except requests.exceptions.HTTPError as e: + logger.error(response.text) + raise e + return response.json()["data"]["event"] + + +def download_chip(args: ArgsModel, event_data: dict) -> str: + event_id = event_data["event_id"] + chip_url = event_data["event_details"]["image_url"] + response = requests.get(chip_url, stream=True) + response.raise_for_status() + + output_path = os.path.join(args.chips_dir, f"{event_id}.png") + with open(output_path, "wb") as out_file: + for chunk in response.iter_content(chunk_size=8192): + out_file.write(chunk) + + return output_path + + +def process_events(args: ArgsModel, session: requests.Session): + with open(args.feedback_csv, mode="r") as file: + reader = csv.DictReader(file) + with open(args.output_csv, mode="w", newline="") as output_file: + fieldnames = ["event_id", "label", "lat", "lon", "chip_path", "time"] + writer = csv.DictWriter(output_file, fieldnames=fieldnames) + writer.writeheader() + + for row in reader: + event_id = row["event_id"] + print(f"Processing event {event_id}") + + # Query event by event_id + try: + event_data = query_event_by_id(args, session, event_id) + if not event_data: + raise Exception(f"No data found for event {event_id}") + + # Download the chip and get the local path + chip_path = download_chip(args, event_data) + + # Extract label and coordinates + label = row["value"] + point = event_data["start"]["point"] + + # Write to the output CSV + writer.writerow( + { + "event_id": event_id, + "label": label, + "lat": point["lat"], + "lon": point["lon"], + 
"chip_path": chip_path, + "time": event_data["start"]["time"], + } + ) + except Exception as e: + print(f"Failed to process event {event_id}: {e}") + raise e + + +if __name__ == "__main__": + session = requests.Session() + parser = argparse.ArgumentParser( + description="Retrieves chips for events from the Skylight API." + ) + parser.add_argument( + "--token", type=str, required=True, help="Authorization token for the API." + ) + parser.add_argument( + "--feedback_csv", + type=str, + required=True, + help="CSV file containing event eo_sentinel2 event ids and feedback labels.", + ) + parser.add_argument( + "--chips_dir", + type=str, + required=True, + help="Directory where to store the chips.", + ) + parser.add_argument( + "--output_csv", + type=str, + required=True, + help="Output CSV file to store the dataset information.", + ) + parsed_args = parser.parse_args() + args = ArgsModel(**vars(parsed_args)) # convert parsed args to pydantic model + + os.makedirs(args.chips_dir, exist_ok=True) + process_events(args, session) From 3e3b843eaa99ebe7487743a3baef15beb74fda64 Mon Sep 17 00:00:00 2001 From: Mike Jacobi Date: Mon, 9 Sep 2024 09:21:21 -0700 Subject: [PATCH 2/7] able to produce rslearn dataset and run prepare/ingest/materialize --- sentinel2_feedback/.gitignore | 1 + sentinel2_feedback/config.json | 46 +++--- sentinel2_feedback/create_rslearn_data.py | 147 +++++++++++--------- sentinel2_feedback/dataset.2rows-sample.csv | 3 + sentinel2_feedback/model_config.yaml | 69 +++++++++ 5 files changed, 171 insertions(+), 95 deletions(-) create mode 100644 sentinel2_feedback/dataset.2rows-sample.csv create mode 100644 sentinel2_feedback/model_config.yaml diff --git a/sentinel2_feedback/.gitignore b/sentinel2_feedback/.gitignore index d19f7febf..061aec967 100644 --- a/sentinel2_feedback/.gitignore +++ b/sentinel2_feedback/.gitignore @@ -1,3 +1,4 @@ rslearn_data chips +cache .DS_Store diff --git a/sentinel2_feedback/config.json b/sentinel2_feedback/config.json index 
97a1b28c2..2a9c2e09c 100644 --- a/sentinel2_feedback/config.json +++ b/sentinel2_feedback/config.json @@ -1,6 +1,6 @@ { "layers": { - "chips": { + "sentinel2": { "type": "raster", "band_sets": [ { @@ -10,39 +10,27 @@ "G", "B" ], - "format": { - "name": "png" - } - } - ] - }, - "label": { - "type": "raster", - "band_sets": [ - { - "dtype": "uint8", - "bands": [ - "label" - ], "format": { "name": "geotiff" } } - ] + ], + "data_source": { + "name": "rslearn.data_sources.gcp_public_data.Sentinel2", + "index_cache_dir": "/Users/ai2/ai2-code/rslearn_projects/sentinel2_feedback/cache/", + "use_rtree_index": false, + "max_time_delta": "1d", + "sort_by": "cloud_cover", + "query_config": { + "max_matches": 6, + "space_mode": "CONTAINS" + }, + "time_offset": "-90d", + "duration": "180d" + } }, - "output": { - "type": "raster", - "band_sets": [ - { - "dtype": "uint8", - "bands": [ - "output" - ], - "format": { - "name": "geotiff" - } - } - ] + "label": { + "type": "vector" } }, "tile_store": { diff --git a/sentinel2_feedback/create_rslearn_data.py b/sentinel2_feedback/create_rslearn_data.py index 4e465d9bd..38a6ef2d9 100644 --- a/sentinel2_feedback/create_rslearn_data.py +++ b/sentinel2_feedback/create_rslearn_data.py @@ -1,15 +1,37 @@ import argparse import csv +import json import math import os import shutil from datetime import datetime, timedelta +from pathlib import Path +import shapely from pydantic import BaseModel -from pyproj import CRS, Transformer -from rslearn.dataset import Window -from rslearn.utils import LocalFileAPI, Projection -from rslearn.utils.raster_format import GeotiffRasterFormat +from pyproj import Transformer +from rslearn.const import WGS84_PROJECTION +from rslearn.dataset.window import Window +from rslearn.utils import get_utm_ups_crs +from rslearn.utils.geometry import Projection, STGeometry +from upath import UPath + +point_geojson = { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Point", + 
"coordinates": [[32, 32]], + }, + "properties": { + "label": None, + }, + } + ], + "properties": None, +} class ArgsModel(BaseModel): @@ -36,29 +58,42 @@ def latlon_to_utm_zone(lat, lon): return epsg_code -def create_projection(lat, lon, pixel_size=10): - """Creates a Projection object based on the center latitude and longitude.""" - epsg_code = latlon_to_utm_zone(lat, lon) - crs = CRS.from_epsg(epsg_code) - return Projection(crs=crs, x_resolution=pixel_size, y_resolution=pixel_size) - - def calculate_bounds( - lat, lon, pixel_width, pixel_height, pixel_size, projection + record: Record, projection: Projection ) -> tuple[int, int, int, int]: - """Calculate the bounds of the image in the projected coordinates.""" - transformer = Transformer.from_crs("epsg:4326", projection.crs, always_xy=True) - center_x, center_y = transformer.transform(lon, lat) - - half_width = (pixel_width / 2) * pixel_size - half_height = (pixel_height / 2) * pixel_size - - min_x = center_x - half_width - max_x = center_x + half_width - min_y = center_y - half_height - max_y = center_y + half_height - - return (min_x, min_y, max_x, max_y) + pixels_per_tile = 1024 + point = shapely.Point(record.lon, record.lat) + stgeometry = STGeometry(WGS84_PROJECTION, point, None) + geometry = stgeometry.to_projection(projection) + tile_col = int(geometry.shp.x) // pixels_per_tile + tile_row = int(geometry.shp.y) // pixels_per_tile + + bounds = ( + tile_col * pixels_per_tile, + tile_row * pixels_per_tile, + (tile_col + 1) * pixels_per_tile, + (tile_row + 1) * pixels_per_tile, + ) + return bounds + + +def get_label_data(record: Record, window: Window): + return { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Point", + "coordinates": [[32, 32]], + }, + "properties": { + "label": record.label, + }, + } + ], + "properties": window.projection.serialize(), + } def create_rslearn_data(args: ArgsModel): @@ -66,32 +101,21 @@ def create_rslearn_data(args: ArgsModel): 
reader = csv.DictReader(file) for row in reader: record = Record(**row) - projection = create_projection(record.lat, record.lon) - - # Define pixel size and image dimensions (adjust if necessary) pixel_size = 10 # 10 meters per pixel for Sentinel-2 - chip_size = 128 # 128x128 pixel image - - # Calculate the geographic bounds of the PNG image - bounds = calculate_bounds( - record.lat, - record.lon, - chip_size, - chip_size, - pixel_size, - projection, + crs = get_utm_ups_crs(record.lat, record.lon) + projection = Projection( + crs=crs, x_resolution=pixel_size, y_resolution=-pixel_size ) + bounds = calculate_bounds(record, projection) timestamp = datetime.fromisoformat(record.time) - window_root = os.path.join( - args.out_dir, "windows", record.event_id, record.label - ) + window_root = UPath(f"{args.out_dir}/windows/sentinel2/{record.event_id}") os.makedirs(window_root, exist_ok=True) # Create the Window object window = Window( - file_api=LocalFileAPI(window_root), - group="images", + path=window_root, + group=record.label, name=record.event_id, projection=projection, bounds=bounds, @@ -102,30 +126,17 @@ def create_rslearn_data(args: ArgsModel): ) window.save() - """ - populate the chip layer - - this works by copying the chip image to the chips directory. 
- """ - crop = None - file_api = window.file_api.get_folder("layers", "chips", "R_G_B") - GeotiffRasterFormat().encode_raster(file_api, projection, bounds, crop) - dst_chip_path = os.path.join(file_api.to_str(), "chip.png") - shutil.copyfile(record.chip_path, dst_chip_path) - complete_path = os.path.join(window_root, "layers", "chips", "completed") - os.system(f"touch {complete_path}") + # Populate the sentinel2 layer + image_layer_dir = os.path.join(window_root, "layers", "sentinel2", "R_G_B") + os.makedirs(image_layer_dir, exist_ok=True) + Path(f"{image_layer_dir}/completed").touch() - """ - populate the label layer - - this works by creating a dummy geotiff raster with the same bounds as the chip image - and a property to denote the label. - """ - file_api = window.file_api.get_folder("layers", "label", "label") - GeotiffRasterFormat().encode_raster(file_api, projection, bounds, crop) - # TODO write labels.json with the label - complete_path = os.path.join(window_root, "layers", "label", "completed") - os.system(f"touch {complete_path}") + # Populate the label layer + label_layer_dir = os.path.join(window_root, "layers", "label") + os.makedirs(label_layer_dir, exist_ok=True) + with open(os.path.join(label_layer_dir, "data.geojson"), "w") as f: + json.dump(get_label_data(record, window), f) + Path(f"{label_layer_dir}/completed").touch() if __name__ == "__main__": @@ -147,5 +158,9 @@ def create_rslearn_data(args: ArgsModel): parsed_args = parser.parse_args() args = ArgsModel(**vars(parsed_args)) # convert parsed args to pydantic model os.makedirs(args.out_dir, exist_ok=True) + + # Copy the model architecture definition to the window directory shutil.copyfile("config.json", os.path.join(args.out_dir, "config.json")) + shutil.copyfile("model_config.yaml", os.path.join(args.out_dir, "config.yaml")) + create_rslearn_data(args) diff --git a/sentinel2_feedback/dataset.2rows-sample.csv b/sentinel2_feedback/dataset.2rows-sample.csv new file mode 100644 index 
000000000..28ce2323a --- /dev/null +++ b/sentinel2_feedback/dataset.2rows-sample.csv @@ -0,0 +1,3 @@ +event_id,label,lat,lon,chip_path,time +S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1,GOOD,25.017882990705132,60.23224195097599,chips/S2B_MSIL1C_20240824T062629_N0511_R077_T41RKH_20240824T090336.SAFE_1.png,2024-08-24T06:42:33.239990+00:00 +S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0,BAD,27.078108098808176,35.52532781443999,chips/S2B_MSIL1C_20240824T080609_N0511_R078_T36RYQ_20240824T103021.SAFE_0.png,2024-08-24T08:22:45.575754+00:00 \ No newline at end of file diff --git a/sentinel2_feedback/model_config.yaml b/sentinel2_feedback/model_config.yaml new file mode 100644 index 000000000..a8783887f --- /dev/null +++ b/sentinel2_feedback/model_config.yaml @@ -0,0 +1,69 @@ +model: + class_path: rslearn.train.lightning_module.RslearnLightningModule + init_args: + model: + class_path: rslearn.models.multitask.MultiTaskModel + init_args: + encoder: + - class_path: rslearn.models.swin.Swin + init_args: + input_channels: 3 + output_layers: [1, 3, 5, 7] + pretrained: true + decoders: + class: + - class_path: rslearn.models.pooling_decoder.PoolingDecoder + init_args: + in_channels: 1024 + out_channels: 2 + - class_path: rslearn.train.tasks.classification.ClassificationHead + lr: 0.0001 + plateau_factor: 0.1 + plateau_patience: 10 + plateau_min_lr: 0 + plateau_cooldown: 0 +data: + class_path: rslearn.train.data_module.RslearnDataModule + init_args: + root_dir: /data/favyenb/rslearn_landsat/2024-07-18-joe-check-training-phase1/ + inputs: + image: + data_type: "raster" + layers: ["chips"] + bands: ["R", "G", "B"] + passthrough: true + label: + data_type: "vector" + layers: ["label"] + task: + class_path: rslearn.train.tasks.multi_task.MultiTask + init_args: + tasks: + class: + class_path: rslearn.train.tasks.classification.ClassificationTask + init_args: + property_name: "label" + classes: ["GOOD", "BAD"] + input_mapping: + class: + 
label: "targets" + batch_size: 64 + num_workers: 32 + default_config: + transforms: + - class_path: rslearn.train.transforms.normalize.Normalize + init_args: + mean: 0 + std: 255 +trainer: + logger: + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + project: rslearn-sentinel2-feedback + log_model: "all" + name: phase1 + max_epochs: 50 + callbacks: + - class_path: lightning.pytorch.callbacks.LearningRateMonitor + init_args: + logging_interval: "epoch" From ec18bf6459ac9258b2e68bef3f2988b7a3adc99d Mon Sep 17 00:00:00 2001 From: Mike Jacobi Date: Mon, 9 Sep 2024 09:22:14 -0700 Subject: [PATCH 3/7] readme --- sentinel2_feedback/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentinel2_feedback/README.md b/sentinel2_feedback/README.md index 538e2a8c3..32500d238 100644 --- a/sentinel2_feedback/README.md +++ b/sentinel2_feedback/README.md @@ -37,5 +37,5 @@ python retrieve_dataset.py --token $token --feedback_csv feedback.sample.csv --c The `create_rslearn_data.py` script creates an rslearn dataset from the chips and labels. 
``` -python create_rslearn_data.py --dataset_csv dataset.sample.csv --out_dir rslearn_dataset +python create_rslearn_data.py --dataset_csv dataset.2rows-sample.csv --out_dir rslearn_data ``` From 072b5d56249b35c45a3d81e4a3563eba8a09b7c6 Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 9 Sep 2024 09:39:37 -0700 Subject: [PATCH 4/7] fix indentation issue in model_config.yaml --- sentinel2_feedback/model_config.yaml | 108 +++++++++++++-------------- 1 file changed, 54 insertions(+), 54 deletions(-) diff --git a/sentinel2_feedback/model_config.yaml b/sentinel2_feedback/model_config.yaml index a8783887f..c3f501078 100644 --- a/sentinel2_feedback/model_config.yaml +++ b/sentinel2_feedback/model_config.yaml @@ -1,67 +1,67 @@ model: class_path: rslearn.train.lightning_module.RslearnLightningModule init_args: - model: - class_path: rslearn.models.multitask.MultiTaskModel - init_args: - encoder: - - class_path: rslearn.models.swin.Swin - init_args: - input_channels: 3 - output_layers: [1, 3, 5, 7] - pretrained: true - decoders: - class: - - class_path: rslearn.models.pooling_decoder.PoolingDecoder + model: + class_path: rslearn.models.multitask.MultiTaskModel + init_args: + encoder: + - class_path: rslearn.models.swin.Swin init_args: - in_channels: 1024 - out_channels: 2 - - class_path: rslearn.train.tasks.classification.ClassificationHead - lr: 0.0001 - plateau_factor: 0.1 - plateau_patience: 10 - plateau_min_lr: 0 - plateau_cooldown: 0 + input_channels: 3 + output_layers: [1, 3, 5, 7] + pretrained: true + decoders: + class: + - class_path: rslearn.models.pooling_decoder.PoolingDecoder + init_args: + in_channels: 1024 + out_channels: 2 + - class_path: rslearn.train.tasks.classification.ClassificationHead + lr: 0.0001 + plateau_factor: 0.1 + plateau_patience: 10 + plateau_min_lr: 0 + plateau_cooldown: 0 data: class_path: rslearn.train.data_module.RslearnDataModule init_args: - root_dir: /data/favyenb/rslearn_landsat/2024-07-18-joe-check-training-phase1/ - inputs: - 
image: - data_type: "raster" - layers: ["chips"] - bands: ["R", "G", "B"] - passthrough: true - label: - data_type: "vector" - layers: ["label"] - task: - class_path: rslearn.train.tasks.multi_task.MultiTask - init_args: - tasks: - class: - class_path: rslearn.train.tasks.classification.ClassificationTask + root_dir: /data/favyenb/rslearn_landsat/2024-07-18-joe-check-training-phase1/ + inputs: + image: + data_type: "raster" + layers: ["chips"] + bands: ["R", "G", "B"] + passthrough: true + label: + data_type: "vector" + layers: ["label"] + task: + class_path: rslearn.train.tasks.multi_task.MultiTask + init_args: + tasks: + class: + class_path: rslearn.train.tasks.classification.ClassificationTask + init_args: + property_name: "label" + classes: ["GOOD", "BAD"] + input_mapping: + class: + label: "targets" + batch_size: 64 + num_workers: 32 + default_config: + transforms: + - class_path: rslearn.train.transforms.normalize.Normalize init_args: - property_name: "label" - classes: ["GOOD", "BAD"] - input_mapping: - class: - label: "targets" - batch_size: 64 - num_workers: 32 - default_config: - transforms: - - class_path: rslearn.train.transforms.normalize.Normalize - init_args: - mean: 0 - std: 255 + mean: 0 + std: 255 trainer: logger: - class_path: lightning.pytorch.loggers.WandbLogger - init_args: - project: rslearn-sentinel2-feedback - log_model: "all" - name: phase1 + class_path: lightning.pytorch.loggers.WandbLogger + init_args: + project: rslearn-sentinel2-feedback + log_model: "all" + name: phase1 max_epochs: 50 callbacks: - class_path: lightning.pytorch.callbacks.LearningRateMonitor From 349e5c97411a8fda9fc9ce4dd469ee1a36b8e8ef Mon Sep 17 00:00:00 2001 From: Favyen Bastani Date: Mon, 9 Sep 2024 15:54:19 -0700 Subject: [PATCH 5/7] Fix config and window size. Config: remove settings that adjust the time range when looking up items in the data source. Window size: make it 128x128 and centered at the vessel instead of 1024x1024 and aligned on a grid. 
Also increase the time range so that it can match successfully. This may be necessary because different timestamps are reported by the Sentinel-2 API/image in different places. --- sentinel2_feedback/config.json | 5 +---- sentinel2_feedback/create_rslearn_data.py | 22 ++++++++++------------ 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/sentinel2_feedback/config.json b/sentinel2_feedback/config.json index 2a9c2e09c..d55e6fd4d 100644 --- a/sentinel2_feedback/config.json +++ b/sentinel2_feedback/config.json @@ -22,11 +22,8 @@ "max_time_delta": "1d", "sort_by": "cloud_cover", "query_config": { - "max_matches": 6, "space_mode": "CONTAINS" - }, - "time_offset": "-90d", - "duration": "180d" + } } }, "label": { diff --git a/sentinel2_feedback/create_rslearn_data.py b/sentinel2_feedback/create_rslearn_data.py index 38a6ef2d9..f3523d286 100644 --- a/sentinel2_feedback/create_rslearn_data.py +++ b/sentinel2_feedback/create_rslearn_data.py @@ -61,19 +61,17 @@ def latlon_to_utm_zone(lat, lon): def calculate_bounds( record: Record, projection: Projection ) -> tuple[int, int, int, int]: - pixels_per_tile = 1024 + window_size = 128 point = shapely.Point(record.lon, record.lat) stgeometry = STGeometry(WGS84_PROJECTION, point, None) geometry = stgeometry.to_projection(projection) - tile_col = int(geometry.shp.x) // pixels_per_tile - tile_row = int(geometry.shp.y) // pixels_per_tile - - bounds = ( - tile_col * pixels_per_tile, - tile_row * pixels_per_tile, - (tile_col + 1) * pixels_per_tile, - (tile_row + 1) * pixels_per_tile, - ) + + bounds = [ + int(geometry.shp.x) - window_size // 2, + int(geometry.shp.y) - window_size // 2, + int(geometry.shp.x) + window_size // 2, + int(geometry.shp.y) + window_size // 2, + ] return bounds @@ -120,8 +118,8 @@ def create_rslearn_data(args: ArgsModel): projection=projection, bounds=bounds, time_range=( - timestamp - timedelta(minutes=1), - timestamp + timedelta(minutes=1), + timestamp - timedelta(minutes=20), + timestamp + 
timedelta(minutes=20), ), ) window.save() From e799c10dce2b90a4c31f0ccec4803be87c9f0e6e Mon Sep 17 00:00:00 2001 From: Mike Jacobi Date: Tue, 24 Sep 2024 09:05:58 -0700 Subject: [PATCH 6/7] model tweaks --- sentinel2_feedback/config.json | 2 +- sentinel2_feedback/create_rslearn_data.py | 5 ----- sentinel2_feedback/model_config.yaml | 14 ++++++-------- 3 files changed, 7 insertions(+), 14 deletions(-) diff --git a/sentinel2_feedback/config.json b/sentinel2_feedback/config.json index d55e6fd4d..c40507dc8 100644 --- a/sentinel2_feedback/config.json +++ b/sentinel2_feedback/config.json @@ -17,7 +17,7 @@ ], "data_source": { "name": "rslearn.data_sources.gcp_public_data.Sentinel2", - "index_cache_dir": "/Users/ai2/ai2-code/rslearn_projects/sentinel2_feedback/cache/", + "index_cache_dir": "/Users/mikej/ai2/rslearn_projects/sentinel2_feedback/cache/", "use_rtree_index": false, "max_time_delta": "1d", "sort_by": "cloud_cover", diff --git a/sentinel2_feedback/create_rslearn_data.py b/sentinel2_feedback/create_rslearn_data.py index f3523d286..d436ac429 100644 --- a/sentinel2_feedback/create_rslearn_data.py +++ b/sentinel2_feedback/create_rslearn_data.py @@ -124,11 +124,6 @@ def create_rslearn_data(args: ArgsModel): ) window.save() - # Populate the sentinel2 layer - image_layer_dir = os.path.join(window_root, "layers", "sentinel2", "R_G_B") - os.makedirs(image_layer_dir, exist_ok=True) - Path(f"{image_layer_dir}/completed").touch() - # Populate the label layer label_layer_dir = os.path.join(window_root, "layers", "label") os.makedirs(label_layer_dir, exist_ok=True) diff --git a/sentinel2_feedback/model_config.yaml b/sentinel2_feedback/model_config.yaml index c3f501078..6d3ef6044 100644 --- a/sentinel2_feedback/model_config.yaml +++ b/sentinel2_feedback/model_config.yaml @@ -25,11 +25,11 @@ model: data: class_path: rslearn.train.data_module.RslearnDataModule init_args: - root_dir: /data/favyenb/rslearn_landsat/2024-07-18-joe-check-training-phase1/ + path: 
/Users/mikej/ai2/rslearn_projects/sentinel2_feedback/rslearn_data inputs: image: data_type: "raster" - layers: ["chips"] + layers: ["sentinel2"] bands: ["R", "G", "B"] passthrough: true label: @@ -44,6 +44,8 @@ data: init_args: property_name: "label" classes: ["GOOD", "BAD"] + metric_kwargs: + average: "micro" # makes the wandb accuracy metric more readable input_mapping: class: label: "targets" @@ -56,14 +58,10 @@ data: mean: 0 std: 255 trainer: - logger: - class_path: lightning.pytorch.loggers.WandbLogger - init_args: - project: rslearn-sentinel2-feedback - log_model: "all" - name: phase1 max_epochs: 50 callbacks: - class_path: lightning.pytorch.callbacks.LearningRateMonitor init_args: logging_interval: "epoch" +rslp_project: sentinel2_feedback +rslp_experiment: sentinel2_20240903_experiment1 From 32a9caa9335fdf49a979c0258c6e0ce290d91177 Mon Sep 17 00:00:00 2001 From: Mike Jacobi Date: Sun, 6 Oct 2024 09:38:44 -0700 Subject: [PATCH 7/7] tweaks to run via beaker --- sentinel2_feedback/config.json | 2 +- sentinel2_feedback/create_rslearn_data.py | 1 - sentinel2_feedback/model_config.yaml | 4 ++-- sentinel2_feedback/retrieve_dataset.py | 18 ++++++------------ 4 files changed, 9 insertions(+), 16 deletions(-) diff --git a/sentinel2_feedback/config.json b/sentinel2_feedback/config.json index c40507dc8..8122e9099 100644 --- a/sentinel2_feedback/config.json +++ b/sentinel2_feedback/config.json @@ -17,7 +17,7 @@ ], "data_source": { "name": "rslearn.data_sources.gcp_public_data.Sentinel2", - "index_cache_dir": "/Users/mikej/ai2/rslearn_projects/sentinel2_feedback/cache/", + "index_cache_dir": "/home/mikej/rslearn_projects/sentinel2_feedback/cache/", "use_rtree_index": false, "max_time_delta": "1d", "sort_by": "cloud_cover", diff --git a/sentinel2_feedback/create_rslearn_data.py b/sentinel2_feedback/create_rslearn_data.py index d436ac429..823d16b19 100644 --- a/sentinel2_feedback/create_rslearn_data.py +++ b/sentinel2_feedback/create_rslearn_data.py @@ -44,7 +44,6 @@ 
class Record(BaseModel): label: str lat: float lon: float - chip_path: str time: str diff --git a/sentinel2_feedback/model_config.yaml b/sentinel2_feedback/model_config.yaml index 6d3ef6044..647bd274d 100644 --- a/sentinel2_feedback/model_config.yaml +++ b/sentinel2_feedback/model_config.yaml @@ -25,7 +25,7 @@ model: data: class_path: rslearn.train.data_module.RslearnDataModule init_args: - path: /Users/mikej/ai2/rslearn_projects/sentinel2_feedback/rslearn_data + path: /home/mikej/rslearn_projects/sentinel2_feedback/joe_rslearn_data inputs: image: data_type: "raster" @@ -64,4 +64,4 @@ trainer: init_args: logging_interval: "epoch" rslp_project: sentinel2_feedback -rslp_experiment: sentinel2_20240903_experiment1 +rslp_experiment: sentinel2_20241006_joe_data diff --git a/sentinel2_feedback/retrieve_dataset.py b/sentinel2_feedback/retrieve_dataset.py index 488dcf8a5..79ba62347 100644 --- a/sentinel2_feedback/retrieve_dataset.py +++ b/sentinel2_feedback/retrieve_dataset.py @@ -18,7 +18,6 @@ class ArgsModel(BaseModel): token: str feedback_csv: str - chips_dir: str output_csv: str @@ -84,7 +83,7 @@ def process_events(args: ArgsModel, session: requests.Session): with open(args.feedback_csv, mode="r") as file: reader = csv.DictReader(file) with open(args.output_csv, mode="w", newline="") as output_file: - fieldnames = ["event_id", "label", "lat", "lon", "chip_path", "time"] + fieldnames = ["event_id", "label", "lat", "lon", "time"] writer = csv.DictWriter(output_file, fieldnames=fieldnames) writer.writeheader() @@ -99,7 +98,7 @@ def process_events(args: ArgsModel, session: requests.Session): raise Exception(f"No data found for event {event_id}") # Download the chip and get the local path - chip_path = download_chip(args, event_data) + # chip_path = download_chip(args, event_data) # Extract label and coordinates label = row["value"] @@ -112,13 +111,14 @@ def process_events(args: ArgsModel, session: requests.Session): "label": label, "lat": point["lat"], "lon": point["lon"], 
- "chip_path": chip_path, + # "chip_path": chip_path, "time": event_data["start"]["time"], } ) except Exception as e: print(f"Failed to process event {event_id}: {e}") - raise e + # raise e + continue if __name__ == "__main__": @@ -135,12 +135,6 @@ def process_events(args: ArgsModel, session: requests.Session): required=True, help="CSV file containing event eo_sentinel2 event ids and feedback labels.", ) - parser.add_argument( - "--chips_dir", - type=str, - required=True, - help="Directory where to store the chips.", - ) parser.add_argument( "--output_csv", type=str, @@ -150,5 +144,5 @@ def process_events(args: ArgsModel, session: requests.Session): parsed_args = parser.parse_args() args = ArgsModel(**vars(parsed_args)) # convert parsed args to pydantic model - os.makedirs(args.chips_dir, exist_ok=True) + # os.makedirs(args.chips_dir, exist_ok=True) process_events(args, session)