Skip to content

Commit f3c723b

Browse files
committed
modified stats and physical ranges
1 parent 182e5b2 commit f3c723b

File tree

1 file changed

+42
-47
lines changed

1 file changed

+42
-47
lines changed

gnn_model/configs/observation_config.yaml

Lines changed: 42 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ channel_weights:
4242
atms: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
4343
amsua: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
4444
avhrr: [1.0, 1.0, 1.0]
45-
surface_obs: [0.5, 1.0, 1.0, 1.0, 1.0, 1.0] # one per predicted feature (6)
45+
surface_obs: [0.5, 1.0, 1.0, 1.0, 1.0] # one per predicted feature (5): pressure, temp, dewpoint, wind_u, wind_v
4646
radiosonde: [1.0, 1.0, 1.0, 1.0]
4747
aircraft: [1.0, 1.0, 1.0, 1.0]
4848
ascat: [1.0, 1.0, 1.0]
@@ -155,35 +155,32 @@ observation_config:
155155
surface_obs:
156156
source: zarr
157157
zarr_name: raw_surface_obs
158-
# NOTE: wind_u and wind_v are computed from windSpeed and windDirection
159158
# height maps to height_pb_event_1 in the Zarr file
160159
# Using airPressure_pb_event_1 (station pressure) instead of pressureMeanSeaLevel_pb
161160
# because pressureMeanSeaLevel_pb appears to be all missing values (3.4e38)
162-
features: [airPressure_pb_event_1, airTemperature, dewPointTemperature, relativeHumidity, wind_u, wind_v]
161+
features: [airPressure_pb_event_1, airTemperature, dewPointTemperature, wind_u, wind_v]
163162
metadata: [height_pb_event_1]
164-
input_dim: 14 # 7 (geo/time enc) + 1 meta + 6 feats
165-
target_dim: 6
163+
input_dim: 13 # 7 (geo/time enc) + 1 meta + 5 feats
164+
target_dim: 5
166165
encoder_hidden_layers: 2
167166
decoder_hidden_layers: 2
168167
qc_filters:
169168
airPressure_pb_event_1:
170-
range: [300, 1200]
169+
range: [300, 1100] # hPa; physically reasonable surface pressure range
171170
qm_flag_col: airPressureQuality_event_1
172-
keep: [1,2,4,9]
171+
keep: [1,2]
173172
airTemperature:
174-
range: [-80, 60]
173+
range: [-90, 60] # Celsius; physically reasonable surface temperature range
175174
qm_flag_col: airTemperatureQuality_event_1
176175
keep: [1,2,4,9]
177176
dewPointTemperature:
178-
range: [-100, 40]
177+
range: [-100, 50] # Celsius; physically reasonable (Td ≤ T, can be very dry)
179178
qm_flag_col: dewPointTemperatureQuality_event_1
180179
keep: [1,2,4,9]
181-
relativeHumidity:
182-
range: [0, 100]
183180
windSpeed:
184-
range: [0, 75]
181+
range: [0, 100] # m/s; physically reasonable (hurricanes ~90 m/s, allow margin)
185182
windDirection:
186-
range: [0, 360] # degrees
183+
range: [0, 360] # degrees (NOTE: zarr metadata incorrectly says "radians")
187184

188185
qc_relations: # cross-variable checks
189186
dewpoint_le_temp: true # Td ≤ T (+0.5 °C margin)
@@ -211,84 +208,82 @@ observation_config:
211208
qc_filters:
212209
# Keep airPressure QC to filter bad pressure values (needed for log_pressure_height)
213210
airPressure:
214-
range: [1, 1200]
211+
range: [1, 1100] # hPa; physically reasonable (surface to high altitude)
215212
qm_flag_col: airPressureQuality
216213
keep: [1, 2]
217214
airTemperature:
218-
range: [173, 370]
215+
range: [-90, 50] # Celsius; physically reasonable atmospheric range
219216
qm_flag_col: airTemperatureQuality
220-
reject: [13, 14, 15,]
217+
reject: [13, 14, 15] # Reject bad flags
221218
dewPointTemperature:
222-
range: [135, 350]
219+
range: [-120, 40] # Celsius; physically reasonable (very dry stratosphere to humid surface)
223220
qm_flag_col: dewPointTemperatureQuality
224-
reject: [13, 14, 15,]
221+
reject: [13, 14, 15] # Reject bad flags
225222
windSpeed:
226-
range: [0, 75]
223+
range: [0, 150] # m/s; physically reasonable (jet stream can exceed 100 m/s)
227224
qm_flag_col: windQuality
228-
keep: [2,]
225+
keep: [2] # Keep only good flag 2
229226
windDirection:
230-
range: [0, 6.28] # radians
227+
range: [0, 6.28] # radians; approximately a full circle 0-2π (2π ≈ 6.2832, so directions in (6.28, 2π] fall outside this bound)
231228
qm_flag_col: windQuality
232-
keep: [2,]
229+
keep: [2] # Keep only good flag 2
233230

234231
aircraft:
235232
source: zarr
236233
zarr_name: aircraft
237234
# NOTE: Similar to radiosonde - airPressure used as vertical coordinate
238235
# Aircraft measures: Temperature, Wind components, and Humidity
239236
# UNITS: airTemperature=°C, airPressure=hPa, specificHumidity=kg/kg, windU/windV=m/s
240-
# Physical ranges based on 500k samples from aircraft_2022.zarr (P0.1-P99.9 with margins)
241237
features: [airTemperature, specificHumidity, windU, windV]
242238
metadata: [log_pressure_height] # Derived from airPressure: z = -8000 * ln(P/1013.25)
243239
input_dim: 12 # 7 (geo/time enc) + 1 meta + 4 feats [+8 pressure embedding added by model]
244240
target_dim: 4 # Predict T, q, u, v
245241
encoder_hidden_layers: 2
246242
decoder_hidden_layers: 2
247243
qc_filters:
248-
# QC filters based on actual data statistics
244+
# QC filters based on physical reasonableness
249245
airPressure:
250-
range: [100, 1100] # hPa (actual: 179-1018, mean=513)
246+
range: [100, 1100] # hPa; physically reasonable for aircraft altitude range
251247
qm_flag_col: airPressureQuality
252248
keep: [0, 1, 2]
253249
airTemperature:
254-
range: [-85, 40] # °C (actual: -70 to +29, mean=-22.7)
250+
range: [-90, 50] # Celsius; physically reasonable for atmospheric temperature
255251
qm_flag_col: airTemperatureQuality
256252
keep: [0, 1, 2]
257253
specificHumidity:
258-
range: [0.0, 0.030] # kg/kg (actual: 0-0.019, mean=0.0022)
254+
range: [0.0, 0.040] # kg/kg; physically reasonable (max ~35 g/kg, i.e. saturation at ~35°C near sea level)
259255
qm_flag_col: specificHumidityQuality
260-
keep: [0, 1, 2]
256+
reject: [3, 9, 13, 14, 15] # Reject doubtful/bad flags
261257
windU:
262-
range: [-110, 110] # m/s (actual: -18 to +89, mean=19.2)
258+
range: [-100, 100] # m/s; physically reasonable (jet stream winds)
263259
qm_flag_col: windQuality
264260
keep: [0, 1, 2]
265261
windV:
266-
range: [-130, 130] # m/s (actual: -104 to +56, mean=1.8)
262+
range: [-100, 100] # m/s; physically reasonable (jet stream winds)
267263
qm_flag_col: windQuality
268264
keep: [0, 1, 2]
269265

270-
feature_stats: # Statistics for the features used in the model
266+
feature_stats: # Statistics for the features used in the model (with FULL QC filtering applied)
271267
surface_obs:
272-
airPressure_pb_event_1: [978.24, 57.32] # Station pressure (was pressureMeanSeaLevel_pb)
273-
airTemperature: [14.12, 12.15]
274-
dewPointTemperature: [8.07, 11.52]
275-
relativeHumidity: [74.38, 19.18]
276-
wind_u: [0.16, 3.64]
277-
wind_v: [0.10, 3.37]
268+
airPressure_pb_event_1: [985.30, 49.97] # hPa (station pressure); 2020-2024: 602M valid samples (95.8% pass QC)
269+
airTemperature: [13.99, 12.14] # Celsius; 2020-2024: 545M valid samples (87.2% pass QC)
270+
dewPointTemperature: [7.94, 11.47] # Celsius; 2020-2024: 485M valid samples (77.7% pass QC)
271+
wind_u: [0.03, 3.30] # m/s (u-component, computed from windSpeed/windDirection)
272+
wind_v: [-0.03, 3.37] # m/s (v-component, computed from windSpeed/windDirection)
278273

279274
radiosonde:
280-
airPressure: [319.23, 296.08] # hPa (2024 zarr: 42.9M samples)
281-
airTemperature: [238.30, 27.99] # K (2024 zarr: 33.1M samples)
282-
dewPointTemperature: [221.02, 34.03] # K (2024 zarr: 32.4M samples)
283-
wind_u: [7.66, 15.60] # m/s (computed from windSpeed/windDirection: 35.1M samples)
284-
wind_v: [-0.02, 9.05] # m/s (computed from windSpeed/windDirection: 35.1M samples)
275+
airPressure: [319.23, 296.08] # hPa (not in features, used as metadata for log_pressure_height)
276+
airTemperature: [-32.49, 28.82] # Celsius (as stored in zarr); 2020-2024: ~54% pass QC
277+
dewPointTemperature: [-47.40, 34.48] # Celsius (as stored in zarr); 2020-2024: ~52% pass QC
278+
wind_u: [4.95, 13.71] # m/s (computed from windSpeed/windDirection); 2020-2024: ~60% pass QC
279+
wind_v: [-0.31, 8.13] # m/s (computed from windSpeed/windDirection); 2020-2024: ~60% pass QC
285280

286281
aircraft:
287-
airPressure: [505.53, 274.24] # hPa (2024 zarr: 209.3M samples)
288-
airTemperature: [-18.39, 27.15] # °C (2024 zarr: 208.5M samples)
289-
specificHumidity: [0.0028, 0.0040] # kg/kg (2024 zarr: 17.0M samples, 8.1% valid)
290-
windU: [12.32, 15.96] # m/s (2024 zarr: 206.5M samples)
291-
windV: [0.30, 12.09] # m/s (2024 zarr: 206.5M samples)
282+
airPressure: [505.53, 274.24] # hPa (not in features, used as metadata for log_pressure_height)
283+
airTemperature: [-18.56, 27.68] # Celsius; 2020-2024: ~91% pass QC (flags 0,1,2)
284+
specificHumidity: [0.0038, 0.0043] # kg/kg; 2020-2024: ~6% pass QC (sparse data!)
285+
windU: [11.28, 15.43] # m/s; 2020-2024: ~92% pass QC (flags 0,1,2)
286+
windV: [0.27, 11.46] # m/s; 2020-2024: ~92% pass QC (flags 0,1,2)
292287

293288
atms:
294289
bt_channel_1: [211.00, 40.60]

0 commit comments

Comments
 (0)