@@ -42,7 +42,7 @@ channel_weights:
4242 atms : [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
4343 amsua : [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
4444 avhrr : [1.0, 1.0, 1.0]
45- surface_obs : [0.5, 1.0, 1.0, 1.0, 1.0, 1.0 ] # one per predicted feature (6)
45+ surface_obs : [0.5, 1.0, 1.0, 1.0, 1.0] # one per predicted feature (5): pressure, temp, dewpoint, wind_u, wind_v
4646 radiosonde : [1.0, 1.0, 1.0, 1.0]
4747 aircraft : [1.0, 1.0, 1.0, 1.0]
4848 ascat : [1.0, 1.0, 1.0]
@@ -155,35 +155,32 @@ observation_config:
155155 surface_obs :
156156 source : zarr
157157 zarr_name : raw_surface_obs
158- # NOTE: wind_u and wind_v are computed from windSpeed and windDirection
159158 # height maps to height_pb_event_1 in the Zarr file
160159 # Using airPressure_pb_event_1 (station pressure) instead of pressureMeanSeaLevel_pb
161160 # because pressureMeanSeaLevel_pb appears to be all missing values (3.4e38)
162- features : [airPressure_pb_event_1, airTemperature, dewPointTemperature, relativeHumidity, wind_u, wind_v]
161+ features : [airPressure_pb_event_1, airTemperature, dewPointTemperature, wind_u, wind_v]
163162 metadata : [height_pb_event_1]
164- input_dim : 14 # 7 (geo/time enc) + 1 meta + 6 feats
165- target_dim : 6
163+ input_dim : 13 # 7 (geo/time enc) + 1 meta + 5 feats
164+ target_dim : 5
166165 encoder_hidden_layers : 2
167166 decoder_hidden_layers : 2
168167 qc_filters :
169168 airPressure_pb_event_1 :
170- range : [300, 1200]
169+ range : [300, 1100] # hPa; physically reasonable surface pressure range
171170 qm_flag_col : airPressureQuality_event_1
172- keep : [1,2,4,9 ]
171+ keep : [1,2]
173172 airTemperature :
174- range : [-80 , 60]
173+ range : [-90 , 60] # Celsius; physically reasonable surface temperature range
175174 qm_flag_col : airTemperatureQuality_event_1
176175 keep : [1,2,4,9]
177176 dewPointTemperature :
178- range : [-100, 40]
177+ range : [-100, 50] # Celsius; physically reasonable (Td ≤ T, can be very dry)
179178 qm_flag_col : dewPointTemperatureQuality_event_1
180179 keep : [1,2,4,9]
181- relativeHumidity :
182- range : [0, 100]
183180 windSpeed :
184- range : [0, 75]
181+ range : [0, 100] # m/s; physically reasonable (hurricanes ~90 m/s, allow margin)
185182 windDirection :
186- range : [0, 360] # degrees
183+ range : [0, 360] # degrees (NOTE: zarr metadata incorrectly says "radians")
187184
188185 qc_relations : # cross-variable checks
189186 dewpoint_le_temp : true # Td ≤ T (+0.5 °C margin)
@@ -211,84 +208,82 @@ observation_config:
211208 qc_filters :
212209 # Keep airPressure QC to filter bad pressure values (needed for log_pressure_height)
213210 airPressure :
214- range : [1, 1200]
211+ range : [1, 1100] # hPa; physically reasonable (surface to high altitude)
215212 qm_flag_col : airPressureQuality
216213 keep : [1, 2]
217214 airTemperature :
218- range : [173, 370]
215+ range : [-90, 50] # Celsius; physically reasonable atmospheric range
219216 qm_flag_col : airTemperatureQuality
220- reject : [13, 14, 15,]
217+ reject : [13, 14, 15] # Reject bad flags
221218 dewPointTemperature :
222- range : [135, 350]
219+ range : [-120, 40] # Celsius; physically reasonable (very dry stratosphere to humid surface)
223220 qm_flag_col : dewPointTemperatureQuality
224- reject : [13, 14, 15,]
221+ reject : [13, 14, 15] # Reject bad flags
225222 windSpeed :
226- range : [0, 75]
223+ range : [0, 150] # m/s; physically reasonable (jet stream can exceed 100 m/s)
227224 qm_flag_col : windQuality
228- keep : [2,]
225+ keep : [2] # Keep only good flag 2
229226 windDirection :
230- range : [0, 6.28] # radians
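227+ range : [0, 6.28] # radians; approx. full circle. NOTE(review): 6.28 is slightly below 2π (≈ 6.2832), so directions in that sliver fall outside this range — confirm intended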
231228 qm_flag_col : windQuality
232- keep : [2,]
229+ keep : [2] # Keep only good flag 2
233230
234231 aircraft :
235232 source : zarr
236233 zarr_name : aircraft
237234 # NOTE: Similar to radiosonde - airPressure used as vertical coordinate
238235 # Aircraft measures: Temperature, Wind components, and Humidity
239236 # UNITS: airTemperature=°C, airPressure=hPa, specificHumidity=kg/kg, windU/windV=m/s
240- # Physical ranges based on 500k samples from aircraft_2022.zarr (P0.1-P99.9 with margins)
241237 features : [airTemperature, specificHumidity, windU, windV]
242238 metadata : [log_pressure_height] # Derived from airPressure: z = -8000 * ln(P/1013.25)
243239 input_dim : 12 # 7 (geo/time enc) + 1 meta + 4 feats [+8 pressure embedding added by model]
244240 target_dim : 4 # Predict T, q, u, v
245241 encoder_hidden_layers : 2
246242 decoder_hidden_layers : 2
247243 qc_filters :
248- # QC filters based on actual data statistics
244+ # QC filters based on physical reasonableness
249245 airPressure :
250- range : [100, 1100] # hPa (actual: 179-1018, mean=513)
246+ range : [100, 1100] # hPa; physically reasonable for aircraft altitude range
251247 qm_flag_col : airPressureQuality
252248 keep : [0, 1, 2]
253249 airTemperature :
254- range : [-85, 40 ] # °C (actual: -70 to +29, mean=-22.7)
250+ range : [-90, 50 ] # Celsius; physically reasonable for atmospheric temperature
255251 qm_flag_col : airTemperatureQuality
256252 keep : [0, 1, 2]
257253 specificHumidity :
258- range : [0.0, 0.030 ] # kg/kg (actual: 0-0.019, mean=0.0022 )
254+ range : [0.0, 0.040 ] # kg/kg; physically reasonable (max ~35 g/kg, i.e. a ~35°C dewpoint near 1000 hPa; bound adds margin)
259255 qm_flag_col : specificHumidityQuality
260- keep : [0, 1, 2]
256+ reject : [3, 9, 13, 14, 15] # Reject doubtful/bad flags
261257 windU :
262- range : [-110, 110 ] # m/s (actual: -18 to +89, mean=19.2 )
258+ range : [-100, 100 ] # m/s; physically reasonable (jet stream winds )
263259 qm_flag_col : windQuality
264260 keep : [0, 1, 2]
265261 windV :
266- range : [-130, 130 ] # m/s (actual: -104 to +56, mean=1.8 )
262+ range : [-100, 100 ] # m/s; physically reasonable (jet stream winds )
267263 qm_flag_col : windQuality
268264 keep : [0, 1, 2]
269265
270- feature_stats : # Statistics for the features used in the model
266+ feature_stats : # Statistics for the features used in the model (with FULL QC filtering applied)
271267 surface_obs :
272- airPressure_pb_event_1 : [978.24, 57.32] # Station pressure (was pressureMeanSeaLevel_pb)
273- airTemperature : [14.12, 12.15]
274- dewPointTemperature : [8.07, 11.52]
275- relativeHumidity : [74.38, 19.18]
276- wind_u : [0.16, 3.64]
277- wind_v : [0.10, 3.37]
268+ airPressure_pb_event_1 : [985.30, 49.97] # hPa (station pressure); 2020-2024: 602M valid samples (95.8% pass QC)
269+ airTemperature : [13.99, 12.14] # Celsius; 2020-2024: 545M valid samples (87.2% pass QC)
270+ dewPointTemperature : [7.94, 11.47] # Celsius; 2020-2024: 485M valid samples (77.7% pass QC)
271+ wind_u : [0.03, 3.30] # m/s (u-component, computed from windSpeed/windDirection)
272+ wind_v : [-0.03, 3.37] # m/s (v-component, computed from windSpeed/windDirection)
278273
279274 radiosonde :
280- airPressure : [319.23, 296.08] # hPa (2024 zarr: 42.9M samples )
281- airTemperature : [238.30, 27.99 ] # K (2024 zarr: 33.1M samples)
282- dewPointTemperature : [221.02 , 34.03] # K (2024 zarr: 32.4M samples)
283- wind_u : [7.66, 15.60 ] # m/s (computed from windSpeed/windDirection: 35.1M samples)
284- wind_v : [-0.02, 9.05 ] # m/s (computed from windSpeed/windDirection: 35.1M samples)
275+ airPressure : [319.23, 296.08] # hPa (not in features; used as metadata to derive log_pressure_height)
276+ airTemperature : [-32.49, 28.82 ] # Celsius (as stored in zarr); 2020-2024: ~54% pass QC
277+ dewPointTemperature : [-47.40 , 34.48] # Celsius (as stored in zarr); 2020-2024: ~52% pass QC
278+ wind_u : [4.95, 13.71 ] # m/s (computed from windSpeed/windDirection); 2020-2024: ~60% pass QC
279+ wind_v : [-0.31, 8.13 ] # m/s (computed from windSpeed/windDirection); 2020-2024: ~60% pass QC
285280
286281 aircraft :
287- airPressure : [505.53, 274.24] # hPa (2024 zarr: 209.3M samples )
288- airTemperature : [-18.39 , 27.15 ] # °C ( 2024 zarr: 208.5M samples )
289- specificHumidity : [0.0028 , 0.0040 ] # kg/kg ( 2024 zarr: 17.0M samples, 8.1% valid )
290- windU : [12.32 , 15.96 ] # m/s ( 2024 zarr: 206.5M samples )
291- windV : [0.30, 12.09 ] # m/s ( 2024 zarr: 206.5M samples )
282+ airPressure : [505.53, 274.24] # hPa (not in features; used as metadata to derive log_pressure_height)
283+ airTemperature : [-18.56 , 27.68 ] # Celsius; 2020-2024: ~91% pass QC (flags 0,1,2)
284+ specificHumidity : [0.0038 , 0.0043 ] # kg/kg; 2020-2024: ~6% pass QC (sparse data!)
285+ windU : [11.28 , 15.43 ] # m/s; 2020-2024: ~92% pass QC (flags 0,1,2)
286+ windV : [0.27, 11.46 ] # m/s; 2020-2024: ~92% pass QC (flags 0,1,2)
292287
293288 atms :
294289 bt_channel_1 : [211.00, 40.60]
0 commit comments