-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathload_product.py
2559 lines (2279 loc) · 153 KB
/
load_product.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# -*- coding: utf-8 -*-
import os
import re
import warnings
import h5py
from scipy import constants
import scipy.interpolate as spin
import scipy.io.netcdf as spnc
import scipy.io as sio
from scipy import stats
from datetime import datetime, timedelta
from numpy import *
import pandas as pd
import xarray as xr
import pickle
import gsw
import seawater
import codecs
import geo_tools as gt
import time_tools as tt
import download_file as df
############# OCEAN DATA - OUTWARD-FACING FUNCTIONS ################
def argo_soccom(soccom_dir):
    """ Processes existing SOCCOM float profiles in text format, e.g. from quarterly snapshot with DOI.
    Example citation: see https://library.ucsd.edu/dc/object/bb0687110q

    Rewrites each data file in-place, removing the space inside the 'Lon [°E]' and
    'Lat [°N]' column headers so that whitespace-delimited parsing later sees one
    token per column.
    """
    save_to_floats = soccom_dir
    # do a find-and-replace on data files to remove whitespace between some column names
    for data_filename in os.listdir(save_to_floats):
        if data_filename in ('README_snapshot.txt', 'get_FloatViz_data.m'):
            continue
        # context managers guarantee the handles are closed
        # (the original leaked the input file object returned by codecs.open)
        with codecs.open(save_to_floats + data_filename, 'rb', encoding='latin-1') as in_file:
            orig_file_as_list = in_file.readlines()
        new_file_as_list = []
        for line in orig_file_as_list:
            first_edit = line.replace('Lon [°E]', 'Lon[°E]')
            second_edit = first_edit.replace('Lat [°N]', 'Lat[°N]')
            new_file_as_list.append(second_edit)
        with codecs.open(save_to_floats + data_filename, 'wb', encoding='latin-1') as out_file:
            out_file.writelines(new_file_as_list)
def argo_gdac_load_index(save_to_root):
    """ Accessor for index of locally stored Argo profiles from GDAC.
    Returns dict 'argo_gdac_index' with keys:
        'local_prof_index': full index of locally downloaded profiles in array format (see original file for details)
        'wmoids': list of float WMOids
        'num_profs': number of profiles in index
        'num_floats': number of floats in index
    """
    save_to_meta = save_to_root + 'Meta/'
    local_index_filename = 'ar_index_local_prof.txt'
    argo_gdac_index = {}
    # header=None (not -1): the local index file has no header row, and header=-1
    # is rejected by modern pandas
    data_frame = pd.read_csv(save_to_meta + local_index_filename, header=None, low_memory=False)
    argo_gdac_index['local_prof_index'] = data_frame.values
    # BUG FIX: original assigned the whole index array here instead of its length,
    # so 'num_profs' was never actually a count
    argo_gdac_index['num_profs'] = len(argo_gdac_index['local_prof_index'])
    # column 0 of the index is the float WMOid
    argo_gdac_index['wmoids'] = unique(argo_gdac_index['local_prof_index'][:, 0])
    argo_gdac_index['num_floats'] = len(argo_gdac_index['wmoids'])
    return argo_gdac_index
def argo_soccom_load_index(save_to_soccom,save_to_UW_O2,include_UW_O2=True,verbose=False):
    """ Accessor for index of locally stored Argo profiles from SOCCOM FloatViz (plus a basic index of UW-O2 data)).
    Returns dict 'argo_soccom_index' with keys:
        'num_floats': number of SOCCOM floats
        'wmoids': list of SOCCOM float WMOids
        'uwids': list of SOCCOM float University of Washington IDs (NOTE: these are strings, not ints)
        'filenames': list of SOCCOM float data filenames
        'profile_nums': list of arrays of profile numbers for each float
        'UW_O2_wmoids': list of UW-O2 float WMOids (if include_UW_O2 is True)
        'UW_O2_filenames': list of UW-O2 float data filenames
    """
    save_to_floats = save_to_soccom
    argo_soccom_index = {}
    all_filenames = os.listdir(save_to_floats)
    # BUG FIX: filter out non-data files BEFORE recording counts; the original set
    # 'num_floats' first, so it included '.DS_Store'/'README_snapshot.txt'/etc.
    for junk_filename in ('.DS_Store', 'README_snapshot.txt', 'get_FloatViz_data.m'):
        if junk_filename in all_filenames:
            all_filenames.remove(junk_filename)
    argo_soccom_index['num_floats'] = len(all_filenames)
    argo_soccom_index['filenames'] = all_filenames
    argo_soccom_index['wmoids'] = []
    argo_soccom_index['uwids'] = []
    argo_soccom_index['profile_nums'] = []
    uwid_regexp = re.compile('//Univ. of Washington ID: ([0-9]*)')
    for filename in all_filenames:
        if verbose:
            print(filename)
        # scan the FloatViz header: the UW ID lives on header line 4, and the
        # column-name line (containing 'Cruise ') marks the end of the header
        header_line_counter = 0
        with open(save_to_floats + filename, 'rb') as f:
            for line in f:
                if header_line_counter == 4:
                    argo_soccom_index['uwids'].append(uwid_regexp.findall(line.decode('latin-1'))[0])
                if 'Cruise ' in line.decode('latin-1'):
                    break
                header_line_counter += 1
        data_frame = pd.read_csv(save_to_floats + filename, header=header_line_counter, delim_whitespace=True,
                                 na_values=-1e10, encoding='latin-1')
        argo_soccom_index['wmoids'].append(unique(data_frame['Cruise'].values)[0])
        argo_soccom_index['profile_nums'].append(unique(data_frame['Station'].values))
    if include_UW_O2:
        save_to_floats = save_to_UW_O2
        all_filenames = os.listdir(save_to_floats)
        if '.DS_Store' in all_filenames:
            all_filenames.remove('.DS_Store')
        # v1.2b files supersede v1.1 files for the same WMOid
        filename_regexp_1_1 = re.compile('UW_O2_V1_1.*.WMO([0-9]*).*.nc')
        filename_regexp_1_2b = re.compile('UW_O2_V1.2b.*.WMO([0-9]*).*.nc')
        argo_soccom_index['UW_O2_wmoids'] = []
        argo_soccom_index['UW_O2_filenames'] = []
        for filename in all_filenames:
            if filename[-3:] == '.nc':
                try:
                    this_wmoid = int(filename_regexp_1_2b.findall(filename)[0])
                    new_version = True
                except IndexError:
                    this_wmoid = int(filename_regexp_1_1.findall(filename)[0])
                    new_version = False
                if this_wmoid not in argo_soccom_index['UW_O2_wmoids']:
                    argo_soccom_index['UW_O2_wmoids'].append(this_wmoid)
                    argo_soccom_index['UW_O2_filenames'].append(filename)
                elif new_version:
                    # replace the previously recorded (older-version) filename
                    existing_index = argo_soccom_index['UW_O2_wmoids'].index(this_wmoid)
                    argo_soccom_index['UW_O2_filenames'][existing_index] = filename
    return argo_soccom_index
def argo_gdac_float_meta(profile_index,float_wmoid):
    """ Accessor for metadata on a specific Argo float from GDAC.
    Returns dict 'this_float_meta' relevant to the given float's profiles, with following keys:
        'num_profs': number of profiles (integer)
        'prof_nums': array of profile numbers (integer form, so '_000D' and '_000' would both be integer 0)
        'prof_nums_full': array of profile numbers (full string [alphanumeric] form, preserving, e.g. '_000D')
        'prof_datetimes': array of profile datetimes (18-digit integer format)
        'prof_statuses': array of profile statuses (e.g. 'D' for delayed mode)
        'prof_lats': array of profile latitudes
        'prof_lons': array of profile longitudes
        'prof_position_flags': array of profile position QC flags (1 = likely good, 2 = interpolated, assumed under ice, 9 = bad)
        'prof_filenames': array of profile filenames
    """
    # rows of the index belonging to this float (WMOid is column 0)
    in_float = profile_index[:,0] == float_wmoid
    filenames = profile_index[in_float, 1]
    # profile numbers are parsed from the filename, e.g. 'R5904468_002D.nc'
    full_pattern = re.compile('[A-Z][0-9]*_([0-9]*[A-Z]*).nc')
    int_pattern = re.compile('[A-Z][0-9]*_([0-9]*)[A-Z]*.nc')
    nums = []
    nums_full = []
    for fname in filenames:
        nums.append(int(int_pattern.findall(fname)[0]))
        nums_full.append(full_pattern.findall(fname)[0])
    return {
        'num_profs': len(filenames),
        'prof_nums': nums,
        'prof_nums_full': nums_full,
        'prof_datetimes': profile_index[in_float, 5],
        'prof_statuses': profile_index[in_float, 2],
        'prof_lats': profile_index[in_float, 6],
        'prof_lons': profile_index[in_float, 7],
        'prof_position_flags': profile_index[in_float, 3],
        'prof_filenames': filenames,
    }
def argo_float_data(wmoid,argo_gdac_dir,argo_gdac_index,argo_soccom_index,prof_nums='all',smooth_sal=True,
compute_extras=False,smooth_N2_PV=False,smooth_N2_PV_window=25.0,use_unadjusted=False,
use_UW_O2_not_SOCCOM=False,verbose=False,correct_5904468_interim=True,allow_bad_soccom_qc=False):
""" Accessor for Argo profile data. Parses/aggregates netCDF-3 files from GDAC, FloatViz text files from SOCCOM,
and UW O2 v1.1 and assorted v1.2b netCDF-4 files from Robert Drucker.
Args:
wmoid: int (disallows the handful [one? two?] of early SOCCOM floats without WMOids)
argo_gdac_dir: add 'Profiles/' + this_float_meta['prof_filenames'][prof] to get a GDAC profile
add 'Argo_index_pickles/' + argo_soccom_index['filenames'][float] to get a SOCCOM float
argo_gdac_index: index of locally stored profiles from GDAC
argo_soccom_index: index of locally stored float data files from SOCCOM FloatViz
prof_nums: 'all', or specified as array of ints
smooth_sal ([True] or False): apply mild 1-D quadratic smoothing spline fit to salinity measurements to
eliminate artificial "staircases" at depth due to 0.001 psu resolution of data
compute_extras (True or [False]): compute N^2 (buoyancy frequency / Brunt-Väisäla frequency squared),
approximate isopycnic potential vorticity (IPV), and buoyancy loss required
for convection to reach each observed depth
note: attached 'pres' vectors for 'Nsquared' and 'PV' represent midpoints of
original CTD pressures
smooth_N2_PV (True or [False]): apply running average smoothing to noisy Nsquared and PV data
smooth_N2_PV_window: running average window (in meters)
use_unadjusted (True or [False]): import unadjusted profiles and ignore bad QC flags
use_UW_O2_not_SOCCOM (True or [False]): if available, use UW-calibrated O2 profiles instead of SOCCOM FloatViz
correct_5904468_interim ([True] or False): apply interim salinity drift correction to 5904468 (see below)
allow_bad_soccom_qc (True or [False]): allow SOCCOM data with QC flag of '4' (questionable) as well as '0' (good)
Returns: dict 'this_float_data' with keys:
wmoid: int (WMOid)
uwid: 'unknown_or_not_applicable' or str (University of Washington ID; will only fill in for SOCCOM floats)
is_soccom: True or False (is this a SOCCOM float?)
is_uw_o2: True or False (does this float have O2 data corrected by UW?)
institution: string, e.g. 'UW'
num_profs: int (number of profiles in GDAC; not necessarily same as in SOCCOM FloatViz file;
also does not reflect removal of profiles with bad QC in this function)
profiles: list of dicts for each profile in GDAC, with keys:
prof_num: int (profile number; integer form, so '_000D' and '_000' would both be integer 0)
prof_num_full: string (GDAC full profile number; alphanumeric form, preserving, e.g. '_000D')
datetime: int (GDAC datetime; 14-digit integer format)
status: str (GDAC profile status, e.g. 'D' for delayed mode)
lat: float (GDAC latitude)
lon: float (GDAC longitude; -180 to 180)
position_flag: int (GDAC profile position QC flag, as below):
1 = likely good, 2 = interpolated, assumed under ice, 9 = bad
DATA PARAMETERS: from GDAC: temp, ptmp, ctmp, psal, asal, sigma_theta, Nsquared, PV, destab (if included)
note: if NUM_PROFS > 1, this will be ignoring all except the first profile
note: these GDAC parameters are guaranteed to have identical pres/depth arrays
from SOCCOM or UW-O2: SEE ARRAY 'soccom_param_names' FOR PARAMETER NAMES
each is a dict with keys:
data: array of measured values, in direction of increasing depth (surface downwards)
name: string of name of parameter, formatted for a plot axis
pres: array of corresponding pressure (dbar)
depth: array of corresponding depths (m, positive)
units: string of units of data
Example:
this_float_data['wmoid']
this_float_data['num_profs']
len(this_float_data['profiles']) or consider for profile in this_float_data['profiles']
this_float_data['profiles'][55]['prof_num']
this_float_data['profiles'][55]['ptmp']['data']
this_float_data['profiles'][55]['ptmp']['pres']
this_float_data['profiles'][55]['ptmp']['units']
"""
save_to_gdac_dir = argo_gdac_dir + 'Profiles/'
save_to_soccom_dir = argo_gdac_dir + 'SOCCOM/'
save_to_UW_O2_dir = argo_gdac_dir + 'UW-O2/'
soccom_params_to_save = array(['Oxygen[µmol/kg]','OxygenSat[%]','Nitrate[µmol/kg]','Chl_a[mg/m^3]',
'Chl_a_corr[mg/m^3]','b_bp700[1/m]','b_bp_corr[1/m]','POC[mmol/m^3]',
'pHinsitu[Total]','pH25C[Total]','TALK_LIAR[µmol/kg]','DIC_LIAR[µmol/kg]',
'pCO2_LIAR[µatm]'])
soccom_param_abbrevs = array(['Oxygen','OxygenSat','Nitrate','Chl_a','Chl_a_corr','b_bp700','b_bp_corr','POC',
'pHinsitu','pH25C','TALK_LIAR','DIC_LIAR','pCO2_LIAR'])
soccom_param_names = array(['Oxygen', 'Oxygen saturation', 'Nitrate', 'Chl-a', 'Chl-a', 'Backscatter (700 nm)',
'Backscatter', 'POC', 'In-situ pH', 'pH25C', 'Total alkalinity', 'DIC', 'pCO2'])
soccom_units_names = array(['µmol/kg','%','µmol/kg',r'mg/m$^3$',r'mg/m$^3$','1/m','1/m',r'mmol/m$^3$','Total','Total',
'µmol/kg','µmol/kg','µatm'])
this_float_meta = argo_gdac_float_meta(argo_gdac_index['local_prof_index'], wmoid)
this_float_data = {}
this_float_data['wmoid'] = wmoid
this_float_data['num_profs'] = this_float_meta['num_profs']
this_float_data['profiles'] = []
# load GDAC profiles
for prof_index, prof in enumerate(this_float_meta['prof_nums']):
if verbose:
print('examining profile #: ' + str(prof))
if prof_nums is not 'all':
if prof not in prof_nums:
continue
# load profile netCDF file; save institution
gdac_prof_file = spnc.netcdf_file(save_to_gdac_dir + this_float_meta['prof_filenames'][prof_index],'r',mmap=False)
if prof_index is 1:
try:
this_float_data['institution'] = str(gdac_prof_file.institution,'utf-8')
except:
this_float_data['institution'] = 'Unknown institution'
# ignore this profile if it has major QC issues
gdac_prof_position_flag = this_float_meta['prof_position_flags'][prof_index]
if gdac_prof_position_flag is 9: # bad QC!
continue
gdac_data_mode = str(gdac_prof_file.variables['DATA_MODE'][0], 'utf-8')
if (gdac_data_mode is 'D' or gdac_data_mode is 'A') and use_unadjusted is False:
var_suffix = '_ADJUSTED'
qc_var_suffix = '_ADJUSTED_QC'
else:
var_suffix = ''
qc_var_suffix = '_QC'
try:
gdac_pres_qc = gdac_prof_file.variables['PRES' + qc_var_suffix][0].astype(int)
gdac_temp_qc = gdac_prof_file.variables['TEMP' + qc_var_suffix][0].astype(int)
gdac_psal_qc = gdac_prof_file.variables['PSAL' + qc_var_suffix][0].astype(int)
except:
continue
# examine QC flags of CTD data; create mask that is True where all data is good
if use_unadjusted is False:
gdac_qc_mask = ~logical_or(logical_and(gdac_pres_qc != 1, gdac_pres_qc != 2),
logical_or(logical_and(gdac_temp_qc != 1, gdac_temp_qc != 2),
logical_and(gdac_psal_qc != 1, gdac_psal_qc != 2)))
else:
gdac_qc_mask = tile(True,len(gdac_psal_qc))
# FIXME: temporary patch to allow bad-flagged data from 5904468 from May 23, 2017 (#84) to May 8, 2018 (#118),
# FIXME: a period of approximately linear drift following the GDAC-corrected data for #84 and prior profiles
if wmoid == 5904468:
if this_float_meta['prof_nums'][prof_index] <= 83:
pass # these profiles should be delayed mode and corrected by GDAC already
if this_float_meta['prof_nums'][prof_index] >= 84:
if gdac_data_mode == 'D':
# when profiles #84 and beyond become 'D', this code and correction must be reevaluated
raise RuntimeError('IMPORTANT: reevaluate ldp.argo_float_data() correction for 5904468')
else: # still 'R'
if use_unadjusted:
gdac_qc_mask = tile(True,len(gdac_psal_qc)) # accept without correction
pass
elif use_unadjusted is False:
if not correct_5904468_interim:
# accept; this situation should only occur during calibration routine in main script
gdac_qc_mask = tile(True,len(gdac_psal_qc))
if correct_5904468_interim and this_float_meta['prof_nums'][prof_index] >= 119:
continue # reject; a correction scheme hasn't been developed for profiles 119 and onwards yet
if correct_5904468_interim and this_float_meta['prof_nums'][prof_index] <= 118:
gdac_qc_mask = tile(True,len(gdac_psal_qc)) # accept; use manual correction below...
# reject profile if T, S, and/or P had completely bad QC
if sum(gdac_qc_mask) <= 2:
if verbose:
print('this profile has completely or almost completely bad QC')
continue
# manual blacklist
if wmoid == 7900123 and str(this_float_meta['prof_datetimes'][prof_index])[:8] == '20071229':
continue # why? big cold, fresh bias - could be real but more likely bad data, since it's December
if wmoid == 7900407 and this_float_meta['prof_nums'][prof_index] in [3,4,5,*range(59,78+1),114]:
continue # why? position jumps, date jumps
if wmoid == 7900343:
continue # why? hugely anomalous surface salinity (+0.5 psu), other QC issues
# only save first profile taken on a given day; reject additional profiles
# note: this is a common issue with the early German floats (7900***), which probably recorded date
# (usually January) of transmission of all of past winter's under-ice profiles
# so, for these 7900*** floats, I am discarding the first profile in these series, too
if int(wmoid/1000) == 7900:
if len(this_float_meta['prof_datetimes']) > prof_index+1:
if int(this_float_meta['prof_datetimes'][prof_index]/1000000) \
== int(this_float_meta['prof_datetimes'][prof_index+1]/1000000):
continue
if prof_index > 0:
if int(this_float_meta['prof_datetimes'][prof_index]/1000000) \
== int(this_float_meta['prof_datetimes'][prof_index-1]/1000000):
continue
# save profile metadata
this_float_data['profiles'].append({})
good_prof_index = len(this_float_data['profiles']) - 1
this_float_data['profiles'][good_prof_index]['position_flag'] = gdac_prof_position_flag
this_float_data['profiles'][good_prof_index]['prof_num'] = this_float_meta['prof_nums'][prof_index]
this_float_data['profiles'][good_prof_index]['prof_num_full'] = this_float_meta['prof_nums_full'][prof_index]
this_float_data['profiles'][good_prof_index]['datetime'] = this_float_meta['prof_datetimes'][prof_index]
this_float_data['profiles'][good_prof_index]['status'] = gdac_data_mode
this_float_data['profiles'][good_prof_index]['lat'] = this_float_meta['prof_lats'][prof_index]
this_float_data['profiles'][good_prof_index]['lon'] = this_float_meta['prof_lons'][prof_index]
# # interim lat/lon patch to allow missing-location profiles from 5904468
# # (keep as example for other floats; edit 'if gdac_prof_position_flag is 9' statement above if using this)
# if wmoid == 5904468 and this_float_data['profiles'][good_prof_index]['position_flag'] == 9:
# this_float_data['profiles'][good_prof_index]['position_flag'] = 2
# this_float_data['profiles'][good_prof_index]['lat'] = this_float_meta['prof_lats'][95]
# this_float_data['profiles'][good_prof_index]['lon'] = this_float_meta['prof_lons'][95]
# use T, S, P to derive related data fields and save everything
gdac_pres = gdac_prof_file.variables['PRES' + var_suffix][0][gdac_qc_mask]
gdac_temp = gdac_prof_file.variables['TEMP' + var_suffix][0][gdac_qc_mask]
gdac_psal = gdac_prof_file.variables['PSAL' + var_suffix][0][gdac_qc_mask]
# FIXME: interim custom linear salinity drift correction for 5904468 (see above)
if correct_5904468_interim is True and use_unadjusted is False \
and wmoid == 5904468 and 84 <= this_float_meta['prof_nums'][prof_index] <= 118:
# update these filepaths as necessary
data_dir = os.getcwd() + '/Data/'
argo_gdac_dir = data_dir + 'Argo/'
argo_index_pickle_dir = argo_gdac_dir + 'Argo_index_pickles/'
[cal_prof_nums,cal_deltas] = pickle.load(open(argo_index_pickle_dir + 'argo_5904468_cal.pickle','rb'))
cal_delta_this_prof = cal_deltas[where(cal_prof_nums == this_float_meta['prof_nums'][prof_index])[0][0]]
gdac_psal = gdac_psal + cal_delta_this_prof
# print('profile num and cal delta: ',this_float_meta['prof_nums'][prof_index],cal_delta_this_prof)
if smooth_sal:
# note: 's' is a smoothing factor (s=0 is no smoothing)
# s=0.00010 and below is insufficient for smoothing over the spike around 985 m, caused by switch
# from spot (single) to continuous sampling in some floats (per Annie Wong)
# s=0.00015 was determined visually to give best results over most of the water column,
# and successfully mitigates this spike
psal_spline_interpolant = spin.UnivariateSpline(gdac_pres,gdac_psal,k=2,s=0.00015)
gdac_psal = psal_spline_interpolant(gdac_pres)
gdac_depth = -1 * gsw.z_from_p(gdac_pres, this_float_data['profiles'][good_prof_index]['lat'])
gdac_asal = gsw.SA_from_SP(gdac_psal,gdac_pres,this_float_data['profiles'][good_prof_index]['lon'],
this_float_data['profiles'][good_prof_index]['lat'])
gdac_ptmp = gsw.pt0_from_t(gdac_asal,gdac_temp,gdac_pres)
gdac_ctmp = gsw.CT_from_pt(gdac_asal,gdac_ptmp)
gdac_sigma_theta = gsw.sigma0(gdac_asal,gdac_ctmp)
this_float_data['profiles'][good_prof_index]['sigma_theta'] = {}
this_float_data['profiles'][good_prof_index]['sigma_theta']['data'] = gdac_sigma_theta
this_float_data['profiles'][good_prof_index]['sigma_theta']['name'] = r'$\sigma_\theta$'
this_float_data['profiles'][good_prof_index]['sigma_theta']['units'] = r'kg/m$^3$'
this_float_data['profiles'][good_prof_index]['sigma_theta']['pres'] = gdac_pres
this_float_data['profiles'][good_prof_index]['sigma_theta']['depth'] = gdac_depth
this_float_data['profiles'][good_prof_index]['temp'] = {}
this_float_data['profiles'][good_prof_index]['temp']['data'] = gdac_temp
this_float_data['profiles'][good_prof_index]['temp']['name'] = 'Temperature'
this_float_data['profiles'][good_prof_index]['temp']['units'] = '°C'
this_float_data['profiles'][good_prof_index]['temp']['pres'] = gdac_pres
this_float_data['profiles'][good_prof_index]['temp']['depth'] = gdac_depth
this_float_data['profiles'][good_prof_index]['ptmp'] = {}
this_float_data['profiles'][good_prof_index]['ptmp']['data'] = gdac_ptmp
this_float_data['profiles'][good_prof_index]['ptmp']['name'] = 'Potential temperature' # r'$\Theta$'
this_float_data['profiles'][good_prof_index]['ptmp']['units'] = '°C'
this_float_data['profiles'][good_prof_index]['ptmp']['pres'] = gdac_pres
this_float_data['profiles'][good_prof_index]['ptmp']['depth'] = gdac_depth
this_float_data['profiles'][good_prof_index]['ctmp'] = {}
this_float_data['profiles'][good_prof_index]['ctmp']['data'] = gdac_ctmp
this_float_data['profiles'][good_prof_index]['ctmp']['name'] = 'Conservative temperature'
this_float_data['profiles'][good_prof_index]['ctmp']['units'] = '°C'
this_float_data['profiles'][good_prof_index]['ctmp']['pres'] = gdac_pres
this_float_data['profiles'][good_prof_index]['ctmp']['depth'] = gdac_depth
this_float_data['profiles'][good_prof_index]['psal'] = {}
this_float_data['profiles'][good_prof_index]['psal']['data'] = gdac_psal
this_float_data['profiles'][good_prof_index]['psal']['name'] = 'Salinity'
this_float_data['profiles'][good_prof_index]['psal']['units'] = 'PSS-78'
this_float_data['profiles'][good_prof_index]['psal']['pres'] = gdac_pres
this_float_data['profiles'][good_prof_index]['psal']['depth'] = gdac_depth
this_float_data['profiles'][good_prof_index]['asal'] = {}
this_float_data['profiles'][good_prof_index]['asal']['data'] = gdac_asal
this_float_data['profiles'][good_prof_index]['asal']['name'] = 'Absolute salinity'
this_float_data['profiles'][good_prof_index]['asal']['units'] = 'g/kg'
this_float_data['profiles'][good_prof_index]['asal']['pres'] = gdac_pres
this_float_data['profiles'][good_prof_index]['asal']['depth'] = gdac_depth
if compute_extras:
# compute N^2
gdac_Nsquared, gdac_midpoint_pres = gsw.Nsquared(gdac_asal,gdac_ctmp,gdac_pres,
this_float_data['profiles'][good_prof_index]['lat'])
gdac_midpoint_depth = -1 * gsw.z_from_p(gdac_midpoint_pres,
this_float_data['profiles'][good_prof_index]['lat'])
this_float_data['profiles'][good_prof_index]['Nsquared'] = {}
this_float_data['profiles'][good_prof_index]['Nsquared']['data'] = gdac_Nsquared * 10e7
this_float_data['profiles'][good_prof_index]['Nsquared']['name'] = 'Buoyancy frequency squared'
this_float_data['profiles'][good_prof_index]['Nsquared']['units'] = r'10$^{-7}$ s$^{-2}$'
this_float_data['profiles'][good_prof_index]['Nsquared']['pres'] = gdac_midpoint_pres
this_float_data['profiles'][good_prof_index]['Nsquared']['depth'] = gdac_midpoint_depth
if smooth_N2_PV:
smooth_N2_depths, smooth_N2 = gt.vert_prof_running_mean(this_float_data['profiles'][good_prof_index],
'Nsquared',window=smooth_N2_PV_window,
extrap='NaN',top='top',bottom='bottom')
smooth_N2_pres = gsw.p_from_z(-1 * smooth_N2_depths,this_float_data['profiles'][good_prof_index]['lat'])
this_float_data['profiles'][good_prof_index]['Nsquared']['data'] = smooth_N2
this_float_data['profiles'][good_prof_index]['Nsquared']['pres'] = smooth_N2_pres
this_float_data['profiles'][good_prof_index]['Nsquared']['depth'] = smooth_N2_depths
# compute PV (isopycnic potential vorticity, which neglects contribution of relative vorticity)
# per Talley et al. (2011), Ch. 3.5.6 (Eq. 3.12b): Q ~ -(f/g)*(N^2)
omega = 7.2921e-5 # rad/s
coriolis_freq = 2 * omega * sin(this_float_data['profiles'][good_prof_index]['lat'] * pi/180)
gdac_PV = -1 * (coriolis_freq / constants.g) * gdac_Nsquared
this_float_data['profiles'][good_prof_index]['PV'] = {}
this_float_data['profiles'][good_prof_index]['PV']['data'] = gdac_PV * 10e12
this_float_data['profiles'][good_prof_index]['PV']['name'] = 'Isopycnic potential vorticity'
this_float_data['profiles'][good_prof_index]['PV']['units'] = r'10$^{-12}$ m$^{-1}$ s$^{-1}$'
this_float_data['profiles'][good_prof_index]['PV']['pres'] = gdac_midpoint_pres
this_float_data['profiles'][good_prof_index]['PV']['depth'] = gdac_midpoint_depth
if smooth_N2_PV:
smooth_PV_depths, smooth_PV = gt.vert_prof_running_mean(this_float_data['profiles'][good_prof_index],
'PV',window=smooth_N2_PV_window,
extrap='NaN',top='top',bottom='bottom')
smooth_PV_pres = gsw.p_from_z(-1 * smooth_PV_depths,this_float_data['profiles'][good_prof_index]['lat'])
this_float_data['profiles'][good_prof_index]['PV']['data'] = smooth_PV
this_float_data['profiles'][good_prof_index]['PV']['pres'] = smooth_PV_pres
this_float_data['profiles'][good_prof_index]['PV']['depth'] = smooth_PV_depths
# compute buoyancy loss required for convection to reach each observed depth, as in de Lavergne et al. (2014)
# see gt.destab() for details
gdac_destab = gt.destab(this_float_data['profiles'][good_prof_index],gdac_depth[1:],verbose_warn=True)
this_float_data['profiles'][good_prof_index]['destab'] = {}
this_float_data['profiles'][good_prof_index]['destab']['data'] = gdac_destab
this_float_data['profiles'][good_prof_index]['destab']['name'] = 'Buoyancy loss required for destabilization to given depth'
this_float_data['profiles'][good_prof_index]['destab']['units'] = r'm$^2$ s$^{-2}$'
this_float_data['profiles'][good_prof_index]['destab']['pres'] = gdac_pres[1:]
this_float_data['profiles'][good_prof_index]['destab']['depth'] = gdac_depth[1:]
# load SOCCOM data and merge with GDAC profiles
# TODO: modify to include SOCCOM profiles even when no matching GDAC profile (or when GDAC temp/salinity is bad QC)
if wmoid in argo_soccom_index['wmoids']:
soccom_index_number = argo_soccom_index['wmoids'].index(wmoid)
this_float_data['is_soccom'] = True
this_float_data['uwid'] = argo_soccom_index['uwids'][soccom_index_number]
soccom_filename = argo_soccom_index['filenames'][soccom_index_number]
header_line_counter = 0
with open(save_to_soccom_dir + soccom_filename,'rb') as f:
for line in f:
if 'Cruise ' in line.decode('latin-1'):
break
header_line_counter += 1
data_frame = pd.read_csv(save_to_soccom_dir + soccom_filename, header=header_line_counter,
delim_whitespace=True, na_values=-1e10, encoding='latin-1')
soccom_var_names = data_frame.axes[1].values
soccom_prof_nums = argo_soccom_index['profile_nums'][soccom_index_number]
soccom_all_entries_prof_nums = data_frame['Station'].values
for gdac_prof_index, gdac_profile_list_of_dicts in enumerate(this_float_data['profiles']):
this_prof_num = this_float_data['profiles'][gdac_prof_index]['prof_num']
if this_prof_num in soccom_prof_nums:
soccom_all_entries_match_prof = (soccom_all_entries_prof_nums == this_prof_num)
# ignore blank last line-entry of profile
soccom_all_entries_match_prof[where(soccom_all_entries_match_prof)[0][-1]] = False
soccom_pres = data_frame['Pressure[dbar]'].values[soccom_all_entries_match_prof]
soccom_depth = data_frame['Depth[m]'].values[soccom_all_entries_match_prof]
for param_string in soccom_params_to_save:
if param_string in soccom_var_names:
soccom_param_data = data_frame[param_string].values[soccom_all_entries_match_prof]
soccom_param_QC_str = soccom_var_names[1 + where(soccom_var_names == param_string)[0][0]]
soccom_param_QC = data_frame[soccom_param_QC_str].values[soccom_all_entries_match_prof].astype(int)
if allow_bad_soccom_qc:
soccom_QC_mask = logical_or(soccom_param_QC == 0,soccom_param_QC == 4)
else:
soccom_QC_mask = (soccom_param_QC == 0)
soccom_params_index = where(soccom_params_to_save == param_string)[0][0]
soccom_param_name = soccom_param_names[soccom_params_index]
soccom_param_abbrev = soccom_param_abbrevs[soccom_params_index]
soccom_param_units = soccom_units_names[soccom_params_index]
this_float_data['profiles'][gdac_prof_index][soccom_param_abbrev] = {}
this_float_data['profiles'][gdac_prof_index][soccom_param_abbrev]['data'] \
= soccom_param_data[soccom_QC_mask][::-1]
this_float_data['profiles'][gdac_prof_index][soccom_param_abbrev]['name'] = soccom_param_name
this_float_data['profiles'][gdac_prof_index][soccom_param_abbrev]['units'] = soccom_param_units
this_float_data['profiles'][gdac_prof_index][soccom_param_abbrev]['pres'] \
= soccom_pres[soccom_QC_mask][::-1]
this_float_data['profiles'][gdac_prof_index][soccom_param_abbrev]['depth'] \
= soccom_depth[soccom_QC_mask][::-1]
# re-compute oxygen saturation using quality controlled T and S from GDAC
# note: O2 solubility converted from ml/L to µmol/kg
if soccom_param_abbrev == 'OxygenSat':
assert 'Oxygen' in this_float_data['profiles'][gdac_prof_index].keys(), 'No O2 data found.'
soccom_pres_for_interp = this_float_data['profiles'][gdac_prof_index]['OxygenSat']['pres']
gdac_psal_interp = gt.vert_prof_eval(this_float_data['profiles'][gdac_prof_index],'psal',
soccom_pres_for_interp,z_coor='pres')
gdac_temp_interp = gt.vert_prof_eval(this_float_data['profiles'][gdac_prof_index],'temp',
soccom_pres_for_interp,z_coor='pres')
soccom_O2_sol = 44.6596 * seawater.satO2(gdac_psal_interp,gdac_temp_interp)
soccom_O2_sat = 100.0 * this_float_data['profiles'][gdac_prof_index]['Oxygen']['data'] \
/ soccom_O2_sol
this_float_data['profiles'][gdac_prof_index]['OxygenSat']['data'] = soccom_O2_sat
else:
this_float_data['is_soccom'] = False
this_float_data['uwids'] = 'unknown_or_not_applicable'
# load UW O2 data and merge with GDAC profiles
if wmoid in argo_soccom_index['UW_O2_wmoids']:
UW_O2_index_number = argo_soccom_index['UW_O2_wmoids'].index(wmoid)
UW_O2_filename = argo_soccom_index['UW_O2_filenames'][UW_O2_index_number]
with h5py.File(save_to_UW_O2_dir + UW_O2_filename,'r') as UW_O2_file:
UW_O2_prof_nums = UW_O2_file['PROFILE'].value[0][0].astype(int)
UW_O2_pres = UW_O2_file['PRES'].value[0]
UW_O2_data = UW_O2_file['OXYGEN_UW_CORRECTED'].value[0]
# manual quality control for 5903616 based on offset of O2 estimated in density space
if wmoid == 5903616: UW_O2_data = UW_O2_data - 6.0
for gdac_prof_index in range(len(this_float_data['profiles'])):
this_prof_num = this_float_data['profiles'][gdac_prof_index]['prof_num']
if use_UW_O2_not_SOCCOM: load_UW_O2_if_available = True
else: load_UW_O2_if_available = ('Oxygen' not in
this_float_data['profiles'][gdac_prof_index].keys())
if load_UW_O2_if_available and (this_prof_num in UW_O2_prof_nums):
UW_prof_index = list(UW_O2_prof_nums).index(this_prof_num)
if not all(isnan(UW_O2_data[:,UW_prof_index])):
good_z = ~isnan(UW_O2_pres[:,UW_prof_index])
unique_indices = unique(UW_O2_pres[:,UW_prof_index],return_index=True)[1]
for good_or_not_idx, good_or_not in enumerate(good_z):
if good_or_not_idx not in unique_indices: good_z[good_or_not_idx] = False
UW_O2_depth = -1 * gsw.z_from_p(UW_O2_pres[:,UW_prof_index][good_z],
this_float_data['profiles'][gdac_prof_index]['lat'])
# unfortunately no O2 solubility routine in GSW-Python yet...
# note: O2 solubility converted from ml/L to µmol/kg
UW_O2_psal = gt.vert_prof_eval(this_float_data['profiles'][gdac_prof_index],'psal',
UW_O2_pres[:,UW_prof_index][good_z],z_coor='pres')
UW_O2_temp = gt.vert_prof_eval(this_float_data['profiles'][gdac_prof_index],'temp',
UW_O2_pres[:,UW_prof_index][good_z],z_coor='pres')
UW_O2_sol = 44.6596 * seawater.satO2(UW_O2_psal,UW_O2_temp)
assert len(UW_O2_sol) == len(UW_O2_data[:,UW_prof_index][good_z]), 'Check UW-O2 data/sol vector lengths.'
UW_O2_sat = 100.0 * UW_O2_data[:,UW_prof_index][good_z] / UW_O2_sol
this_float_data['profiles'][gdac_prof_index]['Oxygen'] = {}
this_float_data['profiles'][gdac_prof_index]['Oxygen']['data'] = UW_O2_data[:,UW_prof_index][good_z]
this_float_data['profiles'][gdac_prof_index]['Oxygen']['name'] = 'Oxygen'
this_float_data['profiles'][gdac_prof_index]['Oxygen']['units'] = 'µmol/kg'
this_float_data['profiles'][gdac_prof_index]['Oxygen']['pres'] = UW_O2_pres[:,UW_prof_index][good_z]
this_float_data['profiles'][gdac_prof_index]['Oxygen']['depth'] = UW_O2_depth
this_float_data['profiles'][gdac_prof_index]['OxygenSat'] = {}
this_float_data['profiles'][gdac_prof_index]['OxygenSat']['data'] = UW_O2_sat
this_float_data['profiles'][gdac_prof_index]['OxygenSat']['name'] = 'Oxygen saturation'
this_float_data['profiles'][gdac_prof_index]['OxygenSat']['units'] = '%'
this_float_data['profiles'][gdac_prof_index]['OxygenSat']['pres'] = UW_O2_pres[:,UW_prof_index][good_z]
this_float_data['profiles'][gdac_prof_index]['OxygenSat']['depth'] = UW_O2_depth
this_float_data['is_uw_o2'] = True
return this_float_data
def wod_load_index(wod_dir,data_dirs):
    """ Parse index of WOD cast data in netCDF format.

    Args:
        wod_dir: string path to the top-level WOD data directory (with trailing separator)
        data_dirs: list of strings specifying data subdirectory names (under wod_dir) to examine

    Returns:
        wod_index: dict with following keys to NumPy arrays of equal length
                       'filepaths' (complete paths to data files)
                       'datetimes' (Datetime objects)
                       'lats' and 'lons'

    Data provenance:
        NCEI/NODC WODselect utility: https://www.nodc.noaa.gov/OC5/SELECT/dbsearch/dbsearch.html
        Information on updates every 3 months: https://www.nodc.noaa.gov/OC5/WOD/wod_updates.html
        Note on prelease of WOD 2018 data: https://www.nodc.noaa.gov/OC5/WOD/wod18-notes.html

    Acknowledgements:
        see https://www.nodc.noaa.gov/OC5/wod-woa-faqs.html
    """
    wod_index = {'filepaths':array([]), 'datetimes':array([]), 'lats':array([]), 'lons':array([])}
    index_file_prefix = 'ocldb'
    for data_subdir in data_dirs:
        for filename in os.listdir(wod_dir + data_subdir):
            # use the first WODselect index file ('ocldb...') found in each subdirectory
            if filename.startswith(index_file_prefix):
                index_file = spnc.netcdf_file(wod_dir + data_subdir + '/' + filename,'r',mmap=False)
                # entries with negative times are invalid; drop them from the index
                valid_time_mask = index_file.variables['time'][:] >= 0
                wod_index['lats'] = append(wod_index['lats'],index_file.variables['lat'][valid_time_mask])
                wod_index['lons'] = append(wod_index['lons'],index_file.variables['lon'][valid_time_mask])
                wod_index['datetimes'] = append(wod_index['datetimes'],
                                                array([tt.convert_days_since_ref_to_datetime(days_since,1770,1,1)
                                                       for days_since in index_file.variables['time'][valid_time_mask]]))
                wod_index['filepaths'] = append(wod_index['filepaths'],
                                                array([wod_dir + data_subdir + '/' + 'wod_{0:09}O.nc'.format(cast_num)
                                                       for cast_num in index_file.variables['cast'][valid_time_mask]]))
                index_file.close()   # was previously leaked (never closed)
                break
    return wod_index
def wod_load_cast(filepath):
    """ Accessor method for WOD cast observed data in netCDF format. See ldp.wod_load_index() for details.

    QC flag handling: returns strictly good data from good profiles.

    Args:
        filepath: string representing full filepath of data file

    Returns:
        None if cast contained bad depth, temperature, and/or salinity profiles
        (or)
        cast_data: dict with keys:
            country: country in all caps (string)
            platform: long description of ship, for instance (string)
            cast_num: unique WOD cast number (integer)
            lat: float
            lon: float
            datetime: Datetime object
            cast: dict with keys: temp, ptmp, ctmp, psal, asal, sigma_theta
                  each is a dict with keys:
                      data: array of measured values, in direction of increasing depth (surface downwards)
                      name: string of name of parameter, formatted for a plot axis
                      pres: array of corresponding pressure (dbar)
                      depth: array of corresponding depths (m, positive)
                      units: string of units of data

    Example:
        cast_data['datetime'] = datetime(2017,1,1)
        cast_data['cast']['sigma_theta']['data'] = ...
        cast_data['cast']['sigma_theta']['pres'] = ...
    """
    data_file = spnc.netcdf_file(filepath,'r',mmap=False)
    try:
        cast_data = dict()
        cast_data['country'] = b''.join(data_file.variables['country'][:]).decode('utf-8')
        if 'Platform' in data_file.variables.keys():
            cast_data['platform'] = b''.join(data_file.variables['Platform'][:]).decode('utf-8')
        else:
            cast_data['platform'] = 'unknown'
        cast_data['cast_num'] = int(data_file.variables['wod_unique_cast'].data)
        cast_data['lat'] = float(data_file.variables['lat'].data)
        cast_data['lon'] = float(data_file.variables['lon'].data)
        cast_data['datetime'] = tt.convert_days_since_ref_to_datetime(float(data_file.variables['time'].data),1770,1,1)
        cast_data['cast'] = dict()
        depth = data_file.variables['z'][:]
        depth_good_qc_mask = data_file.variables['z_WODflag'][:] == 0
        if all(~depth_good_qc_mask): return None # if all depths are bad
        # reject the whole cast if temperature is absent or its profile-level QC flag is not 'accepted' (0)
        if 'Temperature' not in data_file.variables.keys(): return None
        if int(data_file.variables['Temperature_WODprofileflag'].data) != 0: return None
        temp = data_file.variables['Temperature'][:]
        # per-level QC flag plus a sanity floor just below the seawater freezing point
        temp_good_qc_mask = logical_and(data_file.variables['Temperature_WODflag'][:] == 0, temp > -2.5)
        # same treatment for salinity (reject non-positive values)
        if 'Salinity' not in data_file.variables.keys(): return None
        if int(data_file.variables['Salinity_WODprofileflag'].data) != 0: return None
        psal = data_file.variables['Salinity'][:]
        psal_good_qc_mask = logical_and(data_file.variables['Salinity_WODflag'][:] == 0, psal > 0.0)
    finally:
        # previously leaked, including on the early 'return None' paths; with mmap=False all
        # variable data were copied into memory above, so closing here is safe
        data_file.close()
    cast_qc_mask = logical_and(depth_good_qc_mask,logical_and(temp_good_qc_mask,psal_good_qc_mask))
    if all(~cast_qc_mask): return None
    depth = depth[cast_qc_mask]
    temp = temp[cast_qc_mask]
    psal = psal[cast_qc_mask]
    # derive pressure and TEOS-10 quantities from the QC'ed in-situ measurements
    pres = gsw.p_from_z(-1 * depth,cast_data['lat'])
    asal = gsw.SA_from_SP(psal,pres,cast_data['lon'],cast_data['lat'])
    ptmp = gsw.pt0_from_t(asal,temp,pres)            # potential temperature, r'$\Theta$'
    ctmp = gsw.CT_from_pt(asal,ptmp)
    sigma_theta = gsw.sigma0(asal,ctmp)
    # fill parameter sub-dicts (order matches the original dict insertion order)
    for abbrev,values,name,units in (('temp',temp,'Temperature','°C'),
                                     ('psal',psal,'Salinity','PSS-78'),
                                     ('sigma_theta',sigma_theta,r'$\sigma_\theta$',r'kg/m$^3$'),
                                     ('ptmp',ptmp,'Potential temperature','°C'),
                                     ('ctmp',ctmp,'Conservative temperature','°C'),
                                     ('asal',asal,'Absolute salinity','g/kg')):
        cast_data['cast'][abbrev] = {'data':values,'name':name,'units':units,'pres':pres,'depth':depth}
    return cast_data
def waghc_load_field(subdir):
    """ Open the WOCE/Argo Global Hydrographic Climatology (WAGHC) 2017 netCDF files as one xarray Dataset.

    Information: http://icdc.cen.uni-hamburg.de/1/daten/ocean/waghc/

    Citation: Gouretski and Koltermann (2004) as well as the following:
        Gouretski, Viktor (2018). WOCE-Argo Global Hydrographic Climatology (WAGHC Version 1.0). World Data Center for
            Climate (WDCC) at DKRZ. https://doi.org/10.1594/WDCC/WAGHC_V1.0

    Example plot:
        plt.pcolormesh(fields['salinity'].sel(time=3,depth=15.0,latitude=slice(-70,-55),longitude=slice(-20,20)))
    """
    # decode_times=False leaves the 'time' coordinate as raw values (it is selected by integer month downstream)
    return xr.open_mfdataset(subdir + '*.nc',decode_times=False)
def waghc_interp_to_location(waghc_fields,param,depth,lon,lat,month_exact=None,datetime_for_interp=None):
    """ Use nearest neighbor interpolation to estimate WAGHC value at a given (lat,lon).

    Args:
        waghc_fields: see ldp.waghc_load_field()
        param: 'salinity' or 'psal' or 'temperature' or 'temp'
        depth: integer or float depth
        lon: longitude to interpolate to
        lat: latitude to interpolate to
        month_exact: integer month (1-12) to use that month's field, or None to interpolate between months
        datetime_for_interp: Datetime to use to interpolate between months, or None to use specific month's field

    Returns:
        val: interpolated value
    """
    # map Argo-style abbreviations onto WAGHC variable names; other names pass through unchanged
    field_name = {'psal':'salinity','temp':'temperature'}.get(param,param)
    if month_exact is not None:
        # single-month request: nearest-neighbor selection in space at that month's field
        return float(waghc_fields[field_name].sel(time=month_exact,depth=depth,latitude=lat,longitude=lon,
                                                  method='nearest'))
    # otherwise interpolate between monthly climatological values at the requested day-of-year
    monthly_vals = waghc_fields[field_name].sel(depth=depth,latitude=lat,longitude=lon,method='nearest').values
    # wrap December before January and January after December so the year boundary interpolates smoothly
    wrapped_vals = array([monthly_vals[-1],*monthly_vals,monthly_vals[0]])
    wrapped_doys = arange(-365.24/12/2,365+30,365.24/12) # somewhat imprecise, but good enough
    doy_interpolator = spin.interp1d(wrapped_doys,wrapped_vals)
    return float(doy_interpolator(datetime_for_interp.timetuple().tm_yday))
def compile_hydrographic_obs(argo_index_pickle_dir,argo_gdac_dir,wod_dir,
wod_ship_dirs=['WOD_CTD_Weddell','WOD_OSD_Weddell'],
wod_seal_dirs=['WOD_APB_Weddell'],
lon_bounds=[-60,15],lat_bounds=[-90,-55],toi_bounds=[datetime(1970,1,1),datetime.today()],
distance_check=None,distance_center=[None,None],
include_argo=True,include_wod=True,params=['ptmp','psal','sigma_theta'],
compute_extras=False,max_cast_min_depth=20,min_cast_max_depth=1000,
reject_mld_below=500,reject_mld_ref=False,strict_mld_reject=True,
interp_spacing=0.1,interp_depths=(0,1000),calc_mld=True,calc_ml_avg=True,
calc_at_depths=None,calc_depth_avgs=None,calc_sd=None,calc_tb=None,
pickle_dir=None,pickle_filename=None,
prof_count_dir=None,prof_count_filename=None,verbose=True):
""" Compile regularly-interpolated Argo and/or WOD hydrographic profiles and calculate quantities of interest.
Save in pickle if requested.
Args:
lon_bounds: longitude range [W,E] to search for profiles/casts
lat_bounds: latitude range [S,N] to search for profiles/casts
toi_bounds: Datetime range [start,end] to search for profiles/casts
distance_check: None or maximum distance (in km) from <<distance_center>> as requirement for profiles/casts
or [smaller_radius,larger_radius] to specify toroid (donut) to search within
note: must still specify lon_bounds, lat_bounds to whittle down search
distance_center: [lat,lon] of center location from which to check distance
include_argo: True or False
include_wod: True or False
params: list of parameter abbreviations to include
note: data for all params listed here must be available for a profile to be examined
compute_extras: False or True (compute N2, convection resistance, etc.) for float profiles
max_cast_min_depth: None or deepest minimum depth of cast to allow (if deeper, ignore)
min_cast_max_depth: None or shallowest maximum depth of cast to allow (if shallower, ignore)
reject_mld_below: None or depth (set MLD to NaN, or ignore cast, if calculated MLD is below given depth; this
behavior depends on strict_mld_reject)
note: only tested if not None AND calc_mld is True
reject_mld_ref: False (default)
or True (recommended; set MLD to NaN if shallowest measurement is below reference depth)
note: only tested if calc_mld is True
strict_mld_reject: True (default; ignore cast if MLD is NaN, None, or greater than reject_mld_below)
or False (recommended; keep cast if these conditions met, but don't calculate ML averages)
interp_spacing: vertical depth spacing (meters) of interpolated profiles returned ('nearest' extrap used)
interp_depths: None or depth range of interpolated profiles to return: (shallow,deep)
calc_mld: True or False
calc_ml_avg: False or True (to calculate parameter averages within ML) (calc_mld must be True)
calc_at_depths: None or list of:
single depths to calculate parameter values at using interpolation
tuples of depths, all of which must have valid calculated parameter values using interpolation
NOTE: these tuples must contain at least 3 depths
calc_depth_avgs: None or LIST (!) of depth ranges (shallow,deep) to calculate parameter averages by interp
calc_sd: None or list of depths for calculating Martinson salt deficit (see geo_tools.martinson())
if single depths provided, calculate original 0-[depth] metrics
if tuple of depths provided, e.g. (upper,lower), calculate interior upper-lower metrics
if three-tuple provided, e.g. (upper,lower,sd_ref_psal), calculate interior metrics with ref_psal
calc_tb: None or list of depths for calculating Martinson thermal barrier (")
if single depths provided, calculate original 0-[depth] metrics
if tuple of depths provided, e.g. (upper,lower), calculate interior upper-lower metrics
pickle_dir: None (simply return data found) or filepath to store pickle
pickle_filename: None (") or name of pickle in which to save data
prof_count_dir: None or filepath to save text file with profile and float counts
prof_count_filename: None or filename for text file with profile and float counts
verbose: True or False
Returns:
compiled_obs: dict with following keys:
'platforms': NumPy vector of all obs' WMOids (for floats) or ship name / other platform ID (for WOD)
'types': NumPy vector of all obs' type: 'ship', 'float', or 'seal'
'datetimes': NumPy vector of all obs' datetimes
'doys': NumPy vector of all obs' dates-of-year
'lats': NumPy vector of all obs' latitudes
'lons': NumPy vector of all obs' longitudes
'mlds': NumPy vector of all obs' MLDs (if calc_mld is True)
'depths': NumPy vector of evenly spaced depths used in profile interpolation
each parameter abbreviation in <<params>>: dicts with following keys for each parameter's data:
'profiles': NumPy 2D array of profiles corresponding to <<depths>> (see above)
note: shape is (# profiles, # depths)
'ml_avg': NumPy vector of profiles' ML averages (if calc_mld and calc_ml_avg are True)
depths given in calc_at_depths (will automatically truncate trailing zeros):
NumPy vector of interpolated values at each depth
two-tuples corresponding to depth ranges given in calc_depth_avgs:
NumPy vector of average values for each depth range
tuples of two or more depths corresponding to tuples given in calc_at_depths
'detailed_info': dict with same structure as above (see example) containing
lat/lon/type/platform/datetime/doy for each non-NaN derived data
e.g. compiled_obs['detailed_info']['psal'][15]['lats'] is a list of lats (not a NumPy array)
"""
compiled_data = {'platforms':[],'types':[],'datetimes':[],'doys':[],'lats':[],'lons':[],'depths':[],
'detailed_info':dict()}
if calc_mld: compiled_data['mlds'] = []
float_wmoids_used = []
float_prof_count = 0
wod_ship_prof_count = 0
wod_seal_prof_count = 0
counts = dict()
def counter(count_dict,param_name,key_name,prof_data_dict):
if param_name not in count_dict.keys(): count_dict[param_name] = dict()
if key_name not in count_dict[param_name].keys():
count_dict[param_name][key_name] \
= {'float_wmoids_used':[], 'float_profs':0, 'wod_ship_profs':0, 'wod_seal_profs':0}
if 'seal_platform' in prof_data_dict.keys():
count_dict[param_name][key_name]['wod_seal_profs'] += 1
elif 'ship_platform' in prof_data_dict.keys():
count_dict[param_name][key_name]['wod_ship_profs'] += 1
elif 'wmoid' in prof_data_dict.keys():
count_dict[param_name][key_name]['float_profs'] += 1
count_dict[param_name][key_name]['float_wmoids_used'].append(prof_data_dict['wmoid'])
# store lat/lon/type/platform corresponding to specific non-rejected, non-NaN derived data
if param_name not in compiled_data['detailed_info'].keys():
compiled_data['detailed_info'][param_name] = dict()
if key_name not in compiled_data['detailed_info'][param_name].keys():
compiled_data['detailed_info'][param_name][key_name] = dict()
if 'types' not in compiled_data['detailed_info'][param_name][key_name].keys():
compiled_data['detailed_info'][param_name][key_name]['platforms'] = []
compiled_data['detailed_info'][param_name][key_name]['types'] = []
compiled_data['detailed_info'][param_name][key_name]['datetimes'] = []
compiled_data['detailed_info'][param_name][key_name]['doys'] = []
compiled_data['detailed_info'][param_name][key_name]['lats'] = []
compiled_data['detailed_info'][param_name][key_name]['lons'] = []
compiled_data['detailed_info'][param_name][key_name]['lats'].append(prof_data['lat'])
compiled_data['detailed_info'][param_name][key_name]['lons'].append(prof_data['lon'])
compiled_data['detailed_info'][param_name][key_name]['datetimes'].append(prof_data['datetime'])
compiled_data['detailed_info'][param_name][key_name]['doys'].append(prof_data['datetime'].timetuple().tm_yday)
if 'seal_platform' in prof_data.keys():
compiled_data['detailed_info'][param_name][key_name]['platforms'].append(prof_data['seal_platform'])
compiled_data['detailed_info'][param_name][key_name]['types'].append('seal')
elif 'ship_platform' in prof_data.keys():
compiled_data['detailed_info'][param_name][key_name]['platforms'].append(prof_data['ship_platform'])
compiled_data['detailed_info'][param_name][key_name]['types'].append('ship')
elif 'wmoid' in prof_data.keys():
compiled_data['detailed_info'][param_name][key_name]['platforms'].append(prof_data['wmoid'])
compiled_data['detailed_info'][param_name][key_name]['types'].append('float')
else:
compiled_data['detailed_info'][param_name][key_name]['platforms'].append('Unknown platform')
compiled_data['detailed_info'][param_name][key_name]['types'].append('unknown')
def dist_check(obs_lat,obs_lon):
if distance_check is None:
return True
else:
if isnan(obs_lat) or isnan(obs_lon): return False
radius = gt.distance_between_two_coors(obs_lat,obs_lon,distance_center[0],distance_center[1])
if type(distance_check) == int or type(distance_check) == float:
if (distance_check * 1000.0) <= radius: return False
else: return True
elif len(distance_check) == 2:
if (distance_check[0] * 1000.0) < radius < (distance_check[1] * 1000.0): return True
else: return False
all_profs = []
if include_argo:
argo_gdac_index = pickle.load(open(argo_index_pickle_dir + 'argo_gdac_index.pickle','rb'))
argo_soccom_index = pickle.load(open(argo_index_pickle_dir + 'argo_soccom_index.pickle','rb'))
toi_int = [tt.convert_datetime_to_14(toi_bounds[0]),tt.convert_datetime_to_14(toi_bounds[1])]
all_float_data = []
for wmoid in argo_gdac_index['wmoids']:
this_float_meta = argo_gdac_float_meta(argo_gdac_index['local_prof_index'],wmoid)
toi_match = logical_and(this_float_meta['prof_datetimes'] >= toi_int[0],
this_float_meta['prof_datetimes'] <= toi_int[1])
lon_match = logical_and(this_float_meta['prof_lons'] >= lon_bounds[0],
this_float_meta['prof_lons'] <= lon_bounds[1])
lat_match = logical_and(this_float_meta['prof_lats'] >= lat_bounds[0],
this_float_meta['prof_lats'] <= lat_bounds[1])
prof_match = logical_and(logical_and(toi_match,lon_match),lat_match)
if any(prof_match):
if distance_check is not None:
for p_idx in range(len(prof_match)):
if prof_match[p_idx]:
prof_match[p_idx] = dist_check(this_float_meta['prof_lats'][p_idx],
this_float_meta['prof_lons'][p_idx])
if any(prof_match):
if verbose: print(wmoid)
prof_nums_match = array(this_float_meta['prof_nums'])[prof_match]
this_float_data = argo_float_data(wmoid,argo_gdac_dir,argo_gdac_index,argo_soccom_index,
prof_nums=prof_nums_match,compute_extras=compute_extras)
all_float_data.append(this_float_data)
for float_idx,float_data in enumerate(all_float_data):
for prof_idx in range(len(float_data['profiles'])):
prof_data = float_data['profiles'][prof_idx]
prof_data['wmoid'] = float_data['wmoid'] # annoying but necessary
prof_data['datetime'] = tt.convert_tuple_to_datetime(tt.convert_14_to_tuple(prof_data['datetime']))
all_profs.append(prof_data)