forked from singingsea/OMI_onGit
-
Notifications
You must be signed in to change notification settings - Fork 0
/
read_omi_no2_so2_and_dump_df.py
189 lines (168 loc) · 6.65 KB
/
read_omi_no2_so2_and_dump_df.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# -*- coding: utf-8 -*-
"""
Created on Wed Mar 21 10:12:55 2018
@author: ZhaoX
"""
#!/usr/bin/python
import h5py
import numpy as np
import sys
import time
import calendar
import os
import pandas as pd
from read_OMI_at_a_location import OMI_at_a_location
# --- user configuration -------------------------------------------------
# Folder that holds the downloaded OMI HDF5 granules and fileList.txt.
OMI_data_path = 'C:\\Projects\\OMI\\NO2\\download2\\'
# Coordinates (decimal degrees) passed to OMI_at_a_location to pick out the
# site of interest; presumably the measurement site — confirm against that helper.
user_lat = 43.781
user_lon = -79.468
# When False, the script asks for confirmation before processing each file.
process_all_files = True
# Work inside the data folder so the bare file names from fileList.txt resolve.
os.chdir(OMI_data_path)
# Open the text file that lists the HDF5 granules to process (one name per line).
try:
    fileList = open(OMI_data_path + 'fileList.txt', 'r')
except OSError:
    # Narrowed from a bare `except:` — only file-access failures should be
    # treated as "the list is missing"; anything else is a real bug.
    print('Did not find a text file containing file names (perhaps name does not match)')
    sys.exit()
df = pd.DataFrame()  # pooled data from every processed granule
j = 1                # stays 1 until the first granule is processed (seeds the concat)
# Seconds between the Unix epoch and the TAI-93 reference used by the OMI
# 'Time' field. Hoisted out of the per-scan loop (it is loop-invariant); the
# reference string is kept byte-identical to the original.
TAI93_OFFSET = calendar.timegm(time.strptime('Dec 31, 1992 @ 23:59:59 UTC', '%b %d, %Y @ %H:%M:%S UTC'))

# Loop through every granule listed in fileList.txt.
for FILE_NAME in fileList:
    FILE_NAME = FILE_NAME.strip()
    # Ask for per-file confirmation unless batch mode is enabled.
    if not process_all_files:
        user_input = input('\nWould you like to process\n' + FILE_NAME + '\n\n(Y/N)')
    else:
        user_input = 'Y'
    if user_input in ('N', 'n'):
        print('Skipping...')
        continue
    try:
        # Open the HDF5 granule read-only; skip anything h5py cannot read.
        # (Narrowed from a bare `except:` — h5py raises OSError on bad files.)
        h5file = h5py.File(FILE_NAME, 'r')
    except OSError:
        print('Skipping...')
        continue

    # Select the SDS list and swath group from the species in the file name.
    if 'NO2' in FILE_NAME:
        print('This is an OMI NO2 file. Saving... ')
        # Dictionary mapping column position -> SDS name to extract.
        SDS = dict([(1,'ColumnAmountNO2'),(2,'ColumnAmountNO2Std'),
                    (3,'VcdQualityFlags'),
                    (4,'ColumnAmountNO2Strat'),(5,'ColumnAmountNO2StratStd'),
                    (6,'ColumnAmountNO2Trop'),(7,'ColumnAmountNO2TropStd'),
                    (8,'CloudFraction'),(9,'CloudFractionStd'),
                    (10,'CloudPressure'),(11,'CloudPressureStd'),])
        swath = h5file['HDFEOS']['SWATHS']['ColumnAmountNO2']
    elif 'SO2' in FILE_NAME:
        print('This is an OMI SO2 file. Saving... ')
        SDS = dict([(1,'ColumnAmountSO2_PBL'),(2,'ColumnAmountO3'),(3,'QualityFlags_PBL')])
        swath = h5file['HDFEOS']['SWATHS']['OMI Total Column Amount SO2']
    else:
        print('The file named :',FILE_NAME, ' is not a valid OMI file. \n')
        h5file.close()  # bug fix: the open handle used to leak on this skip path
        continue
    dataFields = swath['Data Fields']
    geolocation = swath['Geolocation Fields']

    # Geolocation and scan time as flat vectors.
    lat = geolocation['Latitude'][:].ravel()
    lon = geolocation['Longitude'][:].ravel()
    scan_time = geolocation['Time'][:].ravel()
    n_scans = scan_time.shape[0]
    # Cross-track pixels per scan line. This is 60 for OMI global mode; derive
    # it from the data instead of hard-coding so other swath widths also work.
    pixels_per_scan = lat.shape[0] // n_scans

    # Split each scan's TAI-93 timestamp into calendar components.
    year = np.zeros(n_scans)
    month = np.zeros(n_scans)
    day = np.zeros(n_scans)
    hour = np.zeros(n_scans)
    minute = np.zeros(n_scans)  # renamed from 'min' to stop shadowing the builtin
    sec = np.zeros(n_scans)
    for i in range(n_scans):
        # NOTE: the original indexed with i-1, wrapping through -1; that fills
        # every slot with its own scan's value, so plain i is equivalent.
        t = time.gmtime(scan_time[i] + TAI93_OFFSET)
        year[i] = t[0]
        month[i] = t[1]
        day[i] = t[2]
        hour[i] = t[3]
        minute[i] = t[4]
        sec[i] = t[5]

    # Build the output table: 8 fixed columns + one column per SDS.
    end = 8 + len(SDS)
    output = np.zeros((n_scans * pixels_per_scan, end))
    # Per-scan quantities are repeated across every pixel of the scan line.
    output[:, 0] = np.repeat(year, pixels_per_scan)
    output[:, 1] = np.repeat(month, pixels_per_scan)
    output[:, 2] = np.repeat(day, pixels_per_scan)
    output[:, 3] = np.repeat(hour, pixels_per_scan)
    output[:, 4] = np.repeat(minute, pixels_per_scan)
    output[:, 5] = np.repeat(sec, pixels_per_scan)
    output[:, 6] = lat
    output[:, 7] = lon

    # Column titles live in a list because strings can't share the float array.
    tempOutput = ['Year', 'Month', 'Day', 'Hour', 'Minute', 'Second',
                  'Latitude', 'Longitude']

    # Read every SDS in the dictionary into its output column (with title).
    for i in range(8, end):
        SDS_NAME = SDS[i - 7]  # the name of the SDS to read
        try:
            sds = dataFields[SDS_NAME]
        except KeyError:  # narrowed from a bare except: missing dataset only
            print('Sorry, your OMI hdf5 file does not contain the SDS:',SDS_NAME,'. Please try again with the correct file type.')
            sys.exit()
        # Attributes needed to decode stored values into physical quantities.
        scale = sds.attrs['ScaleFactor']
        fv = sds.attrs['_FillValue']
        mv = sds.attrs['MissingValue']
        offset = sds.attrs['Offset']
        data = sds[:].ravel().astype(float)
        # Mask fill/missing samples with NaN so scale/offset only touches valid
        # samples, then restore the (unscaled) fill value for the output file.
        data[data == float(fv)] = np.nan
        data[data == float(mv)] = np.nan
        data = (data - offset) * scale
        data[np.isnan(data)] = fv
        output[:, i] = data
        tempOutput.append(SDS_NAME)

    # Stack the titles on top of the data. np.row_stack was removed in
    # NumPy 2.0; np.vstack is the identical, supported spelling.
    output = np.vstack((np.asarray(tempOutput), output))

    # Write the table as comma-separated text next to the source granule,
    # replacing the 4-character extension (e.g. '.he5') with '.txt'.
    outfilename = FILE_NAME[:-4] + '.txt'
    with open(outfilename, 'w') as outfile:  # 'with' guarantees the handle closes
        for row in output:
            outfile.write(','.join(row))
            outfile.write('\n')
    h5file.close()

    # Subset near the configured site (helper from read_OMI_at_a_location;
    # presumably picks pixels around user_lat/user_lon — confirm there) and
    # pool both the full table and the site subset into growing dataframes.
    df_at_a_location_single_file = OMI_at_a_location( OMI_data_path + outfilename ,user_lat,user_lon)
    df_single_file = pd.read_csv(OMI_data_path + outfilename)
    if j == 1:
        df = df_single_file
        df_at_a_location = df_at_a_location_single_file
        j += 1
    else:
        df = pd.concat([df,df_single_file])
        df_at_a_location = pd.concat([df_at_a_location,df_at_a_location_single_file])
    # Message bug fix: the original concatenation was missing spaces.
    print('\nFile ' + outfilename + ' has been saved successfully.')
print('\nAll files have been saved successfully.')
# Pool of every pixel from every processed granule. df is always defined
# (initialised to an empty DataFrame before the loop).
df.to_csv(OMI_data_path + 'combined_data.csv', index = False)
# df_at_a_location is only bound once at least one granule was processed;
# guard so an empty run reports the situation instead of crashing at the end.
try:
    df_at_a_location.to_csv(OMI_data_path + 'combined_data_at_a_location.csv', index = False)
except NameError:
    print('No OMI files were processed; combined_data_at_a_location.csv was not written.')