forked from harshberia93/HydroMix
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsimple_mixing.py
More file actions
168 lines (126 loc) · 6.25 KB
/
simple_mixing.py
File metadata and controls
168 lines (126 loc) · 6.25 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
#################################################################################################################################
'''
Created on: 8th November 2018 (Harsh Beria)
Last updated on
What it does?
Computes the proportion of groundwater that is made of snow vs rain
The dataset used is a dummy one, not a real one. The purpose of this script is to show the working of HydroMix
SourceFiles used:
SourceFiles/Rain_SP_GW_VdN.xlsx
OutputFiles processed:
OutputFiles/VdN/H2_results.csv
OutputFiles/VdN/O18_results.csv
Figures made:
OutputFiles/VdN/posterior.jpeg
'''
#################################################################################################################################
# %% Imports
from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
#################################################################################################################################
# %% Custom functions
def HydroMix(source1, source2, mixture, LAMBDA_prior, ErrorSTD_prior, NUMBER_ITERATIONS):
'''
Contains the core of HydroMix
source1 => Concentration data of source 1
source2 => Concentration data of source 2
mixture => Concentration data of the mixture made up of a linear combination of sources 1 and 2
LAMBDA_prior => prior distribution of the proportion of source1 in the mixture
ErrorSTD_prior => prior distribution of the error standard deviation
NUMBER_ITERATIONS => Number of times HydroMix has be run
Returns a tuple containing log likelihood values, lambda values and error standard deviations for all the model runs
'''
LIKELIHOOD = []
for i in range(0, NUMBER_ITERATIONS, 1):
# For displaying purposes
if (i%100 == 0):
print ("Iteration number:" + str(i+1))
# Computing likelihood
temp_likelihood = []
for temp_mix in mixture:
temp_value = 0.
temp_residual = []
for temp_s1 in source1:
for temp_s2 in source2:
temp_estimated_mix = LAMBDA_prior[i]*1.*temp_s1 + (1-LAMBDA_prior[i])*temp_s2*1.
# Likelihood computation
temp_value += (-1 * (temp_estimated_mix - temp_mix)**2) / (2 * (ErrorSTD_prior[i]**2))
temp_value -= (0.5* np.log(2 * np.pi * ErrorSTD_prior[i] * ErrorSTD_prior[i]))
temp_residual.append(temp_estimated_mix-temp_mix)
temp_likelihood.append(temp_value)
LIKELIHOOD.append(np.sum(temp_likelihood))
# Sorting the parameter sets by best runs (highest likelihood values)
zipped = sorted(zip(LIKELIHOOD, LAMBDA_prior, ErrorSTD_prior), reverse = True)
LIKELIHOOD, LAMBDA_prior, ErrorSTD_prior = zip(*zipped)
return ((LIKELIHOOD, LAMBDA_prior, ErrorSTD_prior))
# This is a csv writer
import csv
def csv_writer(data, path):
with open(path, "wb") as csv_file:
writer = csv.writer(csv_file, delimiter=',')
for line in data:
writer.writerow(line)
#################################################################################################################################
# %% Main variables
NUMBER_ITERATIONS = 1000
LAMBDA_RANGE = [0., 1.] # LAMBDA values imply the fraction of snow in groundwater
# Number of best simulations using which lambda is computed
BEST_SIM_PER = 5. # In percentage
OUTPUTFILEPATH = "OutputFiles/VdN/"
# %% Setting up a random seed
np.random.seed(1) # Setting up a common seed number
#################################################################################################################################
# %% Reading all the isotopic data
filename = "SourceFiles/Rain_SP_GW_VdN.xlsx"
df = pd.read_excel(filename, sheet_name='Sheet1')
# Separating the isotopic ratios in rain, snow and groundwater
rain_df = df[(df["Type"] == "Rain")]
snow_df = df[(df["Type"] == "Snow")]
gw_df = df[(df["Type"] == "Groundwater")]
#################################################################################################################################
# %% Initializing the model parameters
LAMBDA_params = np.random.uniform(LAMBDA_RANGE[0], LAMBDA_RANGE[1], NUMBER_ITERATIONS)
# Assuming constant error variance computed from data
LIKELIHOOD_std_params_H2 = np.full(NUMBER_ITERATIONS, np.std(gw_df["H2 isotope"].values, ddof=1))
LIKELIHOOD_std_params_O18 = np.full(NUMBER_ITERATIONS, np.std(gw_df["O18 isotope"].values, ddof=1))
# %% Running HydroMix for H2
rain, snow, gw = rain_df["H2 isotope"].values, snow_df["H2 isotope"].values, gw_df["H2 isotope"].values
LIKELIHOOD_H2, LAMBDA_H2, ErrorSTD_H2 = HydroMix(snow, rain, gw, LAMBDA_params, LIKELIHOOD_std_params_H2, NUMBER_ITERATIONS)
# %% Writing output in csv file
# Checking if directory exists, if not creating it
if not os.path.exists(OUTPUTFILEPATH):
os.makedirs(OUTPUTFILEPATH)
final_lis = [["Snow ratio", "Log likelihood", "Error std"]]
path = OUTPUTFILEPATH + "H2_results.csv"
for index in range(0, len(LIKELIHOOD_H2)):
final_lis.append([ round(LAMBDA_H2[index], 2), round(LIKELIHOOD_H2[index], 2), round(ErrorSTD_H2[index], 2) ])
csv_writer(final_lis, path)
print (path)
# %% Running HydroMix for O18
rain, snow, gw = rain_df["O18 isotope"].values, snow_df["O18 isotope"].values, gw_df["O18 isotope"].values
LIKELIHOOD_O18, LAMBDA_O18, ErrorSTD_O18 = HydroMix(snow, rain, gw, LAMBDA_params, LIKELIHOOD_std_params_O18, NUMBER_ITERATIONS)
# %% Writing output in csv file
final_lis = [["Snow ratio", "Log likelihood", "Error std"]]
path = OUTPUTFILEPATH + "O18_results.csv"
for index in range(0, len(LIKELIHOOD_O18)):
final_lis.append([ round(LAMBDA_O18[index], 2), round(LIKELIHOOD_O18[index], 2), round(ErrorSTD_O18[index], 2) ])
csv_writer(final_lis, path)
print (path)
# %% Histogram plot showing snow ratio in groundwater using H2 and O18
plt.figure()
plt.hist(LAMBDA_H2[0:int(0.01 * BEST_SIM_PER * NUMBER_ITERATIONS)], color='blue', alpha=0.4, label=r'$\delta^{2}$H' + u' \u2030 (VSMOW)', normed='True')
plt.hist(LAMBDA_O18[0:int(0.01 * BEST_SIM_PER * NUMBER_ITERATIONS)], color='red', alpha=0.4, label=r'$\delta^{18}$O' + u' \u2030 (VSMOW)', normed='True')
plt.xlim(0., 1.)
plt.grid(linestyle='dotted')
plt.xlabel("Fraction of snow in groundwater", fontsize=14)
plt.ylabel("Normalised frequency", fontsize=14)
plt.legend()
plt.tight_layout()
path = OUTPUTFILEPATH + "posterior.jpeg"
plt.savefig(path, dpi=300)
plt.close()
print (path)
#################################################################################################################################