generated from CDCgov/template
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathExample-eMKF-Simulated-Mortality-Data.sas
331 lines (301 loc) · 13.1 KB
/
Example-eMKF-Simulated-Mortality-Data.sas
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
/*
* This file illustrates the enhanced Modified Kalman Filter (MKF) macro using simulated state-level mortality data.
*
* Annual state-level data were queried using CDC WONDER for the 22-year period 1999-2020. Data were tabulated by:
* - Age group (< 1 year, 1-4, 5-14, 15-24, 25-34, 35-44, 45-54, 55-64, 65-74, 75-84, 85 years and over)
* - Race (American Indian or Alaska Native; Asian or Pacific Islander; Black or African American; and White)
* - Hispanic origin (Hispanic or Latino and not Hispanic or Latino)
*
* States with numerator case counts <10 for selected combinations of year, age, and race and Hispanic origin
* are suppressed in CDC WONDER due to NCHS confidentiality protection rules. Missing cell case counts were
* simulated/imputed, holding fixed the marginal count by state, year, age, and race and Hispanic origin.
* A similar strategy was used with county-level data in Talih et al (2022), Population Health Metrics DOI: 10.1186/s12963-022-00288-1
*
* Technical guidance for using the enhanced MKF macro is available from:
*
* Talih M, Rossen LM, Patel P, Earp M, Parker JD. Technical Guidance for Using the Modified Kalman Filter
* in Small Domain Estimation at the National Center for Health Statistics. National Center for Health Statistics.
* Vital Health Stat 2(209). 2024. DOI: 10.15620/cdc:157496.
*
* Main methodological differences between the enhanced and earlier MKF macros are described in README.md.
*/
/* Specify the directory path:
* The macro is assumed to be saved in a user directory called: ..\eMKF\MKFmacro
* The data is assumed to be in user directory: ..\eMKF\MKFdata
* user_path is the parent of the eMKF directory; it is prefixed to every
* file path used below (data library, CSV inputs, and the macro source file).
*/
%let user_path = \\..; /* Replace \\.. with the applicable path */
/* Define the data library: libref sdata points at the MKFdata directory. */
/* NOTE(review): sdata is not referenced by any step visible in this example - confirm whether it is still needed. */
libname sdata "&user_path\eMKF\MKFdata";
/* State-level crude and age-adjusted mortality data from external causes (V01-Y89). */
/* The comma-delimited file is read starting at its second line (firstobs=2).        */
%let _EFIERR_ = 0; /* error flag: set to 1 by the step below whenever _ERROR_ is raised */
data ExternalCauses;
    infile "&user_path\eMKF\MKFdata\Underlying Cause of Death, 1999-2020 States V01-Y89 by Race and Ethnicity.csv"
        delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2;

    /* Input informats, declared in the order the variables are read */
    informat
        Cause              $7.       /* Cause of death code */
        Year_Code          best32.   /* Year (numeric) */
        State              $20.      /* State name */
        State_Code         best32.   /* State code (numeric) */
        Population_Group   $56.      /* Race and Hispanic origin */
        Deaths             best32.   /* Number of deaths (simulated/imputed if < 10) */
        Population_Size    best32.   /* Population at risk */
        Crude_Rate         best32.   /* Crude rate, per 100,000 */
        Crude_SE           best32.   /* SE for crude rate, calculated assuming a Poisson number of deaths */
        Age_Adjusted_Rate  best32.   /* Age-adjusted rate, per 100,000 - see https://wonder.cdc.gov/wonder/help/ucd-expanded.html */
        Age_Adjusted_SE    best32.   /* SE for age-adjusted rate, calculated assuming a Poisson number of deaths */
    ;

    /* Display formats: character widths match the informats, numerics use best16. */
    format
        Cause              $7.
        Year_Code          best16.
        State              $20.
        State_Code         best16.
        Population_Group   $56.
        Deaths             best16.
        Population_Size    best16.
        Crude_Rate         best16.
        Crude_SE           best16.
        Age_Adjusted_Rate  best16.
        Age_Adjusted_SE    best16.
    ;

    input Cause $ Year_Code State $ State_Code Population_Group $
          Deaths Population_Size
          Crude_Rate Crude_SE
          Age_Adjusted_Rate Age_Adjusted_SE;

    /* Record that at least one input record raised an error */
    if _ERROR_ then call symputx('_EFIERR_', 1);
run;
/* State-level age-specific mortality data from external causes (V01-Y89).    */
/* The comma-delimited file is read starting at its second line (firstobs=2). */
%let _EFIERR_ = 0; /* error flag: set to 1 by the step below whenever _ERROR_ is raised */
data ExternalCausesByAge;
    infile "&user_path\eMKF\MKFdata\Underlying Cause of Death, 1999-2020 States V01-Y89 by Race, Ethnicity, and Age.csv"
        delimiter = ',' MISSOVER DSD lrecl=32767 firstobs=2;

    /* Input informats, declared in the order the variables are read */
    informat
        Cause              $7.       /* Cause of death code */
        Year_Code          best32.   /* Year (numeric) */
        State              $20.      /* State name */
        State_Code         best32.   /* State code (numeric) */
        Population_Group   $56.      /* Race and Hispanic origin */
        Age_Group          $11.      /* Age group */
        Deaths             best32.   /* Number of deaths (simulated/imputed if < 10) */
        Population_Size    best32.   /* Population at risk */
        Age_Specific_Rate  best32.   /* Age-specific rate, per 100,000 */
        Age_Specific_SE    best32.   /* SE for age-specific rate, calculated assuming a Poisson number of deaths */
    ;

    /* Display formats: character widths match the informats, numerics use best16. */
    format
        Cause              $7.
        Year_Code          best16.
        State              $20.
        State_Code         best16.
        Population_Group   $56.
        Age_Group          $11.
        Deaths             best16.
        Population_Size    best16.
        Age_Specific_Rate  best16.
        Age_Specific_SE    best16.
    ;

    input Cause $ Year_Code State $ State_Code Population_Group $ Age_Group $
          Deaths Population_Size
          Age_Specific_Rate Age_Specific_SE;

    /* Record that at least one input record raised an error */
    if _ERROR_ then call symputx('_EFIERR_', 1);
run;
/* Drop the "All" rows, then shorten the race and Hispanic origin labels */
data ExternalCauses;
    set ExternalCauses;

    /* Keep only the individual population groups */
    if Population_Group = "All" then delete;

    /* Each long label maps to exactly one abbreviation */
    select (Population_Group);
        when ("Hispanic or Latino")
            Population_Group = "Hispanic";
        when ("American Indian or Alaska Native, Not Hispanic or Latino")
            Population_Group = "AIAN, NH";
        when ("Asian or Pacific Islander, Not Hispanic or Latino")
            Population_Group = "API, NH";
        when ("Black or African American, Not Hispanic or Latino")
            Population_Group = "B, NH";
        when ("White, Not Hispanic or Latino")
            Population_Group = "W, NH";
        otherwise; /* any other label is left unchanged */
    end;

    /* Shorter variable names on the output dataset */
    rename Population_Group = Population
           Year_Code        = Year;
run;
/* Same clean-up for the age-specific data: drop the "All" rows and shorten the labels */
data ExternalCausesByAge;
    set ExternalCausesByAge;

    /* Keep only the individual population groups */
    if Population_Group = "All" then delete;

    /* Each long label maps to exactly one abbreviation */
    select (Population_Group);
        when ("Hispanic or Latino")
            Population_Group = "Hispanic";
        when ("American Indian or Alaska Native, Not Hispanic or Latino")
            Population_Group = "AIAN, NH";
        when ("Asian or Pacific Islander, Not Hispanic or Latino")
            Population_Group = "API, NH";
        when ("Black or African American, Not Hispanic or Latino")
            Population_Group = "B, NH";
        when ("White, Not Hispanic or Latino")
            Population_Group = "W, NH";
        otherwise; /* any other label is left unchanged */
    end;

    /* Shorter variable names on the output dataset */
    rename Population_Group = Population
           Year_Code        = Year
           Age_Group        = Age;
run;

/* For age-specific rates, build the combined r-e by age grouping used as the model group: */
/* the population label and the age label joined by a colon                                */
data ExternalCausesByAge;
    set ExternalCausesByAge;
    PopByAge = catx(":", Population, Age);
run;
/* Print 20-line previews of the datasets (OBS=20 limits each listing to the first 20 observations) */
proc print data=ExternalCauses(obs=20);
run;
proc print data=ExternalCausesByAge(obs=20);
run;
/* Compile the enhanced Modified Kalman Filter macro (eMKF) */
/* The included file is expected to define the %mkf macro called in the sections below. */
/* NOTE(review): confirm against emkf_macro.sas, which is not part of this file. */
%include "&user_path\eMKF\MKFmacro\emkf_macro.sas";
/*******************************************************************************/
/* Age-adjusted data stratified by state:                                      */
/* strength is borrowed across the race and Hispanic origin (r-e) groups       */
/*******************************************************************************/

/* --- Bayesian model averaging estimation --- */
%let _timer_start = %sysfunc(datetime()); /* start timer */
title "Enhanced MKF. One outcome: Bayesian model averaging for up to cubic trends ";

/* randomVars = NO over-rides the macro default, due to the underlying Poisson */
/* model for deaths whereby mean = variance.                                   */
%mkf(
    data       = ExternalCauses,
    group      = Population,
    time       = Year,
    by         = State,
    outcome    = Age_Adjusted_Rate,
    se         = Age_Adjusted_SE,
    randomVars = NO,
    Bayesmodel = bma_cubic,
    out        = bmac
);

/* Stop the timer and write the elapsed time to the log */
data _null_;
    elapsed = datetime() - &_timer_start;
    put 30*'-' / ' TOTAL DURATION:' elapsed time13.2 / 30*'-';
run;
/* --- Maximum likelihood-based model averaging estimation (by state) --- */
%let _timer_start = %sysfunc(datetime()); /* start timer */
title "Enhanced MKF. One outcome: Maximum likelihood-based model averaging with up to cubic trends ";

/* Bayesmodel is passed empty; the trend models to average over are listed in slopes. */
%mkf(
    data       = ExternalCauses,
    group      = Population,
    time       = Year,
    by         = State,
    outcome    = Age_Adjusted_Rate,
    se         = Age_Adjusted_SE,
    Bayesmodel = ,
    slopes     = indep_cubic indep_quad indep_linear
                 common_cubic common_quad common_linear
                 dropped,
    out        = mac
);

/* Stop the timer and write the elapsed time to the log */
data _null_;
    elapsed = datetime() - &_timer_start;
    put 30*'-' / ' TOTAL DURATION:' elapsed time13.2 / 30*'-';
run;
/*******************************************************************************/
/* Age-adjusted data stratified by r-e group:                                  */
/* strength is borrowed across states                                          */
/*******************************************************************************/

/* --- Bayesian model averaging estimation --- */
%let _timer_start = %sysfunc(datetime()); /* start timer */
title "Enhanced MKF. One outcome: Bayesian model averaging for up to cubic trends ";

/* randomVars = NO over-rides the macro default, due to the underlying Poisson */
/* model for deaths whereby mean = variance.                                   */
/* NOTE(review): out = bmac is the same value passed by the state-stratified   */
/* Bayesian run earlier in this file; presumably that output is replaced here. */
/* Confirm against emkf_macro.sas.                                             */
%mkf(
    data       = ExternalCauses,
    group      = State,
    time       = Year,
    by         = Population,
    outcome    = Age_Adjusted_Rate,
    se         = Age_Adjusted_SE,
    randomVars = NO,
    Bayesmodel = bma_cubic,
    out        = bmac
);

/* Stop the timer and write the elapsed time to the log */
data _null_;
    elapsed = datetime() - &_timer_start;
    put 30*'-' / ' TOTAL DURATION:' elapsed time13.2 / 30*'-';
run;
/* --- Maximum likelihood-based model averaging estimation (by r-e group) --- */
%let _timer_start = %sysfunc(datetime()); /* start timer */
title "Enhanced MKF. One outcome: Maximum likelihood-based model averaging with up to cubic trends ";

/* Bayesmodel is passed empty; the trend models to average over are listed in slopes. */
/* NOTE(review): out = mac is the same value passed by the state-stratified            */
/* maximum likelihood run earlier in this file; presumably that output is replaced     */
/* here. Confirm against emkf_macro.sas.                                               */
%mkf(
    data       = ExternalCauses,
    group      = State,
    time       = Year,
    by         = Population,
    outcome    = Age_Adjusted_Rate,
    se         = Age_Adjusted_SE,
    Bayesmodel = ,
    slopes     = indep_cubic indep_quad indep_linear
                 common_cubic common_quad common_linear
                 dropped,
    out        = mac
);

/* Stop the timer and write the elapsed time to the log */
data _null_;
    elapsed = datetime() - &_timer_start;
    put 30*'-' / ' TOTAL DURATION:' elapsed time13.2 / 30*'-';
run;
/************************************************************************************/
/* Age-specific data: OPTIONAL examples, disabled by default.                       */
/* Running the below code is not recommended. Each model may take 40 hours or more. */
/*                                                                                  */
/* The examples are wrapped in a macro definition that is never invoked, instead of */
/* in one enclosing block comment. SAS block comments do not nest: an enclosing     */
/* comment ends at the close of the first inner comment, so the code after it runs. */
/* To run the examples on purpose, submit the call:  %RunAgeSpecificExamples        */
/************************************************************************************/
%macro RunAgeSpecificExamples;

    /********************************************************************************/
    /* Age-specific data stratified by state:                                       */
    /* strength is borrowed across the PopByAge groups                              */
    /********************************************************************************/

    /* Bayesian model averaging estimation */
    %let _timer_start = %sysfunc(datetime()); /* start timer */
    title "Enhanced MKF. One outcome: Bayesian model averaging for up to cubic trends ";
    /* randomVars = NO over-rides the macro default, due to the underlying Poisson */
    /* model for deaths whereby mean = variance.                                   */
    %mkf(data       = ExternalCausesByAge,
         group      = PopByAge,
         time       = Year,
         by         = State,
         outcome    = Age_Specific_Rate,
         se         = Age_Specific_SE,
         randomVars = NO,
         Bayesmodel = bma_cubic,
         out        = bmac
        );
    data _null_;
        dur = datetime() - &_timer_start; /* stop timer */
        put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
    run;

    /* Maximum likelihood-based model averaging estimation */
    %let _timer_start = %sysfunc(datetime()); /* start timer */
    title "Enhanced MKF. One outcome: Maximum likelihood-based model averaging with up to cubic trends ";
    /* Only the dropped model is requested in this run. The other candidates were    */
    /* commented out in the original call:                                           */
    /*   indep_cubic indep_quad indep_linear common_cubic common_quad common_linear  */
    %mkf(data       = ExternalCausesByAge,
         group      = PopByAge,
         time       = Year,
         by         = State,
         outcome    = Age_Specific_Rate,
         se         = Age_Specific_SE,
         Bayesmodel = ,
         slopes     = dropped,
         out        = mac
        );
    data _null_;
        dur = datetime() - &_timer_start; /* stop timer */
        put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
    run;

    /********************************************************************************/
    /* Age-specific data stratified by PopByAge group:                              */
    /* strength is borrowed across states                                           */
    /********************************************************************************/

    /* Bayesian model averaging estimation */
    %let _timer_start = %sysfunc(datetime()); /* start timer */
    title "Enhanced MKF. One outcome: Bayesian model averaging for up to cubic trends ";
    /* randomVars = NO over-rides the macro default, due to the underlying Poisson */
    /* model for deaths whereby mean = variance.                                   */
    %mkf(data       = ExternalCausesByAge,
         group      = State,
         time       = Year,
         by         = PopByAge,
         outcome    = Age_Specific_Rate,
         se         = Age_Specific_SE,
         randomVars = NO,
         Bayesmodel = bma_cubic,
         out        = bmac
        );
    data _null_;
        dur = datetime() - &_timer_start; /* stop timer */
        put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
    run;

    /* Maximum likelihood-based model averaging estimation */
    %let _timer_start = %sysfunc(datetime()); /* start timer */
    title "Enhanced MKF. One outcome: Maximum likelihood-based model averaging with up to cubic trends ";
    %mkf(data       = ExternalCausesByAge,
         group      = State,
         time       = Year,
         by         = PopByAge,
         outcome    = Age_Specific_Rate,
         se         = Age_Specific_SE,
         Bayesmodel = ,
         slopes     = indep_cubic indep_quad indep_linear
                      common_cubic common_quad common_linear
                      dropped,
         out        = mac
        );
    data _null_;
        dur = datetime() - &_timer_start; /* stop timer */
        put 30*'-' / ' TOTAL DURATION:' dur time13.2 / 30*'-';
    run;

%mend RunAgeSpecificExamples;