-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathVetUnemploy.py
246 lines (202 loc) · 10.6 KB
/
VetUnemploy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
import os
import requests
import json
import openpyxl
from openpyxl.utils import get_column_letter
from openpyxl.chart import LineChart, BarChart, PieChart, Reference, ScatterChart, Series
from dotenv import load_dotenv
from scipy.stats import pearsonr
# Load environment variables from .env file
load_dotenv()
# Retrieve API keys from environment variables
bls_api_key = os.getenv("BLS_API_KEY")
census_api_key = os.getenv("CENSUS_API_KEY")
# BLS API Configuration
bls_series_ids = {
"national_unemployment_rate": "LNU04000000", # National unemployment rate
"veteran_unemployment_rate": "LNU04049526" # Veteran unemployment rate
}
bls_start_year = "2021"
bls_end_year = "2021"
# Census API Configuration
census_base_url = "https://api.census.gov/data/2021/acs/acs5"
census_params = {
"get": "B21005_001E,B21005_002E,B21005_003E,B21005_007E,B21001_002E,B21001_003E,B21001_023E,B21001_024E,"
"B21005_010E,B21005_012E,B21005_011E,B21005_009E,B23003_014E",
"for": "us:*", # National data
"key": census_api_key
}
# Mapping of Census variables to their definitions
census_variable_map = {
"B21005_001E": "Total Civilian Population 18 Years and Over",
"B21005_002E": "Civilian Population 18 Years and Over, Male",
"B21005_003E": "Total Military Population 18 Years and Over",
"B21005_007E": "Unemployed Veterans, Civilian Population 18 Years and Over",
"B21001_002E": "Total Male Population 18 Years and Over",
"B21001_003E": "Total Female Population 18 Years and Over",
"B21001_023E": "Total Male Veterans 18 Years and Over",
"B21001_024E": "Total Female Veterans 18 Years and Over",
"B21005_010E": "Nonveteran, 18 to 34 years, Unemployed",
"B21005_012E": "Nonveteran, 18 to 34 years, Not in Labor Force",
"B21005_011E": "Nonveteran, 18 to 34 years, Employed",
"B21005_009E": "Nonveteran, 18 to 34 years, Total",
"B23003_014E": "Presence of Own Children Under 18 Years by Age of Householder"
}
def fetch_bls_data():
"""Fetch data from the BLS API."""
headers = {'Content-type': 'application/json'}
bls_data = {}
for label, series_id in bls_series_ids.items():
data = json.dumps({
"seriesid": [series_id],
"startyear": bls_start_year,
"endyear": bls_end_year,
"registrationkey": bls_api_key
})
response = requests.post('https://api.bls.gov/publicAPI/v2/timeseries/data/', data=data, headers=headers)
bls_data[label] = json.loads(response.text)
return bls_data
def fetch_census_data():
"""Fetch data from the Census API."""
response = requests.get(census_base_url, params=census_params)
return response.json()
def calculate_veteran_unemployment_rate(census_data):
"""Calculate the veteran unemployment rate."""
total_veteran_population = int(census_data[1][6]) + int(census_data[1][7]) # B21001_023E + B21001_024E
unemployed_veterans = int(census_data[1][3]) # B21005_007E: Unemployed Veterans
veteran_unemployment_rate = (unemployed_veterans / total_veteran_population) * 100
return veteran_unemployment_rate
def calculate_civilian_unemployment_rate(census_data):
"""Calculate the civilian unemployment rate."""
total_civilian_population = int(census_data[1][0]) # B21005_001E: Total Civilian Population 18 Years and Over
unemployed_veterans = int(census_data[1][3]) # B21005_007E: Unemployed Veterans
civilian_unemployment_rate = (unemployed_veterans / total_civilian_population) * 100
return civilian_unemployment_rate
def calculate_correlation_analysis(national_rates, veteran_rates):
"""Calculate Pearson correlation between national and veteran unemployment rates."""
correlation, _ = pearsonr(national_rates, veteran_rates)
return correlation
# Function to add charts to the Excel workbook
def add_charts(workbook):
"""Add various charts to the Excel workbook for data visualization."""
# Line Chart: National Unemployment Rate vs Veteran Unemployment Rate
sheet4 = workbook["Trend Analysis"]
line_chart = LineChart()
line_chart.title = "Unemployment Rate Trend Analysis"
line_chart.x_axis.title = "Month"
line_chart.y_axis.title = "Unemployment Rate"
data = Reference(sheet4, min_col=2, min_row=1, max_col=3, max_row=len(sheet4['A']))
categories = Reference(sheet4, min_col=1, min_row=2, max_row=len(sheet4['A']))
line_chart.add_data(data, titles_from_data=True)
line_chart.set_categories(categories)
sheet4.add_chart(line_chart, "E5")
# Scatter Plot: Correlation Analysis
scatter_chart = ScatterChart()
scatter_chart.title = "Correlation Between National and Veteran Unemployment Rates"
scatter_chart.x_axis.title = "National Unemployment Rate"
scatter_chart.y_axis.title = "Veteran Unemployment Rate"
xvalues = Reference(sheet4, min_col=2, min_row=2, max_row=len(sheet4['A']))
yvalues = Reference(sheet4, min_col=3, min_row=2, max_row=len(sheet4['A']))
series = Series(yvalues, xvalues, title_from_data=True)
scatter_chart.series.append(series)
sheet4.add_chart(scatter_chart, "L5")
def write_to_excel(bls_data, census_data, veteran_unemployment_rate, civilian_unemployment_rate, correlation, filename):
"""Write BLS and Census data to an Excel file using openpyxl."""
# Create a new Excel workbook
workbook = openpyxl.Workbook()
# Write BLS National Unemployment Rate
sheet1 = workbook.active
sheet1.title = "National Unemployment Rate"
headers = ["Year", "Month", "Unemployment Rate"]
sheet1.append(headers)
national_rates = []
veteran_rates = []
for item in bls_data['national_unemployment_rate']['Results']['series'][0]['data']:
year = item['year']
period_name = item['periodName']
value = item['value']
sheet1.append([year, period_name, value])
national_rates.append(float(value))
# Auto-size columns based on content
for col in sheet1.columns:
max_length = max(len(str(cell.value)) for cell in col)
adjusted_width = (max_length + 2)
sheet1.column_dimensions[get_column_letter(col[0].column)].width = adjusted_width
# Write BLS Veteran Unemployment Rate
sheet2 = workbook.create_sheet(title="Veteran Unemployment Rate")
sheet2.append(headers)
for item in bls_data['veteran_unemployment_rate']['Results']['series'][0]['data']:
year = item['year']
period_name = item['periodName']
value = item['value']
sheet2.append([year, period_name, value])
veteran_rates.append(float(value))
# Auto-size columns based on content
for col in sheet2.columns:
max_length = max(len(str(cell.value)) for cell in col)
adjusted_width = (max_length + 2)
sheet2.column_dimensions[get_column_letter(col[0].column)].width = adjusted_width
# Write Census Data on Veteran Employment with clear meanings (Pivoted Table)
sheet3 = workbook.create_sheet(title="Census Veteran Data (2021)")
# Add header row
sheet3.append(["Population", "Amount"])
# Add each demographic data as a row
for i, var in enumerate(census_data[0]):
description = census_variable_map.get(var, var)
value = census_data[1][i]
sheet3.append([description, value])
# Auto-size columns based on content
for col in sheet3.columns:
max_length = max(len(str(cell.value)) for cell in col)
adjusted_width = (max_length + 2)
sheet3.column_dimensions[get_column_letter(col[0].column)].width = adjusted_width
# Write Trend Analysis comparing National vs Veteran Unemployment Rates
sheet4 = workbook.create_sheet(title="Trend Analysis")
sheet4.append(["Month", "National Unemployment Rate", "Veteran Unemployment Rate"])
# Assuming the data from BLS for both national and veteran unemployment is synchronized by month
for i in range(len(national_rates)):
month = bls_data['national_unemployment_rate']['Results']['series'][0]['data'][i]['periodName']
national_rate = national_rates[i]
veteran_rate = veteran_rates[i]
sheet4.append([month, national_rate, veteran_rate])
# Auto-size columns based on content
for col in sheet4.columns:
max_length = max(len(str(cell.value)) for cell in col)
adjusted_width = (max_length + 2)
sheet4.column_dimensions[get_column_letter(col[0].column)].width = adjusted_width
sheet4.append(["Correlation between National and Veteran Unemployment Rates", correlation])
# Write Civilian and Veteran Unemployment Rates
sheet5 = workbook.create_sheet(title="Unemployment Rate Summary")
sheet5.append(["Category", "Unemployment Rate"])
sheet5.append(["Civilian Population", civilian_unemployment_rate])
sheet5.append(["Veteran Population", veteran_unemployment_rate])
# Auto-size columns based on content
for sheet in [sheet4, sheet5]:
for col in sheet.columns:
max_length = max(len(str(cell.value)) for cell in col)
adjusted_width = (max_length + 2)
sheet.column_dimensions[get_column_letter(col[0].column)].width = adjusted_width
# Add charts to the workbook
add_charts(workbook)
# Save the workbook to the specified location
workbook.save(filename)
print(f"Data has been written to {filename}")
def main():
print("Fetching data from BLS API...")
bls_data = fetch_bls_data()
print("Fetching data from Census API...")
census_data = fetch_census_data()
print("Calculating Veteran Unemployment Rate...")
veteran_unemployment_rate = calculate_veteran_unemployment_rate(census_data)
print("Calculating Civilian Unemployment Rate...")
civilian_unemployment_rate = calculate_civilian_unemployment_rate(census_data)
print("Calculating Correlation Analysis...")
national_rates = [float(item['value']) for item in bls_data['national_unemployment_rate']['Results']['series'][0]['data']]
veteran_rates = [float(item['value']) for item in bls_data['veteran_unemployment_rate']['Results']['series'][0]['data']]
correlation = calculate_correlation_analysis(national_rates, veteran_rates)
# Specify the full path where the file should be saved
file_path = "/Users/sh1vam5harma/Documents/Vets in Tech Python Course/new team/ViT project workbook.xlsx"
print("Writing data to Excel...")
write_to_excel(bls_data, census_data, veteran_unemployment_rate, civilian_unemployment_rate, correlation, file_path)
if __name__ == "__main__":
main()