Skip to content

Commit 7e04cf6

Browse files
Testing accuracy of surgeo approximations
1 parent 832b9a1 commit 7e04cf6

1 file changed

Lines changed: 52 additions & 0 deletions

File tree

bisg/surgeoAccuracyTest.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# Surgeo accuracy analysis using collected demographic data
2+
import numpy as np
3+
import pandas as pd
4+
import surgeo
5+
6+
# Tree-Plenish event demographic responses, downloaded ahead of time.
df = pd.read_csv("/home/justinmiller/Documents/Data/tpEventDemographic.csv")

# Full names are read from the third column of the export.
names = df[df.columns[2]].values


def _last_name(full_name):
    """Return the last whitespace-separated token of *full_name*.

    Splitting on any run of whitespace (rather than on a single space) means
    leading, trailing, or repeated spaces never produce an empty token.
    Missing cells (NaN) and blank strings yield "" instead of raising, so one
    bad row cannot abort the whole run.
    """
    tokens = full_name.split() if isinstance(full_name, str) else []
    return tokens[-1] if tokens else ""


# One surname per row, in the same order as the rows of df.
last_names = [_last_name(name) for name in names]
11+
12+
# Ask the surname model for probabilities for every extracted last name.
fsg = surgeo.SurnameModel()
fsg_results = fsg.get_probabilities(pd.Series(last_names))
print(f"{len(fsg_results)} Total Entries")

# Attach the model output to the survey rows column-wise, then keep only the
# rows that have a value in the 'white' column.
df = pd.concat([df, fsg_results], axis=1).dropna(subset=['white'])
print(f"{len(df)} Usable Surnames")

# Do not truncate columns when dumping the combined frame.
pd.set_option('display.max_columns', None)
print(df)
24+
25+
# Map each survey answer to a model column name for the row-by-row comparison.
r = 5  # Position of the race question among the columns
keys = np.unique(df[df.columns[r]])  # Sorted distinct answers in the race column
# NOTE(review): the positions used below assume a specific sorted order of the
# answers in this data set, and positions 4 and 5 receive no mapping --
# confirm against the CSV before reusing with other data.
keyMap = {
    keys[0]: "api",
    keys[1]: "black",
    keys[2]: "hispanic",
    keys[3]: "multiple",
    keys[6]: "white",
}
34+
35+
# For every row, look up the model's probability for the race the respondent
# actually reported.  Answers with no entry in keyMap get NaN and are echoed
# so they can be inspected.
race_col = df.columns[r]
probabilities = []  # plain list: appending to an ndarray copies it every time

for i in df.index:
    race = df.at[i, race_col]
    try:
        # keyMap[race] fails for unmapped answers; the column lookup fails if
        # the mapped column is absent.  Both are KeyError and both mean
        # "no estimate for this row".
        probabilities.append(df.at[i, keyMap[race]])
    except KeyError:
        probabilities.append(np.nan)
        # f-string so a non-string answer (e.g. NaN) is reported, not a crash.
        print(f"The user answered: {race}")

probList = np.array(probabilities, dtype=float)
47+
48+
49+
# Attach the per-row probabilities to df as a named column.  probList holds
# one value per row of df, in row order, so a direct assignment lines up.
# (A pandas Index is immutable, so the column cannot be renamed by assigning
# to df.columns[-1]; the name is set here instead.)
df["probability"] = probList
52+

0 commit comments

Comments
 (0)