You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
parser.add_option('-t','--train', dest='training_set', help='training set name')
15
+
parser.add_option('-k','--test', dest='test_set', help='test set name')
10
16
11
17
(options, args) =parser.parse_args()
12
18
13
19
ifoptions.gold_standard_nameisNone:
14
20
options.gold_standard_name=raw_input('Enter gold standard file name:')
15
21
22
+
ifoptions.training_setisNone:
23
+
options.training_set=options.gold_standard_name
24
+
25
+
training_set=options.training_set
26
+
16
27
gold_standard_name=options.gold_standard_name
17
28
18
-
data=pd.read_csv(gold_standard_name)
29
+
test_set=options.test_set
30
+
31
+
#READ DATA
32
+
33
+
gs_ml_whole=pd.read_csv(gold_standard_name) #WHOLE GS IN ML FORMAT
34
+
35
+
train_duke=pd.read_csv(training_set) #TRAINING SET IN DUKE FORMAT
36
+
37
+
test_duke=pd.read_csv(test_set) #TEST SET IN DUKE FORMAT
38
+
39
+
#define the IDS to use for testing and select test set in ML format
40
+
41
+
ids_1_test=test_duke.values[:,1]
42
+
43
+
ids_2_test=test_duke.values[:,2]
19
44
20
-
X=data.values[:,2:-1] #x variables, the last one is the y
21
-
y=np.array(data['y']) #class variables
45
+
test_ml=gs_ml_whole[(gs_ml_whole.FFIEC_ID.isin(ids_1_test)) & (gs_ml_whole.SEC_ID.isin(ids_2_test))] #those that are in the test data and have the shape of the 1,1,1,0,0,1
22
46
23
-
#print X,y
47
+
#define the IDS to use for train and select training set in ML format
0 commit comments