forked from AlaieT/pytorch-spice-cnn
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis.py
53 lines (37 loc) · 1.78 KB
/
analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
__author__ = 'Alaie Titor'
from typing import *
import argparse
import pandas as pd
import numpy as np
import os
def generated_data_analysis(path: str, threshold: Optional[float], out: Optional[str]):
    """Print relative-difference statistics for a dataset and optionally filter it.

    The dataset CSV at *path* is read with pandas; its first column is treated
    as the source entry and its second column as the path to a target CSV.
    For every target file that exists, the second column of that file is read
    and, for each strictly positive value, ``1 - value / max(values)`` is
    computed.  The mean, max and min of all these differences (as percentages)
    are printed at the end.

    Args:
        path: Path to the dataset CSV (column 0: source, column 1: target path).
        threshold: Upper bound (exclusive) on a target's largest difference for
            the (source, target) pair to be kept.  Filtering is only active
            when both *threshold* and *out* are given.
        out: Destination CSV for the kept pairs, written with the columns
            ``Source`` and ``Target``.  Nothing is written when no pair passes.

    Returns:
        None.  Results are printed and, optionally, written to *out*.
    """
    df = pd.read_csv(path)
    sources = df.iloc[:, 0]
    targets = df.iloc[:, 1]

    # Filtering needs both a cutoff and somewhere to write the result.
    filtering = out is not None and threshold is not None
    kept = []

    print(f'\nData length: {df.shape[0]}')

    # Collect per-file arrays and concatenate once at the end; growing a
    # single array with np.append would re-copy it on every iteration.
    chunks = []

    for source, target in zip(sources, targets):
        if not os.path.exists(target):
            continue

        values = pd.read_csv(target).iloc[:, 1]
        peak = np.max(values)

        if peak == 0:
            # A zero maximum cannot be used as a normaliser; report the file
            # (message kept as in the original script) and skip it.
            print("Found negative numbers: ", target)
            continue

        current_diff = 1 - values[values > 0] / peak
        chunks.append(np.asarray(current_diff, dtype=float))

        # Use the caller-supplied threshold (previously a hard-coded 0.11).
        # A file with no positive values has no differences and is never kept.
        if filtering and not current_diff.empty and current_diff.max() < threshold:
            kept.append([source, target])

    diff = np.concatenate(chunks) if chunks else np.array([])

    if diff.size:
        print(f'Diff - mean: {np.mean(diff)*100:.8}% max: {np.max(diff)*100:.8}% min: {np.min(diff)*100:.8}%')
    else:
        # np.max / np.min raise ValueError on an empty array.
        print('Diff - no data: no readable target values were found')

    if kept:
        pd.DataFrame(data=kept, columns=["Source", "Target"]).to_csv(out, index=False)
def build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the dataset analysis script.

    Returns:
        A parser exposing ``--path`` (dataset CSV, defaults to
        ``./assets/train.csv``), ``--threshold`` (float, defaults to ``None``)
        and ``--out`` (output CSV path, defaults to ``None``).
    """
    parser = argparse.ArgumentParser(description='Anlaysis irdrop of dataset(min, mean and max values)')
    parser.add_argument('-p', '--path', default='./assets/train.csv', help='Path to .csv of dataset.')
    parser.add_argument('-t', '--threshold', default=None, type=float, help='The threshold hold value for dataset filter.')
    parser.add_argument('-o', '--out', default=None, help='Out path for filtered dataset')
    return parser


if __name__ == "__main__":
    namespace = build_arg_parser().parse_args()

    path = namespace.path
    # argparse already applies type=float and leaves the value as None when
    # the option is omitted; calling float(None) here raised TypeError and
    # made the script unusable without an explicit --threshold.
    threshold = namespace.threshold
    out = namespace.out

    generated_data_analysis(path, threshold, out)