-
Notifications
You must be signed in to change notification settings - Fork 0
/
helper.py
126 lines (91 loc) · 4.14 KB
/
helper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from sys import prefix
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
import datetime, pytz
import glob, os
from sklearn.preprocessing import LabelEncoder
# File-type strings that `data()` routes to `pd.read_excel`.
excel_type = [
    "vnd.ms-excel",
    "vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    "vnd.oasis.opendocument.spreadsheet",
    "vnd.oasis.opendocument.text",
]
def data(data, file_type, seperator=None):
    """Load an uploaded file into a DataFrame according to its type string.

    Args:
        data: Path or file-like object handed straight to the pandas reader.
        file_type: Type string of the upload; compared against ``"csv"``,
            the entries of ``excel_type`` and ``"plain"``.
            (Presumably the MIME subtype of the upload -- confirm with caller.)
        seperator: Delimiter forwarded to ``pd.read_table`` for plain text.

    Returns:
        The parsed DataFrame. If ``file_type`` matches no branch, or reading
        plain text raises ``ValueError``, the ``data`` argument is returned
        unchanged.
    """
    if file_type == "csv":
        return pd.read_csv(data)

    if file_type in excel_type:
        frame = pd.read_excel(data)
        st.sidebar.info("If you are using Excel file so there could be chance of getting minor error.")
        return frame

    if file_type == "plain":
        try:
            return pd.read_table(data, sep=seperator)
        except ValueError:
            # Parsing failed; prompt for a delimiter and fall through to
            # return the input as it was given.
            st.info("Enter delimiter")

    return data
def download_data(data, label):
    """Render a Streamlit download button that exports ``data`` as CSV.

    The file name is ``label`` followed by a timestamp taken in the
    ``Asia/Kolkata`` timezone, formatted as ``<date>.<hour>-<minute>-<second>``.

    Args:
        data: Object exposing ``to_csv()`` (a DataFrame in this module).
        label: Text inserted into the button label, help text and file name.

    Returns:
        Whatever ``st.download_button`` returns.
    """
    now = datetime.datetime.now(pytz.timezone('Asia/Kolkata'))
    stamp = "{}.{}-{}-{}".format(now.date(), now.hour, now.minute, now.second)

    return st.download_button(
        label="Download {} data as CSV".format(label),
        data=data.to_csv(),
        file_name='{}{}.csv'.format(label, stamp),
        mime='text/csv',
        help="When You Click On Download Button You can download your {} CSV File".format(label),
    )
def describe(data):
    """Collect summary information about a DataFrame.

    Side effect: rebinds the module-level ``num_category`` and
    ``str_category`` lists (columns whose dtype is not / is ``"O"``).

    Returns:
        A 10-tuple, in order: ``data.describe()``, ``data.shape``,
        ``data.columns``, non-object column names, object column names,
        per-column null counts, dtypes as strings, per-column unique counts,
        object column names (again), and the columns containing any null.
    """
    global num_category, str_category

    # Split the columns into object-dtype and everything else, keeping
    # the original column order inside each group.
    non_object_cols, object_cols = [], []
    for name in data.columns:
        if data[name].dtypes == "O":
            object_cols.append(name)
        else:
            non_object_cols.append(name)
    num_category, str_category = non_object_cols, object_cols

    column_with_null_values = data.columns[data.isnull().any()]
    summary = data.describe()
    null_counts = data.isnull().sum()
    dtype_names = data.dtypes.astype("str")
    unique_counts = data.nunique()

    return (
        summary,
        data.shape,
        data.columns,
        num_category,
        str_category,
        null_counts,
        dtype_names,
        unique_counts,
        str_category,
        column_with_null_values,
    )
def outliers(data, num_category_outliers):
    """Save one box plot per column and return the image paths.

    Each column gets its own figure, so a saved image shows only that
    column. Figures are closed after saving, and the ``plots`` directory is
    created if missing.

    Args:
        data: DataFrame holding the columns to plot.
        num_category_outliers: Sequence of column names to plot.

    Returns:
        List of paths ``plots/pic<i>.png``, one per column, in input order.
        Empty when no columns are given.
    """
    if len(num_category_outliers) == 0:
        return []

    flierprops = dict(marker='o', markerfacecolor='purple', markersize=6,
                      linestyle='none', markeredgecolor='black')

    # savefig fails if the target directory does not exist yet.
    os.makedirs('plots', exist_ok=True)

    path_list = []
    for i, column in enumerate(num_category_outliers):
        # A fresh figure per column keeps earlier boxes out of later images.
        fig, ax = plt.subplots(figsize=(6, 2))
        try:
            sns.boxplot(x=column, flierprops=flierprops, data=data, ax=ax)
            # Series.min()/max() skip NaN, unlike the builtin min()/max().
            ax.set_xlim(data[column].min(), data[column].max())
            ax.set_title("Checking Outliers for {} Column".format(column))
            path = 'plots/pic{}.png'.format(i)
            fig.savefig(path)
        finally:
            # Always release the figure so repeated calls do not pile up
            # open figures.
            plt.close(fig)
        path_list.append(path)
    return path_list
def label_encode(data, selected_name):
    """Replace one column with its integer label encoding, in place.

    Args:
        data: DataFrame to modify.
        selected_name: Name of the column to encode.

    Returns:
        The same ``data`` object, with ``selected_name`` holding the codes
        produced by ``LabelEncoder.fit_transform``.
    """
    le = LabelEncoder()
    codes = le.fit_transform(data[selected_name])
    # Assign the array directly so values are placed by position. Wrapping it
    # in a new DataFrame would align on a fresh 0..n-1 index and misplace
    # values (or produce NaN) whenever `data` has a non-default index, e.g.
    # after rows were filtered out.
    data[selected_name] = codes
    return data
def drop_items(data, selected_name):
    """Return a copy of ``data`` without the column(s) ``selected_name``.

    Args:
        data: Source DataFrame; it is not modified.
        selected_name: A column label or list of column labels to remove.
    """
    return data.drop(columns=selected_name)
def filter_data(data, selected_column, selected_name):
    """Drop the rows whose ``selected_column`` value is in ``selected_name``.

    Args:
        data: DataFrame to filter.
        selected_column: Column whose values are checked.
        selected_name: Values to exclude. When this equals ``[]`` the
            original ``data`` object is returned untouched.

    Returns:
        ``data`` itself for an empty selection, otherwise the rows that do
        not match any of the given values.
    """
    if selected_name == []:
        return data

    excluded = data[selected_column].isin(selected_name)
    return data[~excluded]
def num_filter_data(data, start_value, end_value, column, param):
    """Keep or delete rows whose ``column`` value lies in a closed range.

    The bounds are truncated with ``int()`` and are both inclusive. Any value
    between them matches, including non-integer floats.

    Args:
        data: DataFrame to filter.
        start_value: Lower bound (converted with ``int``).
        end_value: Upper bound (converted with ``int``).
        column: Name of the column to test.
        param: ``"Delete data inside the range"`` removes matching rows; any
            other value keeps only the matching rows.

    Returns:
        The filtered DataFrame, or ``data`` unchanged when ``column`` is not
        numeric.
    """
    # Check the dtype directly rather than a module global, which is only
    # set once describe() has run and may be stale.
    if not pd.api.types.is_numeric_dtype(data[column]):
        return data

    # between() is inclusive on both ends and needs no materialised range;
    # unlike isin(range(...)) it also matches fractional values.
    in_range = data[column].between(int(start_value), int(end_value))

    if param == "Delete data inside the range":
        return data[~in_range]
    return data[in_range]
def rename_columns(data, column_names):
    """Return a copy of ``data`` with columns renamed per ``column_names``.

    Args:
        data: Source DataFrame; it is not modified.
        column_names: Mapping passed to ``DataFrame.rename(columns=...)``.
    """
    return data.rename(columns=column_names)
def handling_missing_values(data, option_type, dict_value=None):
    """Apply the selected missing-value strategy to ``data``.

    Args:
        data: DataFrame to process.
        option_type: One of the three option strings matched below; any
            other value leaves ``data`` untouched.
        dict_value: Fill value(s) passed to ``fillna`` for the fill option.

    Returns:
        The processed DataFrame, or the original ``data`` object when
        ``option_type`` matches no strategy.
    """
    if option_type == "Drop all null value rows":
        return data.dropna()

    if option_type == "Only Drop Rows that contanines all null values":
        return data.dropna(how="all")

    if option_type == "Filling in Missing Values":
        return data.fillna(dict_value)

    return data
def clear_image_cache():
    """Delete every ``.png`` file directly inside the ``plots`` directory."""
    for png_path in glob.glob('plots/*.png'):
        os.remove(png_path)