Skip to content

Commit 9092d1c

Browse files
committed
indentation
1 parent 9705dfa commit 9092d1c

File tree

1 file changed

+34
-38
lines changed

1 file changed

+34
-38
lines changed

language/mlsql/mlsql/functions/keywords/preprocessing/encode_functions.py

+34-38
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from sklearn.base import BaseEstimator, TransformerMixin
55
from sklearn.feature_extraction import DictVectorizer
66

7-
# Encodes Categorical variables to be numerical values
8-
# Usage:
7+
# Encodes Categorical variables to be numerical values
8+
# Usage:
99
# encoder = EncodeCategorical()
1010
# encoder.fit_transform(data)
1111
# For now, do not use since this does not work well with python3
@@ -37,28 +37,28 @@
3737
May need runtime improvements
3838
Parameters:
3939
df: Dataframe to encode
40-
cols: Columns to encode. If None, then encode all object columns
40+
cols: Columns to encode. If None, then encode all object columns
4141
Returns:
4242
Dataframe with each categorical column encoded as a single column of integer codes
4343
"""
4444
def encode_categorical(df, cols=None):
    """Encode categorical columns of ``df`` as integer codes, in place.

    Each selected column is replaced by integer codes numbered in order of
    first appearance (the first distinct value becomes 0, the next 1, ...),
    so the encoding is reproducible for a given dataframe. Missing values
    (NaN/None) receive the code -1, which is the pandas ``factorize``
    default.

    Parameters:
        df: Dataframe to encode. It is modified in place.
        cols: Column name or iterable of column names to encode. If None,
            every column whose dtype is ``object`` is encoded.

    Returns:
        The same dataframe object, with the selected columns replaced by
        their integer codes.
    """
    if cols is None:
        categorical = [col for col in df.columns if df[col].dtype == 'object']
    elif isinstance(cols, str):
        # A bare string would otherwise be iterated character by character.
        categorical = [cols]
    else:
        categorical = list(cols)

    for feature in categorical:
        # factorize() numbers values by first appearance; unlike iterating a
        # set, the result does not depend on string hash randomization.
        df[feature] = df[feature].factorize()[0]
    return df
6262

6363
"""
6464
encode_onehot()
@@ -67,27 +67,23 @@ def encode_categorical(df, cols=None):
6767
https://gist.github.com/ramhiser/982ce339d5f8c9a769a0
6868
Parameters:
6969
df: Dataframe to encode
70-
cols: Columns to encode. If None, then encode all object columns
70+
cols: Columns to encode. If None, then encode all object columns
7171
Returns:
7272
One-hot encoded dataframe
7373
"""
7474
def encode_onehot(df, cols=None):
    """One-hot encode categorical columns of ``df`` using ``DictVectorizer``.

    The selected columns are converted to a list of per-row dicts, vectorized,
    and the resulting indicator columns are joined back onto the remaining
    columns of ``df``. The selected columns themselves are dropped.

    Parameters:
        df: Dataframe to encode. It is not modified; a new dataframe is
            returned.
        cols: Column name or iterable of column names to encode. If None,
            every column whose dtype is ``object`` is encoded.

    Returns:
        A new dataframe containing the untouched columns followed by the
        columns produced by the vectorizer. If there is nothing to encode,
        a copy of ``df`` is returned.
    """
    if cols is None:
        categorical = [
            feature for feature in df.columns if df[feature].dtype == 'object'
        ]
    elif isinstance(cols, str):
        # A bare string would otherwise select a Series rather than a frame.
        categorical = [cols]
    else:
        categorical = list(cols)

    if not categorical:
        # Nothing to vectorize; still hand back a new object like the
        # drop/join path below does.
        return df.copy()

    vec = DictVectorizer()
    # Use the resolved column list (not the raw ``cols`` argument, which may
    # be None) and the ``orient`` keyword that replaced ``outtype`` in pandas.
    records = df[categorical].to_dict(orient='records')
    vec_data = pd.DataFrame(vec.fit_transform(records).toarray())
    # Newer scikit-learn exposes get_feature_names_out(); fall back to the
    # older get_feature_names() when it is not available.
    if hasattr(vec, 'get_feature_names_out'):
        vec_data.columns = vec.get_feature_names_out()
    else:
        vec_data.columns = vec.get_feature_names()
    # Align the encoded rows with the original index so the join matches up.
    vec_data.index = df.index

    df = df.drop(categorical, axis=1)
    df = df.join(vec_data)
    return df

0 commit comments

Comments
 (0)