Skip to content

Commit c3fe2fb

Browse files
committed
parser on read to accept a None value for header
1 parent cf09804 commit c3fe2fb

File tree

5 files changed

+17
-12
lines changed

5 files changed

+17
-12
lines changed

language/mlsql/mlsql/functions/dataflow.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,10 @@ def _model_phase(keywords, filename, header, sep, train, predictors, label, algo
109109
=============================================
110110
=============================================""" % (filename, df.head()) )
111111

112+
df.to_csv('temp.csv')
113+
112114
# Encode all categorical values
113-
df = encode_categorical(df)
115+
# df = encode_categorical(df)
114116
#Classification and Regression and Cluster
115117
if not keywords["classify"] and not keywords["regress"] and not keywords["cluster"]:
116118
# KI: Rationale behind changing Error to Warning is that the user may

language/mlsql/mlsql/functions/keywords/preprocessing/impute_functions.py

-2
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,8 @@ def impute_missing(data, columns=None, impute_strategy='mode', missing_values='N
3333
if not cols_to_impute:
3434
return datacopy
3535
if impute_strategy == 'mode':
36-
print(cols_to_impute)
3736
for col in cols_to_impute:
3837
modeVal = data[col].mode()
39-
print(modeVal[0])
4038
datacopy[col] = _fill_col(data[col], missing_values, modeVal[0])
4139
return datacopy
4240
elif impute_strategy == 'mean':

language/mlsql/mlsql/functions/keywords/read_functions.py

+8-4
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ def handle_read(userfile, separator, header):
1313
if is_mlsql_file(userfile):
1414
model = load_model(userfile)
1515
else:
16-
return _read_data_file(userfile, separator, header)
16+
return _read_data_file(userfile, separator, header)
1717

1818

1919
def _read_data_file(userfile, separator, header):
@@ -34,7 +34,10 @@ def _read_data_file(userfile, separator, header):
3434

3535
#attempt to read file with given parameters
3636
try:
37-
df = read_csv(userfile, sep = separ, header = head)
37+
if head is None:
38+
df = read_csv(userfile, sep=separ, header=None)
39+
else:
40+
df = read_csv(userfile, sep = separ, header = head)
3841
except OSError as e:
3942
print("Error importing file: '" + userfile + "'")
4043
print(e)
@@ -46,7 +49,8 @@ def _handle_header(header):
4649
"""
4750
Translates header into a proper value to be read by read_csv functions from pandas
4851
"""
49-
if header is None or header == "":
52+
53+
if header is None or header == "" or header == "None":
5054
return None
5155
elif header == "False":
5256
return None
@@ -69,4 +73,4 @@ def _handle_separator(sep):
6973
if sep is None or sep == "":
7074
return ","
7175
else:
72-
return str(sep)
76+
return str(sep)

language/mlsql/mlsql/parser/keywords/read.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
from .grammer import *
2-
from pyparsing import Word, Keyword, Optional, MatchFirst, Literal
2+
from pyparsing import Word, Keyword, Optional, MatchFirst, Literal, oneOf
33

44
def define_read():
55
filename = Word(everythingWOQuotes).setResultsName("filename")
@@ -9,8 +9,9 @@ def define_read():
99

1010
#Define Read Optionals
1111
#header
12+
Nones = oneOf('None')
1213
headerLiteral = (Literal("header") + Literal("=")).suppress()
13-
header_choices = MatchFirst([Word(numbers), bool_true, bool_false]).setResultsName("header")
14+
header_choices = MatchFirst([Word(numbers), bool_true, bool_false, Nones]).setResultsName("header")
1415
header = Optional(headerLiteral + header_choices)
1516

1617
#separator
@@ -22,5 +23,5 @@ def define_read():
2223
readOptions = Optional(openParen + separator + ocomma + header + closeParen)
2324

2425
read = readKeyword + Quote + filename + Quote + readOptions
25-
26+
2627
return read
+2-2
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import mlsql
22
from mlsql import execute
33

4-
query = 'READ "data/auto.csv" (separator = "\s+", header = 0)\
4+
query = 'READ "data/auto.csv" (separator = "\s+", header = None)\
55
REPLACE ("?", "mode") SPLIT (train = .8, test = .2, validation = .0)\
66
REGRESS (predictors = [2,3,4,5,6,7,8], label = 1, algorithm = simple)'
77

8-
execute(query, verbose=True)
8+
execute(query, verbose=False)

0 commit comments

Comments
 (0)