
test: hive complete data types' check and unit tests (#160)
* feat: add Authorization SQL and update syntax file

* test: hive complete data types' check and unit tests

* feat: hive add syntax complete automatically

* feat: update hive's syntax complete

---------

Co-authored-by: zhaoge <>
Cythia828 authored Oct 8, 2023
1 parent b8c47d0 commit 0a9a7d1
Showing 7 changed files with 162 additions and 40 deletions.
10 changes: 9 additions & 1 deletion src/parser/common/basic-parser-types.ts
@@ -22,7 +22,15 @@ export enum SyntaxContextType {
     /** table name path, such as catalog.db.tb */
     TABLE = 'table',
     /** table name path will be created */
-    TABLE_CREATE = 'tableCreate'
+    TABLE_CREATE = 'tableCreate',
+    /** view name */
+    VIEW = 'view',
+    /** function name */
+    FUNCTION = 'function',
+    /** principal name */
+    PRINCIPAL = 'principal',
+    /** hint arg name */
+    HTNTARG = 'hintArg',
 }

 export interface WordRange {
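For orientation, here is how a consumer might map these context values to completion kinds in an editor. This is a hypothetical UI-side sketch, not part of the library; the kind labels and the mapping itself are illustrative, and the import path assumes the file above:

import { SyntaxContextType } from './basic-parser-types';

// Hypothetical mapping from a parser suggestion context to a completion kind label.
const completionKindByContext: Partial<Record<SyntaxContextType, string>> = {
    [SyntaxContextType.TABLE]: 'Table',
    [SyntaxContextType.TABLE_CREATE]: 'Table',
    [SyntaxContextType.VIEW]: 'View',
    [SyntaxContextType.FUNCTION]: 'Function',
    [SyntaxContextType.PRINCIPAL]: 'User/Role',
    [SyntaxContextType.HTNTARG]: 'Hint argument',
};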
93 changes: 83 additions & 10 deletions src/parser/hive.ts
@@ -1,9 +1,10 @@
 import { Token } from 'antlr4ts';
 import { CandidatesCollection } from 'antlr4-c3';
 import { HiveSqlLexer } from '../lib/hive/HiveSqlLexer';
-import { HiveSqlParser, ProgramContext } from '../lib/hive/HiveSqlParser';
+import { HiveSqlParser, ProgramContext, StatementContext, ExplainStatementContext, ExecStatementContext } from '../lib/hive/HiveSqlParser';
 import BasicParser from './common/basicParser';
-import { Suggestions } from './common/basic-parser-types';
+import { HiveSqlParserListener } from '../lib/hive/HiveSqlParserListener';
+import { SyntaxContextType, Suggestions, SyntaxSuggestion } from './common/basic-parser-types';


 export default class HiveSQL extends BasicParser<HiveSqlLexer, ProgramContext, HiveSqlParser> {
@@ -16,21 +17,93 @@ export default class HiveSQL extends BasicParser<HiveSqlLexer, ProgramContext, HiveSqlParser>
         return new HiveSqlParser(tokenStream);
     }

+    protected preferredRules: Set<number> = new Set([
+        HiveSqlParser.RULE_tableName, // table name
+        HiveSqlParser.RULE_viewName, // view name
+        HiveSqlParser.RULE_functionIdentifier, // function name
+        HiveSqlParser.RULE_principalIdentifier, // USER/ROLE/GROUP name
+        HiveSqlParser.RULE_hintArgName, // hint name
+    ]);
+
     protected get splitListener () {
-        return null as any;
+        return new HiveSqlSplitListener();
     }

-    protected preferredRules: Set<number> = new Set();
-
     protected processCandidates(
-        candidates: CandidatesCollection,
-        allTokens: Token[],
-        caretTokenIndex: number
+        candidates: CandidatesCollection,
+        allTokens: Token[],
+        caretTokenIndex: number,
+        tokenIndexOffset: number,
     ): Suggestions<Token> {
+        const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
+        const keywords: string[] = [];
+
+        for (let candidate of candidates.rules) {
+            const [ruleType, candidateRule] = candidate;
+            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + tokenIndexOffset + 1);
+
+            let syntaxContextType: SyntaxContextType;
+            switch (ruleType) {
+                case HiveSqlParser.RULE_tableName: {
+                    syntaxContextType = SyntaxContextType.TABLE;
+                    break;
+                }
+                case HiveSqlParser.RULE_viewName: {
+                    syntaxContextType = SyntaxContextType.VIEW;
+                    break;
+                }
+                case HiveSqlParser.RULE_functionIdentifier: {
+                    syntaxContextType = SyntaxContextType.FUNCTION;
+                    break;
+                }
+                case HiveSqlParser.RULE_principalIdentifier: {
+                    syntaxContextType = SyntaxContextType.PRINCIPAL;
+                    break;
+                }
+                case HiveSqlParser.RULE_hintArgName: {
+                    syntaxContextType = SyntaxContextType.HTNTARG;
+                    break;
+                }
+                default:
+                    break;
+            }
+
+            if (syntaxContextType) {
+                originalSyntaxSuggestions.push({
+                    syntaxContextType,
+                    wordRanges: tokenRanges,
+                });
+            }
+        }
+
+        for (let candidate of candidates.tokens) {
+            const symbolicName = this._parser.vocabulary.getSymbolicName(candidate[0]);
+            const displayName = this._parser.vocabulary.getDisplayName(candidate[0]);
+            if (symbolicName && symbolicName.startsWith('KW_')) {
+                const keyword = displayName.startsWith("'") && displayName.endsWith("'") ? displayName.slice(1, -1) : displayName;
+                keywords.push(keyword);
+            }
+        }
         return {
-            syntax: [],
-            keywords: []
-        }
+            syntax: originalSyntaxSuggestions,
+            keywords,
+        };
     }
 }
+
+export class HiveSqlSplitListener implements HiveSqlParserListener {
+    private _statementContext: StatementContext[] = [];
+
+    exitStatement = (ctx: StatementContext) => {
+        this._statementContext.push(ctx);
+    }
+
+    enterStatement = (ctx: StatementContext) => {
+    };
+
+    get statementsContext() {
+        return this._statementContext;
+    }
+}
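Taken together, a minimal end-to-end sketch of the new suggestion path. The getSuggestionAtCaretPosition call and the { lineNumber, column } caret shape follow BasicParser's public API; the import paths, the sample SQL, and the assumption that wordRanges entries expose a text field are illustrative:

import HiveSQL from '../src/parser/hive';
import { SyntaxContextType } from '../src/parser/common/basic-parser-types';

const parser = new HiveSQL();
// The caret sits right after the dot, so RULE_tableName should be a candidate rule.
const sql = 'SELECT * FROM db.';
const suggestions = parser.getSuggestionAtCaretPosition(sql, { lineNumber: 1, column: 18 });

suggestions?.syntax.forEach((item) => {
    if (item.syntaxContextType === SyntaxContextType.TABLE) {
        // item.wordRanges holds the words between the rule start and the caret.
        console.log(item.wordRanges.map((w) => w.text));
    }
});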

4 changes: 2 additions & 2 deletions test/parser/hive/listener.test.ts
@@ -6,8 +6,8 @@ import HiveSQL from '../../../src/parser/hive';
 describe('HiveSQL Listener Tests', () => {
     const parser = new HiveSQL();
     test('Listener enterSelectList', async () => {
-        const expectTableName = 'userName';
-        const sql = `select ${expectTableName} from user1 where inc_day='20190601' limit 1000;`;
+        const expectTableName = 'username';
+        const sql = `select ${expectTableName} from tablename where inc_day='20190601' limit 1000;`;
         const parserTree = parser.parse(sql);

         let result = '';
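The assertion half of this test sits below the hunk; roughly, it follows the pattern below. This sketch assumes BasicParser exposes a listen(listener, parserTree) helper and that the generated HiveSqlParserListener has an enterSelectItem hook; both names are assumptions here, and the snippet continues the variables shown above:

import { ParseTreeListener } from 'antlr4ts/tree';
import { HiveSqlParserListener } from '../../../src/lib/hive/HiveSqlParserListener';

// Collect the text of the select item as the walker passes it.
const listenTableName: HiveSqlParserListener = {
    enterSelectItem(ctx) {
        result = ctx.text;
    },
};
parser.listen(listenTableName as ParseTreeListener, parserTree);
expect(result).toBe(expectTableName);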
20 changes: 0 additions & 20 deletions test/parser/hive/syntax.test.ts

This file was deleted.

16 changes: 16 additions & 0 deletions test/parser/hive/syntax/dataTypesStatement.test.ts
@@ -0,0 +1,16 @@
+import HiveSQL from '../../../../src/parser/hive';
+import { readSQL } from '../../../helper';
+
+const parser = new HiveSQL();
+
+const features = {
+    dataTypes: readSQL(__dirname, 'dataTypes.sql'),
+};
+
+describe('HiveSQL Check Data Types Tests', () => {
+    features.dataTypes.forEach((dataType) => {
+        it(dataType, () => {
+            expect(parser.validate(dataType).length).toBe(0);
+        });
+    });
+});
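For context, readSQL is the shared test helper that loads a fixture file and yields one statement per array entry. A rough sketch of the assumed behavior follows; the real helper lives in test/helper.ts and may differ in detail (for example, in how it handles semicolons inside string literals):

import fs from 'fs';
import path from 'path';

// Assumed behavior: read the fixture, split on semicolons, and
// return each non-empty statement with its terminator restored.
export function readSQL(dirname: string, fileName: string): string[] {
    const content = fs.readFileSync(path.join(dirname, 'fixtures', fileName), 'utf-8');
    return content
        .split(';')
        .map((statement) => statement.trim())
        .filter(Boolean)
        .map((statement) => statement + ';');
}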
14 changes: 7 additions & 7 deletions test/parser/hive/syntax/fixtures/createTable.sql
@@ -28,31 +28,31 @@ CREATE TEMPORARY EXTERNAL TABLE page_view(
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE;
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS TEXTFILE;

 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
     userid BIGINT,
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE;
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS RCFILE;

 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
     userid BIGINT,
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE LOCATION '/hsd_path';
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS ORC LOCATION '/hsd_path';

 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
     userid BIGINT,
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE LOCATION '/hsd_path' AS
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS PARQUET LOCATION '/hsd_path' AS
 SELECT
     (key % 1024) new_key,
     concat(key, value) key_value_pair
@@ -62,9 +62,9 @@ FROM


 CREATE TABLE list_bucket_single (key STRING, value STRING)
-SKEWED BY (key) ON (1,5,6) STORED AS DIRECTORIES;
+SKEWED BY (key) ON (1,5,6) STORED AS AVRO;

-CREATE TRANSACTIONAL TABLE transactional_table_test(key STRING, value STRING) PARTITIONED BY(ds STRING) STORED AS ORC;
+CREATE TRANSACTIONAL TABLE transactional_table_test(key STRING, value STRING) PARTITIONED BY(ds STRING) STORED AS INPUTFORMAT 'inputfilename' OUTPUTFORMAT 'outputfilename';

 CREATE TABLE IF NOT EXISTS copy_table LIKE origin_table;

@@ -104,4 +104,4 @@ CREATE MANAGED TABLE managed_table (
     name STRING COMMENT '名称'
 ) COMMENT '测试分桶' CLUSTERED BY(id) SORTED BY (id) INTO 4 BUCKETS STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler';

-CREATE TABLE list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) SKEWED BY (col1, col2) ON (('s1', 1), ('s3', 3), ('s13', 13), ('s78', 78)) STORED AS DIRECTORIES;
+CREATE TABLE list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) SKEWED BY (col1, col2) ON (('s1', 1), ('s3', 3), ('s13', 13), ('s78', 78)) STORED AS JSONFILE;
45 changes: 45 additions & 0 deletions test/parser/hive/syntax/fixtures/dataTypes.sql
@@ -0,0 +1,45 @@
+-- TINYINT
+-- | SMALLINT
+-- | INT
+-- | BIGINT
+-- | BOOLEAN
+-- | FLOAT
+-- | DOUBLE
+-- | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
+-- | STRING
+-- | BINARY -- (Note: Available in Hive 0.8.0 and later)
+-- | TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
+-- | DECIMAL -- (Note: Available in Hive 0.11.0 and later)
+-- | DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
+-- | DATE -- (Note: Available in Hive 0.12.0 and later)
+-- | VARCHAR -- (Note: Available in Hive 0.12.0 and later)
+-- | CHAR -- (Note: Available in Hive 0.13.0 and later)
+-- | ARRAY<data_type> (Note: negative values and non-constant expressions are allowed as of Hive 0.14.)
+-- | MAP<primitive_type, data_type> (Note: negative values and non-constant expressions are allowed as of Hive 0.14.)
+-- | STRUCT<col_name : data_type [COMMENT col_comment], ...>
+-- | UNIONTYPE<data_type, data_type, ...> (Note: Only available starting with Hive 0.7.0.)
+CREATE TABLE test_table (
+    viewTime INT,
+    userid BIGINT,
+    page_url STRING,
+    age TINYINT,
+    instance FLOAT,
+    isAduit BOOLEAN,
+    score DOUBLE,
+    aver DOUBLE PRECISION,
+    somename BINARY,
+    someid DECIMAL,
+    birth TIMESTAMP,
+    schooldt DATE,
+    someint SMALLINT,
+    colvarchar VARCHAR(1),
+    colchar CHAR(2),
+    coldecil DECIMAL(3,4),
+    list ARRAY <STRING>,
+    realmap MAP <STRING, DATE>,
+    realstruct STRUCT<col_name: STRING>,
+    collast UNIONTYPE <DOUBLE, STRING>
+);
+
+-- INTERVAL
+UPDATE tablenames SET birthdt=INTERVAL '1-2' YEAR TO MONTH;
