
test: hive complete data types' check and unit tests (#160)
* feat: add Authorization SQL and update syntax file

* test: hive complete data types' check and unit tests

* feat: hive add syntax complete automatically

* feat: update hive's syntax complete

---------

Co-authored-by: zhaoge <>
Cythia828 authored Oct 8, 2023
1 parent b8c47d0 commit 0a9a7d1
Showing 7 changed files with 162 additions and 40 deletions.
10 changes: 9 additions & 1 deletion src/parser/common/basic-parser-types.ts
@@ -22,7 +22,15 @@ export enum SyntaxContextType {
     /** table name path, such as catalog.db.tb */
     TABLE = 'table',
     /** table name path will be created */
-    TABLE_CREATE = 'tableCreate'
+    TABLE_CREATE = 'tableCreate',
+    /** view name */
+    VIEW = 'view',
+    /** function name */
+    FUNCTION = 'function',
+    /** principal name */
+    PRINCIPAL = 'principal',
+    /** hint arg name */
+    HTNTARG = 'hintArg',
 }

 export interface WordRange {
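For orientation, here is how a consumer might map these context values to completion kinds in an editor. This is a hypothetical UI-side sketch, not part of the library; the kind labels and the mapping itself are illustrative, and the import path assumes the file above:

import { SyntaxContextType } from './basic-parser-types';

// Hypothetical mapping from a parser suggestion context to a completion kind label.
const completionKindByContext: Partial<Record<SyntaxContextType, string>> = {
    [SyntaxContextType.TABLE]: 'Table',
    [SyntaxContextType.TABLE_CREATE]: 'Table',
    [SyntaxContextType.VIEW]: 'View',
    [SyntaxContextType.FUNCTION]: 'Function',
    [SyntaxContextType.PRINCIPAL]: 'User/Role',
    [SyntaxContextType.HTNTARG]: 'Hint argument',
};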
93 changes: 83 additions & 10 deletions src/parser/hive.ts
@@ -1,9 +1,10 @@
 import { Token } from 'antlr4ts';
 import { CandidatesCollection } from 'antlr4-c3';
 import { HiveSqlLexer } from '../lib/hive/HiveSqlLexer';
-import { HiveSqlParser, ProgramContext } from '../lib/hive/HiveSqlParser';
+import { HiveSqlParser, ProgramContext, StatementContext, ExplainStatementContext, ExecStatementContext } from '../lib/hive/HiveSqlParser';
 import BasicParser from './common/basicParser';
-import { Suggestions } from './common/basic-parser-types';
+import { HiveSqlParserListener } from '../lib/hive/HiveSqlParserListener';
+import { SyntaxContextType, Suggestions, SyntaxSuggestion } from './common/basic-parser-types';


 export default class HiveSQL extends BasicParser<HiveSqlLexer, ProgramContext, HiveSqlParser> {
@@ -16,21 +17,93 @@ export default class HiveSQL extends BasicParser<HiveSqlLexer, ProgramContext, HiveSqlParser>
         return new HiveSqlParser(tokenStream);
     }

+    protected preferredRules: Set<number> = new Set([
+        HiveSqlParser.RULE_tableName, // table name
+        HiveSqlParser.RULE_viewName, // view name
+        HiveSqlParser.RULE_functionIdentifier, // function name
+        HiveSqlParser.RULE_principalIdentifier, // USER/ROLE/GROUP name
+        HiveSqlParser.RULE_hintArgName, // hint name
+    ]);
+
     protected get splitListener () {
-        return null as any;
+        return new HiveSqlSplitListener();
     }

-    protected preferredRules: Set<number> = new Set();
-
     protected processCandidates(
-        candidates: CandidatesCollection,
-        allTokens: Token[],
-        caretTokenIndex: number
+        candidates: CandidatesCollection,
+        allTokens: Token[],
+        caretTokenIndex: number,
+        tokenIndexOffset: number,
     ): Suggestions<Token> {
+        const originalSyntaxSuggestions: SyntaxSuggestion<Token>[] = [];
+        const keywords: string[] = [];
+
+        for (let candidate of candidates.rules) {
+            const [ruleType, candidateRule] = candidate;
+            const startTokenIndex = candidateRule.startTokenIndex + tokenIndexOffset;
+            const tokenRanges = allTokens.slice(startTokenIndex, caretTokenIndex + tokenIndexOffset + 1);
+
+            let syntaxContextType: SyntaxContextType;
+            switch (ruleType) {
+                case HiveSqlParser.RULE_tableName: {
+                    syntaxContextType = SyntaxContextType.TABLE;
+                    break;
+                }
+                case HiveSqlParser.RULE_viewName: {
+                    syntaxContextType = SyntaxContextType.VIEW;
+                    break;
+                }
+                case HiveSqlParser.RULE_functionIdentifier: {
+                    syntaxContextType = SyntaxContextType.FUNCTION;
+                    break;
+                }
+                case HiveSqlParser.RULE_principalIdentifier: {
+                    syntaxContextType = SyntaxContextType.PRINCIPAL;
+                    break;
+                }
+                case HiveSqlParser.RULE_hintArgName: {
+                    syntaxContextType = SyntaxContextType.HTNTARG;
+                    break;
+                }
+                default:
+                    break;
+            }
+
+            if (syntaxContextType) {
+                originalSyntaxSuggestions.push({
+                    syntaxContextType,
+                    wordRanges: tokenRanges,
+                });
+            }
+        }
+
+        for (let candidate of candidates.tokens) {
+            const symbolicName = this._parser.vocabulary.getSymbolicName(candidate[0]);
+            const displayName = this._parser.vocabulary.getDisplayName(candidate[0]);
+            if (symbolicName && symbolicName.startsWith('KW_')) {
+                const keyword = displayName.startsWith("'") && displayName.endsWith("'") ? displayName.slice(1, -1) : displayName;
+                keywords.push(keyword);
+            }
+        }
         return {
-            syntax: [],
-            keywords: []
-        }
+            syntax: originalSyntaxSuggestions,
+            keywords,
+        };
     }
 }
+
+export class HiveSqlSplitListener implements HiveSqlParserListener {
+    private _statementContext: StatementContext[] = [];
+
+    exitStatement = (ctx: StatementContext) => {
+        this._statementContext.push(ctx);
+    }
+
+    enterStatement = (ctx: StatementContext) => {
+    };
+
+    get statementsContext() {
+        return this._statementContext;
+    }
+}
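Taken together, a minimal end-to-end sketch of the new suggestion path. The getSuggestionAtCaretPosition call and the { lineNumber, column } caret shape follow BasicParser's public API; the import paths, the sample SQL, and the assumption that wordRanges entries expose a text field are illustrative:

import HiveSQL from '../src/parser/hive';
import { SyntaxContextType } from '../src/parser/common/basic-parser-types';

const parser = new HiveSQL();
// The caret sits right after the dot, so RULE_tableName should be a candidate rule.
const sql = 'SELECT * FROM db.';
const suggestions = parser.getSuggestionAtCaretPosition(sql, { lineNumber: 1, column: 18 });

suggestions?.syntax.forEach((item) => {
    if (item.syntaxContextType === SyntaxContextType.TABLE) {
        // item.wordRanges holds the words between the rule start and the caret.
        console.log(item.wordRanges.map((w) => w.text));
    }
});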

4 changes: 2 additions & 2 deletions test/parser/hive/listener.test.ts
@@ -6,8 +6,8 @@ import HiveSQL from '../../../src/parser/hive';
 describe('HiveSQL Listener Tests', () => {
     const parser = new HiveSQL();
     test('Listener enterSelectList', async () => {
-        const expectTableName = 'userName';
-        const sql = `select ${expectTableName} from user1 where inc_day='20190601' limit 1000;`;
+        const expectTableName = 'username';
+        const sql = `select ${expectTableName} from tablename where inc_day='20190601' limit 1000;`;
         const parserTree = parser.parse(sql);

         let result = '';
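The assertion half of this test sits below the hunk; roughly, it follows the pattern below. This sketch assumes BasicParser exposes a listen(listener, parserTree) helper and that the generated HiveSqlParserListener has an enterSelectItem hook; both names are assumptions here, and the snippet continues the variables shown above:

import { ParseTreeListener } from 'antlr4ts/tree';
import { HiveSqlParserListener } from '../../../src/lib/hive/HiveSqlParserListener';

// Collect the text of the select item as the walker passes it.
const listenTableName: HiveSqlParserListener = {
    enterSelectItem(ctx) {
        result = ctx.text;
    },
};
parser.listen(listenTableName as ParseTreeListener, parserTree);
expect(result).toBe(expectTableName);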
20 changes: 0 additions & 20 deletions test/parser/hive/syntax.test.ts

This file was deleted.

16 changes: 16 additions & 0 deletions test/parser/hive/syntax/dataTypesStatement.test.ts
@@ -0,0 +1,16 @@
+import HiveSQL from '../../../../src/parser/hive';
+import { readSQL } from '../../../helper';
+
+const parser = new HiveSQL();
+
+const features = {
+    dataTypes: readSQL(__dirname, 'dataTypes.sql'),
+};
+
+describe('HiveSQL Check Data Types Tests', () => {
+    features.dataTypes.forEach((dataType) => {
+        it(dataType, () => {
+            expect(parser.validate(dataType).length).toBe(0);
+        });
+    });
+});
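For context, readSQL is the shared test helper that loads a fixture file and yields one statement per array entry. A rough sketch of the assumed behavior follows; the real helper lives in test/helper.ts and may differ in detail (for example, in how it handles semicolons inside string literals):

import fs from 'fs';
import path from 'path';

// Assumed behavior: read the fixture, split on semicolons, and
// return each non-empty statement with its terminator restored.
export function readSQL(dirname: string, fileName: string): string[] {
    const content = fs.readFileSync(path.join(dirname, 'fixtures', fileName), 'utf-8');
    return content
        .split(';')
        .map((statement) => statement.trim())
        .filter(Boolean)
        .map((statement) => statement + ';');
}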
14 changes: 7 additions & 7 deletions test/parser/hive/syntax/fixtures/createTable.sql
@@ -28,31 +28,31 @@ CREATE TEMPORARY EXTERNAL TABLE page_view(
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE;
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS TEXTFILE;

 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
     userid BIGINT,
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE;
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS RCFILE;

 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
     userid BIGINT,
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE LOCATION '/hsd_path';
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS ORC LOCATION '/hsd_path';

 CREATE TEMPORARY EXTERNAL TABLE IF NOT EXISTS page_view(
     viewTime INT,
     userid BIGINT,
     page_url STRING,
     referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User'
-) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS SEQUENCEFILE LOCATION '/hsd_path' AS
+) COMMENT 'This is the page view table' PARTITIONED BY(dt STRING, country STRING) CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS ROW FORMAT DELIMITED FIELDS TERMINATED BY '\001' COLLECTION ITEMS TERMINATED BY '\002' MAP KEYS TERMINATED BY '\003' STORED AS PARQUET LOCATION '/hsd_path' AS
 SELECT
     (key % 1024) new_key,
     concat(key, value) key_value_pair
@@ -62,9 +62,9 @@ FROM


 CREATE TABLE list_bucket_single (key STRING, value STRING)
-SKEWED BY (key) ON (1,5,6) STORED AS DIRECTORIES;
+SKEWED BY (key) ON (1,5,6) STORED AS AVRO;

-CREATE TRANSACTIONAL TABLE transactional_table_test(key STRING, value STRING) PARTITIONED BY(ds STRING) STORED AS ORC;
+CREATE TRANSACTIONAL TABLE transactional_table_test(key STRING, value STRING) PARTITIONED BY(ds STRING) STORED AS INPUTFORMAT 'inputfilename' OUTPUTFORMAT 'outputfilename';

 CREATE TABLE IF NOT EXISTS copy_table LIKE origin_table;

@@ -104,4 +104,4 @@ CREATE MANAGED TABLE managed_table (
     name STRING COMMENT '名称'
 ) COMMENT '测试分桶' CLUSTERED BY(id) SORTED BY (id) INTO 4 BUCKETS STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler';

-CREATE TABLE list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) SKEWED BY (col1, col2) ON (('s1', 1), ('s3', 3), ('s13', 13), ('s78', 78)) STORED AS DIRECTORIES;
+CREATE TABLE list_bucket_multiple (col1 STRING, col2 INT, col3 STRING) SKEWED BY (col1, col2) ON (('s1', 1), ('s3', 3), ('s13', 13), ('s78', 78)) STORED AS JSONFILE;
45 changes: 45 additions & 0 deletions test/parser/hive/syntax/fixtures/dataTypes.sql
@@ -0,0 +1,45 @@
+-- TINYINT
+-- | SMALLINT
+-- | INT
+-- | BIGINT
+-- | BOOLEAN
+-- | FLOAT
+-- | DOUBLE
+-- | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
+-- | STRING
+-- | BINARY -- (Note: Available in Hive 0.8.0 and later)
+-- | TIMESTAMP -- (Note: Available in Hive 0.8.0 and later)
+-- | DECIMAL -- (Note: Available in Hive 0.11.0 and later)
+-- | DECIMAL(precision, scale) -- (Note: Available in Hive 0.13.0 and later)
+-- | DATE -- (Note: Available in Hive 0.12.0 and later)
+-- | VARCHAR -- (Note: Available in Hive 0.12.0 and later)
+-- | CHAR -- (Note: Available in Hive 0.13.0 and later)
+-- | ARRAY<data_type> (Note: negative values and non-constant expressions are allowed as of Hive 0.14.)
+-- | MAP<primitive_type, data_type> (Note: negative values and non-constant expressions are allowed as of Hive 0.14.)
+-- | STRUCT<col_name : data_type [COMMENT col_comment], ...>
+-- | UNIONTYPE<data_type, data_type, ...> (Note: Only available starting with Hive 0.7.0.)
+CREATE TABLE test_table (
+    viewTime INT,
+    userid BIGINT,
+    page_url STRING,
+    age TINYINT,
+    instance FLOAT,
+    isAduit BOOLEAN,
+    score DOUBLE,
+    aver DOUBLE PRECISION,
+    somename BINARY,
+    someid DECIMAL,
+    birth TIMESTAMP,
+    schooldt DATE,
+    someint SMALLINT,
+    colvarchar VARCHAR(1),
+    colchar CHAR(2),
+    coldecil DECIMAL(3,4),
+    list ARRAY <STRING>,
+    realmap MAP <STRING, DATE>,
+    realstruct STRUCT<col_name: STRING>,
+    collast UNIONTYPE <DOUBLE, STRING>
+);
+
+-- INTERVAL
+UPDATE tablenames SET birthdt=INTERVAL '1-2' YEAR TO MONTH;
