From 7afeb8b8078018a167e7bce6421024b95517b234 Mon Sep 17 00:00:00 2001 From: Asura7969 <1402357969@qq.com> Date: Wed, 8 Nov 2023 17:04:53 +0800 Subject: [PATCH 1/5] Minor: Improve the document format of JoinHashMap --- .../src/joins/hash_join_utils.rs | 115 ++++++++++-------- 1 file changed, 62 insertions(+), 53 deletions(-) diff --git a/datafusion/physical-plan/src/joins/hash_join_utils.rs b/datafusion/physical-plan/src/joins/hash_join_utils.rs index 3a2a85c72722..3ea0331ab4fe 100644 --- a/datafusion/physical-plan/src/joins/hash_join_utils.rs +++ b/datafusion/physical-plan/src/joins/hash_join_utils.rs @@ -40,59 +40,68 @@ use datafusion_physical_expr::{PhysicalExpr, PhysicalSortExpr}; use hashbrown::raw::RawTable; use hashbrown::HashSet; -// Maps a `u64` hash value based on the build side ["on" values] to a list of indices with this key's value. -// By allocating a `HashMap` with capacity for *at least* the number of rows for entries at the build side, -// we make sure that we don't have to re-hash the hashmap, which needs access to the key (the hash in this case) value. -// E.g. 1 -> [3, 6, 8] indicates that the column values map to rows 3, 6 and 8 for hash value 1 -// As the key is a hash value, we need to check possible hash collisions in the probe stage -// During this stage it might be the case that a row is contained the same hashmap value, -// but the values don't match. Those are checked in the [equal_rows] macro -// The indices (values) are stored in a separate chained list stored in the `Vec`. -// The first value (+1) is stored in the hashmap, whereas the next value is stored in array at the position value. -// The chain can be followed until the value "0" has been reached, meaning the end of the list. -// Also see chapter 5.3 of [Balancing vectorized query execution with bandwidth-optimized storage](https://dare.uva.nl/search?identifier=5ccbb60a-38b8-4eeb-858a-e7735dd37487) -// See the example below: -// Insert (1,1) -// map: -// --------- -// | 1 | 2 | -// --------- -// next: -// --------------------- -// | 0 | 0 | 0 | 0 | 0 | -// --------------------- -// Insert (2,2) -// map: -// --------- -// | 1 | 2 | -// | 2 | 3 | -// --------- -// next: -// --------------------- -// | 0 | 0 | 0 | 0 | 0 | -// --------------------- -// Insert (1,3) -// map: -// --------- -// | 1 | 4 | -// | 2 | 3 | -// --------- -// next: -// --------------------- -// | 0 | 0 | 0 | 2 | 0 | <--- hash value 1 maps to 4,2 (which means indices values 3,1) -// --------------------- -// Insert (1,4) -// map: -// --------- -// | 1 | 5 | -// | 2 | 3 | -// --------- -// next: -// --------------------- -// | 0 | 0 | 0 | 2 | 4 | <--- hash value 1 maps to 5,4,2 (which means indices values 4,3,1) -// --------------------- -// TODO: speed up collision checks -// https://github.com/apache/arrow-datafusion/issues/50 +/// Maps a `u64` hash value based on the build side ["on" values] to a list of indices with this key's value. +/// +/// By allocating a `HashMap` with capacity for *at least* the number of rows for entries at the build side, +/// we make sure that we don't have to re-hash the hashmap, which needs access to the key (the hash in this case) value. +/// +/// E.g. 1 -> [3, 6, 8] indicates that the column values map to rows 3, 6 and 8 for hash value 1 +/// As the key is a hash value, we need to check possible hash collisions in the probe stage +/// During this stage it might be the case that a row is contained the same hashmap value, +/// but the values don't match. Those are checked in the [equal_rows] macro +/// The indices (values) are stored in a separate chained list stored in the `Vec`. +/// +/// The first value (+1) is stored in the hashmap, whereas the next value is stored in array at the position value. +/// +/// The chain can be followed until the value "0" has been reached, meaning the end of the list. +/// Also see chapter 5.3 of [Balancing vectorized query execution with bandwidth-optimized storage](https://dare.uva.nl/search?identifier=5ccbb60a-38b8-4eeb-858a-e7735dd37487) +/// +/// # Example +/// +/// ``` text +/// See the example below: +/// Insert (1,1) +/// map: +/// --------- +/// | 1 | 2 | +/// --------- +/// next: +/// --------------------- +/// | 0 | 0 | 0 | 0 | 0 | +/// --------------------- +/// Insert (2,2) +/// map: +/// --------- +/// | 1 | 2 | +/// | 2 | 3 | +/// --------- +/// next: +/// --------------------- +/// | 0 | 0 | 0 | 0 | 0 | +/// --------------------- +/// Insert (1,3) +/// map: +/// --------- +/// | 1 | 4 | +/// | 2 | 3 | +/// --------- +/// next: +/// --------------------- +/// | 0 | 0 | 0 | 2 | 0 | <--- hash value 1 maps to 4,2 (which means indices values 3,1) +/// --------------------- +/// Insert (1,4) +/// map: +/// --------- +/// | 1 | 5 | +/// | 2 | 3 | +/// --------- +/// next: +/// --------------------- +/// | 0 | 0 | 0 | 2 | 4 | <--- hash value 1 maps to 5,4,2 (which means indices values 4,3,1) +/// --------------------- +/// ``` +/// +///TODO: [speed up collision checks](https://github.com/apache/arrow-datafusion/issues/50) pub struct JoinHashMap { // Stores hash value to last row index pub map: RawTable<(u64, u64)>, From 737d73c900ef6587b5c65c552005d6765878d78b Mon Sep 17 00:00:00 2001 From: asura7969 <1402357969@qq.com> Date: Fri, 17 Nov 2023 21:51:17 +0800 Subject: [PATCH 2/5] Port tests in csv_files.rs to sqllogictest --- .../common/src/file_options/csv_writer.rs | 7 +- .../src/datasource/listing_table_factory.rs | 2 + datafusion/core/tests/sql/csv_files.rs | 125 ------------------ datafusion/core/tests/sql/mod.rs | 1 - .../sqllogictest/test_files/csv_files.slt | 71 ++++++++++ 5 files changed, 79 insertions(+), 127 deletions(-) delete mode 100644 datafusion/core/tests/sql/csv_files.rs create mode 100644 datafusion/sqllogictest/test_files/csv_files.slt diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs index fef4a1d21b4b..35f2e62e7b1a 100644 --- a/datafusion/common/src/file_options/csv_writer.rs +++ b/datafusion/common/src/file_options/csv_writer.rs @@ -90,7 +90,12 @@ impl TryFrom<(&ConfigOptions, &StatementOptions)> for CsvWriterOptions { "Unable to convert CSV delimiter into u8".into(), ) })?) - }, + }, + "quote" | "escape" => { + // These two attributes are only available when reading csv files. + // To avoid error + builder + }, _ => return Err(DataFusionError::Configuration(format!("Found unsupported option {option} with value {value} for CSV format!"))) } } diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index f9a7ab04ce68..f1ca936a2dfc 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -73,6 +73,8 @@ impl TableProviderFactory for ListingTableFactory { CsvFormat::default() .with_has_header(cmd.has_header) .with_delimiter(cmd.delimiter as u8) + .with_quote(cmd.options.get("quote").map_or(b'"', |x| x.as_bytes()[0])) + .with_escape(cmd.options.get("escape").map_or(None, |x| Some(x.as_bytes()[0]))) .with_file_compression_type(file_compression_type), ), #[cfg(feature = "parquet")] diff --git a/datafusion/core/tests/sql/csv_files.rs b/datafusion/core/tests/sql/csv_files.rs deleted file mode 100644 index 5ed0068d6135..000000000000 --- a/datafusion/core/tests/sql/csv_files.rs +++ /dev/null @@ -1,125 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -use super::*; - -#[tokio::test] -async fn csv_custom_quote() -> Result<()> { - let tmp_dir = TempDir::new()?; - let ctx = SessionContext::new(); - - let schema = Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::Utf8, false), - ])); - let filename = format!("partition.{}", "csv"); - let file_path = tmp_dir.path().join(filename); - let mut file = File::create(file_path)?; - - // generate some data - for index in 0..10 { - let text1 = format!("id{index:}"); - let text2 = format!("value{index:}"); - let data = format!("~{text1}~,~{text2}~\r\n"); - file.write_all(data.as_bytes())?; - } - ctx.register_csv( - "test", - tmp_dir.path().to_str().unwrap(), - CsvReadOptions::new() - .schema(&schema) - .has_header(false) - .quote(b'~'), - ) - .await?; - - let results = plan_and_collect(&ctx, "SELECT * from test").await?; - - let expected = vec![ - "+-----+--------+", - "| c1 | c2 |", - "+-----+--------+", - "| id0 | value0 |", - "| id1 | value1 |", - "| id2 | value2 |", - "| id3 | value3 |", - "| id4 | value4 |", - "| id5 | value5 |", - "| id6 | value6 |", - "| id7 | value7 |", - "| id8 | value8 |", - "| id9 | value9 |", - "+-----+--------+", - ]; - - assert_batches_sorted_eq!(expected, &results); - Ok(()) -} - -#[tokio::test] -async fn csv_custom_escape() -> Result<()> { - let tmp_dir = TempDir::new()?; - let ctx = SessionContext::new(); - - let schema = Arc::new(Schema::new(vec![ - Field::new("c1", DataType::Utf8, false), - Field::new("c2", DataType::Utf8, false), - ])); - let filename = format!("partition.{}", "csv"); - let file_path = tmp_dir.path().join(filename); - let mut file = File::create(file_path)?; - - // generate some data - for index in 0..10 { - let text1 = format!("id{index:}"); - let text2 = format!("value\\\"{index:}"); - let data = format!("\"{text1}\",\"{text2}\"\r\n"); - file.write_all(data.as_bytes())?; - } - - ctx.register_csv( - "test", - tmp_dir.path().to_str().unwrap(), - CsvReadOptions::new() - .schema(&schema) - .has_header(false) - .escape(b'\\'), - ) - .await?; - - let results = plan_and_collect(&ctx, "SELECT * from test").await?; - - let expected = vec![ - "+-----+---------+", - "| c1 | c2 |", - "+-----+---------+", - "| id0 | value\"0 |", - "| id1 | value\"1 |", - "| id2 | value\"2 |", - "| id3 | value\"3 |", - "| id4 | value\"4 |", - "| id5 | value\"5 |", - "| id6 | value\"6 |", - "| id7 | value\"7 |", - "| id8 | value\"8 |", - "| id9 | value\"9 |", - "+-----+---------+", - ]; - - assert_batches_sorted_eq!(expected, &results); - Ok(()) -} diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index 4bd42c4688df..1469977b0ab7 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -74,7 +74,6 @@ macro_rules! test_expression { pub mod aggregates; pub mod create_drop; -pub mod csv_files; pub mod describe; pub mod displayable; pub mod explain_analyze; diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt new file mode 100644 index 000000000000..3c35d4b9e915 --- /dev/null +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +############# +## Csv Files Tests +############# + +statement ok +CREATE EXTERNAL TABLE csv_custom_quote ( + c1 VARCHAR DEFAULT NULL, + c2 VARCHAR DEFAULT NULL +) +STORED AS CSV +WITH HEADER ROW +DELIMITER ',' +OPTIONS ('quote' '~') +LOCATION '../../testing/data/csv/csv_custom_quote.csv'; + +statement ok +CREATE EXTERNAL TABLE csv_custom_escape ( + c1 VARCHAR DEFAULT NULL, + c2 VARCHAR DEFAULT NULL +) +STORED AS CSV +WITH HEADER ROW +DELIMITER ',' +OPTIONS ('escape' '\') +LOCATION '../../testing/data/csv/csv_custom_escape.csv'; + +query TT +SELECT * from csv_custom_quote; +---- +id0 value0 +id1 value1 +id2 value2 +id3 value3 +id4 value4 +id5 value5 +id6 value6 +id7 value7 +id8 value8 +id9 value9 + + +query TT +SELECT * from csv_custom_escape; +---- +id0 value"0 +id1 value"1 +id2 value"2 +id3 value"3 +id4 value"4 +id5 value"5 +id6 value"6 +id7 value"7 +id8 value"8 +id9 value"9 From 34922662de91d4d684fa72f2b838319f488fc88b Mon Sep 17 00:00:00 2001 From: asura7969 <1402357969@qq.com> Date: Fri, 17 Nov 2023 23:15:34 +0800 Subject: [PATCH 3/5] fix: fmt --- datafusion/core/src/datasource/listing_table_factory.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index f1ca936a2dfc..7f18f7d2d7ae 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -73,8 +73,10 @@ impl TableProviderFactory for ListingTableFactory { CsvFormat::default() .with_has_header(cmd.has_header) .with_delimiter(cmd.delimiter as u8) - .with_quote(cmd.options.get("quote").map_or(b'"', |x| x.as_bytes()[0])) - .with_escape(cmd.options.get("escape").map_or(None, |x| Some(x.as_bytes()[0]))) + .with_quote( + cmd.options.get("quote").map_or(b'"', |x| x.as_bytes()[0]), + ) + .with_escape(cmd.options.get("escape").map(|x| x.as_bytes()[0])) .with_file_compression_type(file_compression_type), ), #[cfg(feature = "parquet")] From 3a6e362df092dea3939dc7fb9b9878d259f8b08a Mon Sep 17 00:00:00 2001 From: asura7969 <1402357969@qq.com> Date: Tue, 21 Nov 2023 23:13:29 +0800 Subject: [PATCH 4/5] fix: test file --- datafusion/core/tests/data/custom_escape.csv | 11 +++++++++++ datafusion/core/tests/data/custom_quote.csv | 11 +++++++++++ datafusion/core/tests/sql/mod.rs | 1 - datafusion/sqllogictest/test_files/csv_files.slt | 12 ++++++------ 4 files changed, 28 insertions(+), 7 deletions(-) create mode 100644 datafusion/core/tests/data/custom_escape.csv create mode 100644 datafusion/core/tests/data/custom_quote.csv diff --git a/datafusion/core/tests/data/custom_escape.csv b/datafusion/core/tests/data/custom_escape.csv new file mode 100644 index 000000000000..331a1e697329 --- /dev/null +++ b/datafusion/core/tests/data/custom_escape.csv @@ -0,0 +1,11 @@ +c1,c2 +"id0","value\"0" +"id1","value\"1" +"id2","value\"2" +"id3","value\"3" +"id4","value\"4" +"id5","value\"5" +"id6","value\"6" +"id7","value\"7" +"id8","value\"8" +"id9","value\"9" diff --git a/datafusion/core/tests/data/custom_quote.csv b/datafusion/core/tests/data/custom_quote.csv new file mode 100644 index 000000000000..d81488436409 --- /dev/null +++ b/datafusion/core/tests/data/custom_quote.csv @@ -0,0 +1,11 @@ +c1,c2 +~id0~,~value0~ +~id1~,~value1~ +~id2~,~value2~ +~id3~,~value3~ +~id4~,~value4~ +~id5~,~value5~ +~id6~,~value6~ +~id7~,~value7~ +~id8~,~value8~ +~id9~,~value9~ diff --git a/datafusion/core/tests/sql/mod.rs b/datafusion/core/tests/sql/mod.rs index d915f7b910b5..af3d46491726 100644 --- a/datafusion/core/tests/sql/mod.rs +++ b/datafusion/core/tests/sql/mod.rs @@ -74,7 +74,6 @@ macro_rules! test_expression { pub mod aggregates; pub mod create_drop; -pub mod displayable; pub mod explain_analyze; pub mod expr; pub mod group_by; diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index 3c35d4b9e915..bd0fd727052e 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -20,7 +20,7 @@ ############# statement ok -CREATE EXTERNAL TABLE csv_custom_quote ( +CREATE EXTERNAL TABLE custom_quote ( c1 VARCHAR DEFAULT NULL, c2 VARCHAR DEFAULT NULL ) @@ -28,10 +28,10 @@ STORED AS CSV WITH HEADER ROW DELIMITER ',' OPTIONS ('quote' '~') -LOCATION '../../testing/data/csv/csv_custom_quote.csv'; +LOCATION '../core/tests/data/custom_quote.csv'; statement ok -CREATE EXTERNAL TABLE csv_custom_escape ( +CREATE EXTERNAL TABLE custom_escape ( c1 VARCHAR DEFAULT NULL, c2 VARCHAR DEFAULT NULL ) @@ -39,10 +39,10 @@ STORED AS CSV WITH HEADER ROW DELIMITER ',' OPTIONS ('escape' '\') -LOCATION '../../testing/data/csv/csv_custom_escape.csv'; +LOCATION '../core/tests/data/custom_escape.csv'; query TT -SELECT * from csv_custom_quote; +SELECT * from custom_quote; ---- id0 value0 id1 value1 @@ -57,7 +57,7 @@ id9 value9 query TT -SELECT * from csv_custom_escape; +SELECT * from custom_escape; ---- id0 value"0 id1 value"1 From 126b3c24983be950d1d937bab3270b22be69f6f6 Mon Sep 17 00:00:00 2001 From: Asura7969 <1402357969@qq.com> Date: Thu, 23 Nov 2023 15:47:24 +0800 Subject: [PATCH 5/5] fix: use enable_testdir --- .../common/src/file_options/csv_writer.rs | 5 -- .../src/datasource/listing_table_factory.rs | 4 -- datafusion/core/tests/data/custom_escape.csv | 11 ----- datafusion/core/tests/data/custom_quote.csv | 11 ----- datafusion/sqllogictest/src/test_context.rs | 36 ++++++++++++++ .../sqllogictest/test_files/csv_files.slt | 49 +++++-------------- 6 files changed, 48 insertions(+), 68 deletions(-) delete mode 100644 datafusion/core/tests/data/custom_escape.csv delete mode 100644 datafusion/core/tests/data/custom_quote.csv diff --git a/datafusion/common/src/file_options/csv_writer.rs b/datafusion/common/src/file_options/csv_writer.rs index 35f2e62e7b1a..868dc9bf4916 100644 --- a/datafusion/common/src/file_options/csv_writer.rs +++ b/datafusion/common/src/file_options/csv_writer.rs @@ -91,11 +91,6 @@ impl TryFrom<(&ConfigOptions, &StatementOptions)> for CsvWriterOptions { ) })?) }, - "quote" | "escape" => { - // These two attributes are only available when reading csv files. - // To avoid error - builder - }, _ => return Err(DataFusionError::Configuration(format!("Found unsupported option {option} with value {value} for CSV format!"))) } } diff --git a/datafusion/core/src/datasource/listing_table_factory.rs b/datafusion/core/src/datasource/listing_table_factory.rs index 38c9e492893c..543a3a83f7c5 100644 --- a/datafusion/core/src/datasource/listing_table_factory.rs +++ b/datafusion/core/src/datasource/listing_table_factory.rs @@ -71,10 +71,6 @@ impl TableProviderFactory for ListingTableFactory { CsvFormat::default() .with_has_header(cmd.has_header) .with_delimiter(cmd.delimiter as u8) - .with_quote( - cmd.options.get("quote").map_or(b'"', |x| x.as_bytes()[0]), - ) - .with_escape(cmd.options.get("escape").map(|x| x.as_bytes()[0])) .with_file_compression_type(file_compression_type), ), #[cfg(feature = "parquet")] diff --git a/datafusion/core/tests/data/custom_escape.csv b/datafusion/core/tests/data/custom_escape.csv deleted file mode 100644 index 331a1e697329..000000000000 --- a/datafusion/core/tests/data/custom_escape.csv +++ /dev/null @@ -1,11 +0,0 @@ -c1,c2 -"id0","value\"0" -"id1","value\"1" -"id2","value\"2" -"id3","value\"3" -"id4","value\"4" -"id5","value\"5" -"id6","value\"6" -"id7","value\"7" -"id8","value\"8" -"id9","value\"9" diff --git a/datafusion/core/tests/data/custom_quote.csv b/datafusion/core/tests/data/custom_quote.csv deleted file mode 100644 index d81488436409..000000000000 --- a/datafusion/core/tests/data/custom_quote.csv +++ /dev/null @@ -1,11 +0,0 @@ -c1,c2 -~id0~,~value0~ -~id1~,~value1~ -~id2~,~value2~ -~id3~,~value3~ -~id4~,~value4~ -~id5~,~value5~ -~id6~,~value6~ -~id7~,~value7~ -~id8~,~value8~ -~id9~,~value9~ diff --git a/datafusion/sqllogictest/src/test_context.rs b/datafusion/sqllogictest/src/test_context.rs index f5ab8f71aaaf..653f78b60629 100644 --- a/datafusion/sqllogictest/src/test_context.rs +++ b/datafusion/sqllogictest/src/test_context.rs @@ -104,6 +104,10 @@ impl TestContext { info!("Registering metadata table tables"); register_metadata_tables(test_ctx.session_ctx()).await; } + "csv_files.slt" => { + info!("Registering metadata table tables"); + register_csv_custom_tables(&mut test_ctx).await; + } _ => { info!("Using default SessionContext"); } @@ -219,6 +223,38 @@ pub async fn register_partition_table(test_ctx: &mut TestContext) { .unwrap(); } +pub async fn register_csv_custom_tables(test_ctx: &mut TestContext) { + test_ctx.enable_testdir(); + let schema = Arc::new(Schema::new(vec![ + Field::new("c1", DataType::Utf8, false), + Field::new("c2", DataType::Utf8, false), + ])); + let filename = format!("quote_escape.{}", "csv"); + let file_path = test_ctx.testdir_path().join(filename); + let mut file = File::create(file_path.clone()).unwrap(); + + // generate some data + for index in 0..10 { + let text1 = format!("id{index:}"); + let text2 = format!("value{index:}"); + let data = format!("~{text1}~,~{text2}~\r\n"); + file.write_all(data.as_bytes()).unwrap(); + } + test_ctx + .ctx + .register_csv( + "test_custom_quote_escape", + file_path.to_str().unwrap(), + CsvReadOptions::new() + .schema(&schema) + .has_header(false) + .quote(b'~') + .escape(b'\\'), + ) + .await + .unwrap(); +} + // registers a LOCAL TEMPORARY table. pub async fn register_temp_table(ctx: &SessionContext) { struct TestTable(TableType); diff --git a/datafusion/sqllogictest/test_files/csv_files.slt b/datafusion/sqllogictest/test_files/csv_files.slt index bd0fd727052e..d9e3f51f8544 100644 --- a/datafusion/sqllogictest/test_files/csv_files.slt +++ b/datafusion/sqllogictest/test_files/csv_files.slt @@ -19,30 +19,20 @@ ## Csv Files Tests ############# -statement ok -CREATE EXTERNAL TABLE custom_quote ( - c1 VARCHAR DEFAULT NULL, - c2 VARCHAR DEFAULT NULL -) -STORED AS CSV -WITH HEADER ROW -DELIMITER ',' -OPTIONS ('quote' '~') -LOCATION '../core/tests/data/custom_quote.csv'; - -statement ok -CREATE EXTERNAL TABLE custom_escape ( - c1 VARCHAR DEFAULT NULL, - c2 VARCHAR DEFAULT NULL -) -STORED AS CSV -WITH HEADER ROW -DELIMITER ',' -OPTIONS ('escape' '\') -LOCATION '../core/tests/data/custom_escape.csv'; +# TODO: https://github.com/apache/arrow-datafusion/issues/8310 +# statement ok +# CREATE EXTERNAL TABLE test_custom_quote_escape ( +# c1 VARCHAR DEFAULT NULL, +# c2 VARCHAR DEFAULT NULL +# ) +# STORED AS CSV +# WITH HEADER ROW +# DELIMITER ',' +# OPTIONS ('quote' '~', 'escape' '\') +# LOCATION 'test_custom_quote_escape.csv'; query TT -SELECT * from custom_quote; +SELECT * from test_custom_quote_escape; ---- id0 value0 id1 value1 @@ -54,18 +44,3 @@ id6 value6 id7 value7 id8 value8 id9 value9 - - -query TT -SELECT * from custom_escape; ----- -id0 value"0 -id1 value"1 -id2 value"2 -id3 value"3 -id4 value"4 -id5 value"5 -id6 value"6 -id7 value"7 -id8 value"8 -id9 value"9