1818//! CSV format abstractions
1919
2020use std:: any:: Any ;
21+
2122use std:: sync:: Arc ;
2223
2324use arrow:: datatypes:: Schema ;
2425use arrow:: { self , datatypes:: SchemaRef } ;
2526use async_trait:: async_trait;
27+ use bytes:: Buf ;
28+
2629use datafusion_common:: DataFusionError ;
30+
2731use futures:: TryFutureExt ;
2832use object_store:: { ObjectMeta , ObjectStore } ;
2933
3034use super :: FileFormat ;
35+ use crate :: datasource:: file_format:: file_type:: FileCompressionType ;
3136use crate :: datasource:: file_format:: DEFAULT_SCHEMA_INFER_MAX_RECORD ;
3237use crate :: error:: Result ;
3338use crate :: logical_plan:: Expr ;
@@ -43,6 +48,7 @@ pub struct CsvFormat {
4348 has_header : bool ,
4449 delimiter : u8 ,
4550 schema_infer_max_rec : Option < usize > ,
51+ file_compression_type : FileCompressionType ,
4652}
4753
4854impl Default for CsvFormat {
@@ -51,6 +57,7 @@ impl Default for CsvFormat {
5157 schema_infer_max_rec : Some ( DEFAULT_SCHEMA_INFER_MAX_RECORD ) ,
5258 has_header : true ,
5359 delimiter : b',' ,
60+ file_compression_type : FileCompressionType :: UNCOMPRESSED ,
5461 }
5562 }
5663}
@@ -82,6 +89,16 @@ impl CsvFormat {
8289 self
8390 }
8491
92+ /// Set a `FileCompressionType` of CSV
93+ /// - defaults to `FileCompressionType::UNCOMPRESSED`
94+ pub fn with_file_compression_type (
95+ mut self ,
96+ file_compression_type : FileCompressionType ,
97+ ) -> Self {
98+ self . file_compression_type = file_compression_type;
99+ self
100+ }
101+
85102 /// The delimiter character.
86103 pub fn delimiter ( & self ) -> u8 {
87104 self . delimiter
@@ -110,8 +127,9 @@ impl FileFormat for CsvFormat {
110127 . await
111128 . map_err ( |e| DataFusionError :: External ( Box :: new ( e) ) ) ?;
112129
130+ let decoder = self . file_compression_type . convert_read ( data. reader ( ) ) ;
113131 let ( schema, records_read) = arrow:: csv:: reader:: infer_reader_schema (
114- & mut data . as_ref ( ) ,
132+ decoder ,
115133 self . delimiter ,
116134 Some ( records_to_read) ,
117135 self . has_header ,
@@ -144,7 +162,12 @@ impl FileFormat for CsvFormat {
144162 conf : FileScanConfig ,
145163 _filters : & [ Expr ] ,
146164 ) -> Result < Arc < dyn ExecutionPlan > > {
147- let exec = CsvExec :: new ( conf, self . has_header , self . delimiter ) ;
165+ let exec = CsvExec :: new (
166+ conf,
167+ self . has_header ,
168+ self . delimiter ,
169+ self . file_compression_type . to_owned ( ) ,
170+ ) ;
148171 Ok ( Arc :: new ( exec) )
149172 }
150173}
0 commit comments