Skip to content

Commit 6cc7f96

Browse files
author
Jiayu Liu
committed
refactor sort exec stream and combine batches
1 parent 5c88450 commit 6cc7f96

4 files changed

Lines changed: 159 additions & 94 deletions

File tree

ballista/rust/core/src/serde/physical_plan/from_proto.rs

Lines changed: 15 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,6 @@
1717

1818
//! Serde code to convert from protocol buffers to Rust data structures.
1919
20-
use std::collections::HashMap;
21-
use std::convert::TryInto;
22-
use std::sync::Arc;
23-
2420
use crate::error::BallistaError;
2521
use crate::execution_plans::{ShuffleReaderExec, UnresolvedShuffleExec};
2622
use crate::serde::protobuf::repartition_exec_node::PartitionMethod;
@@ -45,7 +41,6 @@ use datafusion::physical_plan::planner::DefaultPhysicalPlanner;
4541
use datafusion::physical_plan::window_functions::{
4642
BuiltInWindowFunction, WindowFunction,
4743
};
48-
use datafusion::physical_plan::windows::create_window_expr;
4944
use datafusion::physical_plan::windows::WindowAggExec;
5045
use datafusion::physical_plan::{
5146
coalesce_batches::CoalesceBatchesExec,
@@ -67,6 +62,9 @@ use datafusion::prelude::CsvReadOptions;
6762
use log::debug;
6863
use protobuf::logical_expr_node::ExprType;
6964
use protobuf::physical_plan_node::PhysicalPlanType;
65+
use std::collections::HashMap;
66+
use std::convert::TryInto;
67+
use std::sync::Arc;
7068

7169
impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
7270
type Error = BallistaError;
@@ -211,6 +209,7 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
211209

212210
let catalog_list =
213211
Arc::new(MemoryCatalogList::new()) as Arc<dyn CatalogList>;
212+
214213
let ctx_state = ExecutionContextState {
215214
catalog_list,
216215
scalar_functions: Default::default(),
@@ -227,53 +226,19 @@ impl TryInto<Arc<dyn ExecutionPlan>> for &protobuf::PhysicalPlanNode {
227226
.map(|(expr, name)| expr.try_into().map(|expr| (expr, name.clone())))
228227
.collect::<Result<Vec<_>, _>>()?;
229228

230-
let mut physical_window_expr = vec![];
231-
232229
let df_planner = DefaultPhysicalPlanner::default();
233230

234-
for (expr, name) in &window_agg_expr {
235-
match expr {
236-
Expr::WindowFunction {
237-
fun,
238-
args,
239-
partition_by,
240-
order_by,
241-
window_frame,
242-
..
243-
} => {
244-
let arg = df_planner
245-
.create_physical_expr(
246-
&args[0],
247-
&physical_schema,
248-
&ctx_state,
249-
)
250-
.map_err(|e| {
251-
BallistaError::General(format!("{:?}", e))
252-
})?;
253-
if !partition_by.is_empty() {
254-
return Err(BallistaError::NotImplemented("Window function with partition by is not yet implemented".to_owned()));
255-
}
256-
if !order_by.is_empty() {
257-
return Err(BallistaError::NotImplemented("Window function with order by is not yet implemented".to_owned()));
258-
}
259-
if window_frame.is_some() {
260-
return Err(BallistaError::NotImplemented("Window function with window frame is not yet implemented".to_owned()));
261-
}
262-
let window_expr = create_window_expr(
263-
&fun,
264-
&[arg],
265-
&physical_schema,
266-
name.to_owned(),
267-
)?;
268-
physical_window_expr.push(window_expr);
269-
}
270-
_ => {
271-
return Err(BallistaError::General(
272-
"Invalid expression for WindowAggrExec".to_string(),
273-
));
274-
}
275-
}
276-
}
231+
let physical_window_expr = window_agg_expr
232+
.iter()
233+
.map(|(expr, name)| {
234+
df_planner.create_window_expr_with_name(
235+
expr,
236+
name.clone(),
237+
&physical_schema,
238+
&ctx_state,
239+
)
240+
})
241+
.collect::<Result<Vec<_>, _>>()?;
277242

278243
Ok(Arc::new(WindowAggExec::try_new(
279244
physical_window_expr,

datafusion/src/physical_plan/mod.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,9 @@ pub trait WindowExpr: Send + Sync + Debug {
470470
/// Functions which take a single input argument, such as `sum`, return a single [`Expr`];
471471
/// others (e.g. `cov`) return many.
472472
fn expressions(&self) -> Vec<Arc<dyn PhysicalExpr>>;
473+
474+
/// Get the sort key of this window function.
475+
fn sort_key(&self) -> &[Arc<dyn PhysicalExpr>];
473476
}
474477

475478
/// An accumulator represents a stateful object that lives throughout the evaluation of multiple rows and

datafusion/src/physical_plan/planner.rs

Lines changed: 64 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ use crate::physical_plan::{
4444
};
4545
use crate::prelude::JoinType;
4646
use crate::scalar::ScalarValue;
47+
use crate::sql::utils::generate_sort_key;
4748
use crate::variable::VarType;
4849
use crate::{
4950
error::{DataFusionError, Result},
@@ -143,7 +144,12 @@ impl DefaultPhysicalPlanner {
143144
LogicalPlan::Window {
144145
input, window_expr, ..
145146
} => {
146-
// Initially need to perform the aggregate and then merge the partitions
147+
if window_expr.is_empty() {
148+
return Err(DataFusionError::Internal(
149+
"Impossibly got empty window expression".to_owned(),
150+
));
151+
}
152+
147153
let input_exec = self.create_initial_plan(input, ctx_state)?;
148154
let input_schema = input_exec.schema();
149155

@@ -731,34 +737,59 @@ impl DefaultPhysicalPlanner {
731737
}
732738
}
733739

734-
/// Create a window expression from a logical expression
735-
pub fn create_window_expr(
740+
/// Create a window expression from a logical expression, using the given name
741+
pub fn create_window_expr_with_name(
736742
&self,
737743
e: &Expr,
738-
logical_input_schema: &DFSchema,
744+
name: String,
739745
physical_input_schema: &Schema,
740746
ctx_state: &ExecutionContextState,
741747
) -> Result<Arc<dyn WindowExpr>> {
742-
// unpack aliased logical expressions, e.g. "sum(col) over () as total"
743-
let (name, e) = match e {
744-
Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()),
745-
_ => (e.name(logical_input_schema)?, e),
746-
};
747-
748748
match e {
749-
Expr::WindowFunction { fun, args, .. } => {
749+
Expr::WindowFunction {
750+
fun,
751+
args,
752+
partition_by,
753+
order_by,
754+
window_frame,
755+
} => {
750756
let args = args
751757
.iter()
752758
.map(|e| {
753759
self.create_physical_expr(e, physical_input_schema, ctx_state)
754760
})
755761
.collect::<Result<Vec<_>>>()?;
756-
// if !order_by.is_empty() {
757-
// return Err(DataFusionError::NotImplemented(
758-
// "Window function with order by is not yet implemented".to_owned(),
759-
// ));
760-
// }
761-
windows::create_window_expr(fun, &args, physical_input_schema, name)
762+
if !partition_by.is_empty() {
763+
return Err(DataFusionError::NotImplemented(
764+
"Window function with partition by is not yet implemented"
765+
.to_owned(),
766+
));
767+
}
768+
if !order_by.is_empty() {
769+
return Err(DataFusionError::NotImplemented(
770+
"Window function with order by is not yet implemented".to_owned(),
771+
));
772+
}
773+
if window_frame.is_some() {
774+
return Err(DataFusionError::NotImplemented(
775+
"Window function with window frame is not yet implemented"
776+
.to_owned(),
777+
));
778+
}
779+
let sort_key = generate_sort_key(partition_by, order_by)
780+
.iter()
781+
.map(|e| {
782+
self.create_physical_expr(e, physical_input_schema, ctx_state)
783+
})
784+
.collect::<Result<Vec<_>>>()?;
785+
let expr = windows::create_window_expr(
786+
fun,
787+
&args,
788+
&sort_key,
789+
physical_input_schema,
790+
name,
791+
)?;
792+
Ok(expr)
762793
}
763794
other => Err(DataFusionError::Internal(format!(
764795
"Invalid window expression '{:?}'",
@@ -767,6 +798,22 @@ impl DefaultPhysicalPlanner {
767798
}
768799
}
769800

801+
/// Create a window expression from a logical expression
802+
pub fn create_window_expr(
803+
&self,
804+
e: &Expr,
805+
logical_input_schema: &DFSchema,
806+
physical_input_schema: &Schema,
807+
ctx_state: &ExecutionContextState,
808+
) -> Result<Arc<dyn WindowExpr>> {
809+
// unpack aliased logical expressions, e.g. "sum(col) over () as total"
810+
let (name, e) = match e {
811+
Expr::Alias(sub_expr, alias) => (alias.clone(), sub_expr.as_ref()),
812+
_ => (e.name(logical_input_schema)?, e),
813+
};
814+
self.create_window_expr_with_name(e, name, physical_input_schema, ctx_state)
815+
}
816+
770817
/// Create an aggregate expression from a logical expression
771818
pub fn create_aggregate_expr(
772819
&self,

0 commit comments

Comments
 (0)