@@ -51,7 +51,7 @@ use crate::physical_plan::PyExecutionPlan;
5151use crate :: record_batch:: PyRecordBatchStream ;
5252use crate :: sql:: logical:: PyLogicalPlan ;
5353use crate :: utils:: {
54- get_tokio_runtime, py_obj_to_scalar_value, validate_pycapsule, wait_for_future,
54+ get_tokio_runtime, is_ipython_env , py_obj_to_scalar_value, validate_pycapsule, wait_for_future,
5555} ;
5656use crate :: {
5757 errors:: PyDataFusionResult ,
@@ -289,21 +289,33 @@ impl PyParquetColumnOptions {
289289#[ derive( Clone ) ]
290290pub struct PyDataFrame {
291291 df : Arc < DataFrame > ,
292+
293+ // In IPython environment cache batches between __repr__ and _repr_html_ calls.
294+ batches : Option < ( Vec < RecordBatch > , bool ) > ,
292295}
293296
294297impl PyDataFrame {
295298 /// creates a new PyDataFrame
296299 pub fn new ( df : DataFrame ) -> Self {
297- Self { df : Arc :: new ( df) }
300+ Self {
301+ df : Arc :: new ( df) ,
302+ batches : None ,
303+ }
298304 }
299305
300- fn prepare_repr_string ( & self , py : Python , as_html : bool ) -> PyDataFusionResult < String > {
306+ fn prepare_repr_string ( & mut self , py : Python , as_html : bool ) -> PyDataFusionResult < String > {
301307 // Get the Python formatter and config
302308 let PythonFormatter { formatter, config } = get_python_formatter_with_config ( py) ?;
303- let ( batches, has_more) = wait_for_future (
304- py,
305- collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
306- ) ??;
309+
310+ let should_cache = * is_ipython_env ( py) && self . batches . is_none ( ) ;
311+ let ( batches, has_more) = match self . batches . take ( ) {
312+ Some ( b) => b,
313+ None => wait_for_future (
314+ py,
315+ collect_record_batches_to_display ( self . df . as_ref ( ) . clone ( ) , config) ,
316+ ) ??,
317+ } ;
318+
307319 if batches. is_empty ( ) {
308320 // This should not be reached, but do it for safety since we index into the vector below
309321 return Ok ( "No data to display" . to_string ( ) ) ;
@@ -313,7 +325,7 @@ impl PyDataFrame {
313325
314326 // Convert record batches to PyObject list
315327 let py_batches = batches
316- . into_iter ( )
328+ . iter ( )
317329 . map ( |rb| rb. to_pyarrow ( py) )
318330 . collect :: < PyResult < Vec < PyObject > > > ( ) ?;
319331
@@ -334,6 +346,10 @@ impl PyDataFrame {
334346 let html_result = formatter. call_method ( method_name, ( ) , Some ( & kwargs) ) ?;
335347 let html_str: String = html_result. extract ( ) ?;
336348
349+ if should_cache {
350+ self . batches = Some ( ( batches, has_more) ) ;
351+ }
352+
337353 Ok ( html_str)
338354 }
339355}
@@ -361,7 +377,7 @@ impl PyDataFrame {
361377 }
362378 }
363379
364- fn __repr__ ( & self , py : Python ) -> PyDataFusionResult < String > {
380+ fn __repr__ ( & mut self , py : Python ) -> PyDataFusionResult < String > {
365381 self . prepare_repr_string ( py, false )
366382 }
367383
@@ -396,7 +412,7 @@ impl PyDataFrame {
396412 Ok ( format ! ( "DataFrame()\n {batches_as_displ}{additional_str}" ) )
397413 }
398414
399- fn _repr_html_ ( & self , py : Python ) -> PyDataFusionResult < String > {
415+ fn _repr_html_ ( & mut self , py : Python ) -> PyDataFusionResult < String > {
400416 self . prepare_repr_string ( py, true )
401417 }
402418
0 commit comments