Signed-off-by: Runji Wang <runji@deepseek.com>
This commit is contained in:
Runji Wang 2025-02-28 19:33:29 +08:00
parent 947af97bab
commit da5a1d2ffa
2 changed files with 9 additions and 2 deletions

View File

@ -19,6 +19,7 @@ authors = [
{ name = "Minghua Zhang" },
{ name = "Zhewen Hao" },
]
readme = "README.md"
urls = { Homepage = "https://github.com/deepseek-ai/smallpond" }
keywords = ["distributed query processing", "SQL", "parquet"]
requires-python = ">=3.8"

View File

@ -598,8 +598,13 @@ class DataFrame:
"""
if streaming:
def process_func(_runtime_ctx, readers: List[arrow.RecordBatchReader]) -> Iterator[arrow.Table]:
tables = map(lambda batch: arrow.Table.from_batches([batch]), readers[0])
def process_func(
_runtime_ctx, readers: List[arrow.RecordBatchReader]
) -> Iterator[arrow.Table]:
tables = map(
lambda batch: arrow.Table.from_batches([batch]), readers[0]
)
return func(tables)
plan = ArrowStreamNode(
@ -610,6 +615,7 @@ class DataFrame:
**kwargs,
)
else:
def process_func(_runtime_ctx, tables: List[arrow.Table]) -> arrow.Table:
return func(tables[0])