Files
smallpond/generated/smallpond.logical.dataset.ParquetDataSet.html
2025-03-06 02:23:52 +08:00

748 lines
46 KiB
HTML
Raw Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<!-- Exactly one viewport declaration: repeating the tag is redundant and leaves the effective value to browser tie-breaking. -->
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>smallpond.logical.dataset.ParquetDataSet &#8212; smallpond documentation</title>
<!-- Inline script placed ahead of the stylesheets: copies the saved "mode"/"theme" values from localStorage onto <html> as data attributes ("theme" falls back to "light", "mode" to an empty string). -->
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="../_static/styles/theme.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link href="../_static/styles/bootstrap.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link href="../_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=5b4479735964841361fd" rel="stylesheet" />
<!-- Font preloads carry the crossorigin attribute, as font fetches require. -->
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="../_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=5b4479735964841361fd" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=5b4479735964841361fd" />
<script src="../_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=5b4479735964841361fd"></script>
<script data-url_root="../" id="documentation_options" src="../_static/documentation_options.js?v=b3ba4146"></script>
<script src="../_static/doctools.js?v=888ff710"></script>
<script src="../_static/sphinx_highlight.js?v=4825356b"></script>
<!-- Assigns the page name onto the DOCUMENTATION_OPTIONS global; must stay after documentation_options.js, which is presumably where that global is defined (confirm). -->
<script>DOCUMENTATION_OPTIONS.pagename = 'generated/smallpond.logical.dataset.ParquetDataSet';</script>
<!-- Document relations: index, search, and the next/previous pages in the Dataset API listing. -->
<link rel="index" title="Index" href="../genindex.html" />
<link rel="search" title="Search" href="../search.html" />
<link rel="next" title="smallpond.logical.dataset.CsvDataSet" href="smallpond.logical.dataset.CsvDataSet.html" />
<link rel="prev" title="smallpond.logical.dataset.FileSet" href="smallpond.logical.dataset.FileSet.html" />
<meta name="docsearch:language" content="en"/>
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<!-- First focusable element: jumps to the element with id "main-content". -->
<a class="skip-link" href="#main-content">Skip to main content</a>
<!-- Empty hook element; presumably read by the theme script for scroll detection (TODO confirm). -->
<div id="pst-scroll-pixel-helper"></div>
<button class="btn rounded-pill" id="pst-back-to-top" type="button">
  <i class="fa-solid fa-arrow-up"></i>
  Back to top
</button>
<!-- State checkboxes for the two sidebars; each is paired with an empty overlay label and is also targeted by a toggle label in the navbar via for="__primary" / for="__secondary". -->
<input class="sidebar-toggle" id="__primary" name="__primary" type="checkbox">
<label class="overlay overlay-primary" for="__primary"></label>
<input class="sidebar-toggle" id="__secondary" name="__secondary" type="checkbox">
<label class="overlay overlay-secondary" for="__secondary"></label>
<!-- Search overlay: an empty backdrop element plus the search form itself. -->
<div class="search-button__wrapper">
  <div class="search-button__overlay"></div>
  <div class="search-button__search-container">
    <!-- GET form: sends the query as the "q" parameter to ../search.html. -->
    <form class="bd-search d-flex align-items-center" action="../search.html" method="get">
      <i class="fa-solid fa-magnifying-glass"></i>
      <!-- No visible <label>; the accessible name comes from aria-label. -->
      <input class="form-control" id="search-input" name="q" type="search"
             placeholder="Search the docs ..." aria-label="Search the docs ..."
             autocomplete="off" autocorrect="off" autocapitalize="off" spellcheck="false">
      <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
    </form>
  </div>
</div>
<!-- Top navigation bar: sidebar toggle labels, site title link, top-level section links, and the search / theme-switch / GitHub controls. -->
<nav class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<!-- Clicking this label toggles the checkbox with id "__primary". It has no text content, only an icon span. -->
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../index.html">
<p class="title logo__title">smallpond documentation</p>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<!-- NOTE(review): this paragraph is exposed as a level-1 heading (role="heading", aria-level="1") in addition to the page's own <h1>; confirm that is intended. -->
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<!-- Top-level sections; the entry for the section containing this page carries the "current active" classes. -->
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../getstarted.html">
Getting Started
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../internals.html">
Internals
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="../api.html">
API Reference
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<!-- The search button is emitted with document.write, so it only exists when JavaScript runs. -->
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<!-- Theme switch button, also written by script; it contains one icon span per data-mode value (light, dark, auto). -->
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<!-- External project link: opens in a new tab (target="_blank") with rel="noopener"; the icon is hidden from assistive technology and the "sr-only" span supplies the text. -->
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/deepseek-ai/smallpond" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
</ul></div>
</div>
</div>
<!-- Second copy of the search button; the class name suggests it is the mobile-layout variant (confirm against the theme CSS). -->
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<!-- Toggles the checkbox with id "__secondary". Unlike the primary toggle label above, this one carries tabindex="0" and is therefore in the keyboard tab order. -->
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</nav>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar">
<!-- Sidebar copy of the header controls: the same three top-level links, theme switch and GitHub link that appear in the top navbar (the search button is not repeated here). -->
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<!-- NOTE(review): exposed as a level-1 heading via role="heading" / aria-level="1"; the page also has a real <h1>. Confirm that is intended. -->
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="../getstarted.html">
Getting Started
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="../internals.html">
Internals
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="../api.html">
API Reference
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<!-- Theme switch button is emitted with document.write, so it only exists when JavaScript runs. -->
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<!-- External project link: opens in a new tab with rel="noopener"; the "sr-only" span supplies the link text because the icon is aria-hidden. -->
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/deepseek-ai/smallpond" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__start sidebar-primary__section">
<div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
aria-label="Section Navigation">
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
<div class="bd-toc-item navbar-nav"><ul class="nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="../api/dataframe.html">DataFrame</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="smallpond.init.html">smallpond.init</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.Session.from_items.html">smallpond.dataframe.Session.from_items</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.Session.from_arrow.html">smallpond.dataframe.Session.from_arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.Session.from_pandas.html">smallpond.dataframe.Session.from_pandas</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.Session.read_csv.html">smallpond.dataframe.Session.read_csv</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.Session.read_json.html">smallpond.dataframe.Session.read_json</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.Session.read_parquet.html">smallpond.dataframe.Session.read_parquet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.repartition.html">smallpond.dataframe.DataFrame.repartition</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.Session.partial_sql.html">smallpond.dataframe.Session.partial_sql</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.map.html">smallpond.dataframe.DataFrame.map</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.map_batches.html">smallpond.dataframe.DataFrame.map_batches</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.flat_map.html">smallpond.dataframe.DataFrame.flat_map</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.filter.html">smallpond.dataframe.DataFrame.filter</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.limit.html">smallpond.dataframe.DataFrame.limit</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.partial_sort.html">smallpond.dataframe.DataFrame.partial_sort</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.random_shuffle.html">smallpond.dataframe.DataFrame.random_shuffle</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.count.html">smallpond.dataframe.DataFrame.count</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.take.html">smallpond.dataframe.DataFrame.take</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.take_all.html">smallpond.dataframe.DataFrame.take_all</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.to_arrow.html">smallpond.dataframe.DataFrame.to_arrow</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.to_pandas.html">smallpond.dataframe.DataFrame.to_pandas</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.write_parquet.html">smallpond.dataframe.DataFrame.write_parquet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.write_parquet_lazy.html">smallpond.dataframe.DataFrame.write_parquet_lazy</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.compute.html">smallpond.dataframe.DataFrame.compute</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.is_computed.html">smallpond.dataframe.DataFrame.is_computed</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.DataFrame.recompute.html">smallpond.dataframe.DataFrame.recompute</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.dataframe.Session.wait.html">smallpond.dataframe.Session.wait</a></li>
</ul>
</li>
</ul>
<ul class="current nav bd-sidenav">
<!-- "Dataset" branch of the section tree. Its toggle checkbox is pre-checked and it carries the "current" classes because it contains this page. -->
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../api/dataset.html">Dataset</a><input checked="" class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.dataset.DataSet.html">smallpond.logical.dataset.DataSet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.dataset.FileSet.html">smallpond.logical.dataset.FileSet</a></li>
<!-- Entry for this page: aria-current="page" exposes the "current" state to assistive technology, matching how the breadcrumb marks the current page. -->
<li class="toctree-l2 current active"><a class="current reference internal" href="#" aria-current="page">smallpond.logical.dataset.ParquetDataSet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.dataset.CsvDataSet.html">smallpond.logical.dataset.CsvDataSet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.dataset.JsonDataSet.html">smallpond.logical.dataset.JsonDataSet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.dataset.ArrowTableDataSet.html">smallpond.logical.dataset.ArrowTableDataSet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.dataset.PandasDataSet.html">smallpond.logical.dataset.PandasDataSet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.dataset.PartitionedDataSet.html">smallpond.logical.dataset.PartitionedDataSet</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.dataset.SqlQueryDataSet.html">smallpond.logical.dataset.SqlQueryDataSet</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../api/nodes.html">Nodes</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.Context.html">smallpond.logical.node.Context</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.NodeId.html">smallpond.logical.node.NodeId</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.LogicalPlan.html">smallpond.logical.node.LogicalPlan</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.LogicalPlanVisitor.html">smallpond.logical.node.LogicalPlanVisitor</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.Node.html">smallpond.logical.node.Node</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.DataSetPartitionNode.html">smallpond.logical.node.DataSetPartitionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.ArrowBatchNode.html">smallpond.logical.node.ArrowBatchNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.ArrowComputeNode.html">smallpond.logical.node.ArrowComputeNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.ArrowStreamNode.html">smallpond.logical.node.ArrowStreamNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.ConsolidateNode.html">smallpond.logical.node.ConsolidateNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.DataSinkNode.html">smallpond.logical.node.DataSinkNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.DataSourceNode.html">smallpond.logical.node.DataSourceNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.EvenlyDistributedPartitionNode.html">smallpond.logical.node.EvenlyDistributedPartitionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.HashPartitionNode.html">smallpond.logical.node.HashPartitionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.LimitNode.html">smallpond.logical.node.LimitNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.LoadPartitionedDataSetNode.html">smallpond.logical.node.LoadPartitionedDataSetNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.PandasBatchNode.html">smallpond.logical.node.PandasBatchNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.PandasComputeNode.html">smallpond.logical.node.PandasComputeNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.PartitionNode.html">smallpond.logical.node.PartitionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.ProjectionNode.html">smallpond.logical.node.ProjectionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.PythonScriptNode.html">smallpond.logical.node.PythonScriptNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.RangePartitionNode.html">smallpond.logical.node.RangePartitionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.RepeatPartitionNode.html">smallpond.logical.node.RepeatPartitionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.RootNode.html">smallpond.logical.node.RootNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.ShuffleNode.html">smallpond.logical.node.ShuffleNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.SqlEngineNode.html">smallpond.logical.node.SqlEngineNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.UnionNode.html">smallpond.logical.node.UnionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.UserDefinedPartitionNode.html">smallpond.logical.node.UserDefinedPartitionNode</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.logical.node.UserPartitionedDataSourceNode.html">smallpond.logical.node.UserPartitionedDataSourceNode</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../api/tasks.html">Tasks</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.RuntimeContext.html">smallpond.execution.task.RuntimeContext</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.JobId.html">smallpond.execution.task.JobId</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.TaskId.html">smallpond.execution.task.TaskId</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.TaskRuntimeId.html">smallpond.execution.task.TaskRuntimeId</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.PartitionInfo.html">smallpond.execution.task.PartitionInfo</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.PerfStats.html">smallpond.execution.task.PerfStats</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.ExecutionPlan.html">smallpond.execution.task.ExecutionPlan</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.Task.html">smallpond.execution.task.Task</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.ArrowBatchTask.html">smallpond.execution.task.ArrowBatchTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.ArrowComputeTask.html">smallpond.execution.task.ArrowComputeTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.ArrowStreamTask.html">smallpond.execution.task.ArrowStreamTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.DataSinkTask.html">smallpond.execution.task.DataSinkTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.DataSourceTask.html">smallpond.execution.task.DataSourceTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.EvenlyDistributedPartitionProducerTask.html">smallpond.execution.task.EvenlyDistributedPartitionProducerTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.HashPartitionArrowTask.html">smallpond.execution.task.HashPartitionArrowTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.HashPartitionDuckDbTask.html">smallpond.execution.task.HashPartitionDuckDbTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.HashPartitionTask.html">smallpond.execution.task.HashPartitionTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.LoadPartitionedDataSetProducerTask.html">smallpond.execution.task.LoadPartitionedDataSetProducerTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.MergeDataSetsTask.html">smallpond.execution.task.MergeDataSetsTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.PandasBatchTask.html">smallpond.execution.task.PandasBatchTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.PandasComputeTask.html">smallpond.execution.task.PandasComputeTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.PartitionConsumerTask.html">smallpond.execution.task.PartitionConsumerTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.PartitionProducerTask.html">smallpond.execution.task.PartitionProducerTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.ProjectionTask.html">smallpond.execution.task.ProjectionTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.PythonScriptTask.html">smallpond.execution.task.PythonScriptTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.RangePartitionTask.html">smallpond.execution.task.RangePartitionTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.RepeatPartitionProducerTask.html">smallpond.execution.task.RepeatPartitionProducerTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.RootTask.html">smallpond.execution.task.RootTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.SplitDataSetTask.html">smallpond.execution.task.SplitDataSetTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.SqlEngineTask.html">smallpond.execution.task.SqlEngineTask</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.task.UserDefinedPartitionProducerTask.html">smallpond.execution.task.UserDefinedPartitionProducerTask</a></li>
</ul>
</li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../api/execution.html">Execution</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.driver.Driver.html">smallpond.execution.driver.Driver</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.manager.JobManager.html">smallpond.execution.manager.JobManager</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.scheduler.Scheduler.html">smallpond.execution.scheduler.Scheduler</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.execution.executor.Executor.html">smallpond.execution.executor.Executor</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.platform.Platform.html">smallpond.platform.Platform</a></li>
<li class="toctree-l2"><a class="reference internal" href="smallpond.platform.MPI.html">smallpond.platform.MPI</a></li>
</ul>
</li>
</ul>
</div>
</nav></div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<!-- Article header: breadcrumb trail Home > API Reference > Dataset > (this page). -->
<div class="bd-header-article">
  <div class="header-article-items header-article__inner">
    <div class="header-article-items__start">
      <div class="header-article-item">
        <nav aria-label="Breadcrumb">
          <ul class="bd-breadcrumbs">
            <!-- Icon-only home link; its accessible name comes from aria-label. -->
            <li class="breadcrumb-item breadcrumb-home">
              <a class="nav-link" href="../index.html" aria-label="Home">
                <i class="fa-solid fa-home"></i>
              </a>
            </li>
            <li class="breadcrumb-item"><a class="nav-link" href="../api.html">API Reference</a></li>
            <li class="breadcrumb-item"><a class="nav-link" href="../api/dataset.html">Dataset</a></li>
            <!-- Final crumb is plain text (no link), marked as the current page; the label is an abbreviated form of the page title. -->
            <li class="breadcrumb-item active" aria-current="page">smallpond.lo...</li>
          </ul>
        </nav>
      </div>
    </div>
  </div>
</div>
<div id="searchbox"></div>
<article class="bd-article" role="main">
<section id="smallpond-logical-dataset-parquetdataset">
<h1>smallpond.logical.dataset.ParquetDataSet<a class="headerlink" href="#smallpond-logical-dataset-parquetdataset" title="Permalink to this heading">#</a></h1>
<dl class="py class">
<dt class="sig sig-object py" id="smallpond.logical.dataset.ParquetDataSet">
<em class="property"><span class="pre">class</span><span class="w"> </span></em><span class="sig-prename descclassname"><span class="pre">smallpond.logical.dataset.</span></span><span class="sig-name descname"><span class="pre">ParquetDataSet</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">paths</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">root_dir</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">''</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">recursive</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span 
class="pre">generated_columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">union_by_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span><a class="headerlink" href="#smallpond.logical.dataset.ParquetDataSet" title="Permalink to this definition">#</a></dt>
<dd><p>A set of parquet files.</p>
<dl class="py method">
<dt class="sig sig-object py" id="smallpond.logical.dataset.ParquetDataSet.__init__">
<span class="sig-name descname"><span class="pre">__init__</span></span><span class="sig-paren">(</span><em class="sig-param"><span class="n"><span class="pre">paths</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span></span></em>, <em class="sig-param"><span class="n"><span class="pre">root_dir</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">str</span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">''</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">recursive</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">generated_columns</span></span><span class="p"><span class="pre">:</span></span><span class="w"> </span><span class="n"><span class="pre">List</span><span class="p"><span 
class="pre">[</span></span><span class="pre">str</span><span class="p"><span class="pre">]</span></span><span class="w"> </span><span class="p"><span class="pre">|</span></span><span class="w"> </span><span class="pre">None</span></span><span class="w"> </span><span class="o"><span class="pre">=</span></span><span class="w"> </span><span class="default_value"><span class="pre">None</span></span></em>, <em class="sig-param"><span class="n"><span class="pre">union_by_name</span></span><span class="o"><span class="pre">=</span></span><span class="default_value"><span class="pre">False</span></span></em><span class="sig-paren">)</span> <span class="sig-return"><span class="sig-return-icon">&#x2192;</span> <span class="sig-return-typehint"><span class="pre">None</span></span></span><a class="headerlink" href="#smallpond.logical.dataset.ParquetDataSet.__init__" title="Permalink to this definition">#</a></dt>
<dd><p>Construct a dataset from a list of paths.</p>
<section id="parameters">
<h2>Parameters<a class="headerlink" href="#parameters" title="Permalink to this heading">#</a></h2>
<dl class="simple">
<dt>paths</dt><dd><p>A path, or a list of paths or path patterns,
e.g. <cite>[data/100.parquet, /datasetA/*.parquet]</cite>.</p>
</dd>
<dt>root_dir, optional</dt><dd><p>Relative paths in <cite>paths</cite> are resolved under <cite>root_dir</cite> if it is specified.</p>
</dd>
<dt>recursive, optional</dt><dd><p>Resolve path patterns recursively if true.</p>
</dd>
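<dt>columns, optional</dt><dd><p>Only load the specified columns if not None.</p>
</dd>
<dt>generated_columns, optional</dt><dd><p>Generated columns of the DuckDB <cite>read_parquet</cite> function.</p>
</dd>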
<dt>union_by_name, optional</dt><dd><p>Unify the columns of different files by name (see <a class="reference external" href="https://duckdb.org/docs/data/multiple_files/combining_schemas#union-by-name">https://duckdb.org/docs/data/multiple_files/combining_schemas#union-by-name</a>).</p>
</dd>
</dl>
</section>
</dd></dl>
<p class="rubric">Methods</p>
<table class="autosummary longtable table autosummary">
<tbody>
<tr class="row-odd"><td><p><a class="reference internal" href="#smallpond.logical.dataset.ParquetDataSet.__init__" title="smallpond.logical.dataset.ParquetDataSet.__init__"><code class="xref py py-obj docutils literal notranslate"><span class="pre">__init__</span></code></a>(paths[, root_dir, recursive, ...])</p></td>
<td><p>Construct a dataset from a list of paths.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">create_from</span></code>(table, output_dir[, filename])</p></td>
<td><p></p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">load_partitioned_datasets</span></code>(npartition, ...[, ...])</p></td>
<td><p>Split the dataset into a list of partitioned datasets.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">log</span></code>([num_rows])</p></td>
<td><p>Log the dataset to the logger.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">merge</span></code>(datasets)</p></td>
<td><p>Merge multiple datasets into a single dataset.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">partition_by_files</span></code>(npartition[, random_shuffle])</p></td>
<td><p>Evenly split into <cite>npartition</cite> datasets by files.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">partition_by_rows</span></code>(npartition[, random_shuffle])</p></td>
<td><p>Evenly split the dataset into <cite>npartition</cite> partitions by rows.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">partition_by_size</span></code>(max_partition_size)</p></td>
<td><p>Split the dataset into multiple partitions so that each partition has at most <cite>max_partition_size</cite> bytes.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">remove_empty_files</span></code>()</p></td>
<td><p>Remove empty parquet files from the dataset.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">reset</span></code>([paths, root_dir, recursive])</p></td>
<td><p>NOTE: all row ranges will be cleared.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">sql_query_fragment</span></code>([filesystem, conn])</p></td>
<td><p>Return a sql fragment that represents the dataset.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_arrow_table</span></code>([max_workers, filesystem, conn])</p></td>
<td><p>Load the dataset to an arrow table.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_batch_reader</span></code>([batch_size, filesystem, conn])</p></td>
<td><p>Return an arrow record batch reader to read the dataset.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">to_pandas</span></code>()</p></td>
<td><p>Convert the dataset to a pandas dataframe.</p></td>
</tr>
</tbody>
</table>
<p class="rubric">Attributes</p>
<table class="autosummary longtable table autosummary">
<tbody>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">generated_columns</span></code></p></td>
<td><p>Generated columns of the DuckDB <cite>read_parquet</cite> function.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">absolute_paths</span></code></p></td>
<td><p>An ordered list of absolute paths of the given file patterns.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">columns</span></code></p></td>
<td><p>The columns to load from the dataset files.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">empty</span></code></p></td>
<td><p>Whether the dataset is empty.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">estimated_data_size</span></code></p></td>
<td><p>Return the estimated data size in bytes.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">num_files</span></code></p></td>
<td><p>The number of files in the dataset.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">num_rows</span></code></p></td>
<td><p>The number of rows in the dataset.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">paths</span></code></p></td>
<td><p>The paths to the dataset files.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">recursive</span></code></p></td>
<td><p>Whether to resolve path patterns recursively.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">resolved_paths</span></code></p></td>
<td><p>An ordered list of absolute paths of files.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">resolved_row_ranges</span></code></p></td>
<td><p>Return row ranges for each parquet file.</p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">root_dir</span></code></p></td>
<td><p>The root directory of paths.</p></td>
</tr>
<tr class="row-odd"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">udfs</span></code></p></td>
<td><p></p></td>
</tr>
<tr class="row-even"><td><p><code class="xref py py-obj docutils literal notranslate"><span class="pre">union_by_name</span></code></p></td>
<td><p>Whether to unify the columns of different files by name.</p></td>
</tr>
</tbody>
</table>
</dd></dl>
</section>
</article>
<!-- Previous/next page links shown below the article body. -->
<footer class="prev-next-footer">
  <div class="prev-next-area">
    <a class="left-prev"
       href="smallpond.logical.dataset.FileSet.html"
       title="previous page">
      <!-- Decorative chevron: the link text already names the target, so hide the icon glyph from assistive technology. -->
      <i class="fa-solid fa-angle-left" aria-hidden="true"></i>
      <div class="prev-next-info">
        <p class="prev-next-subtitle">previous</p>
        <p class="prev-next-title">smallpond.logical.dataset.FileSet</p>
      </div>
    </a>
    <a class="right-next"
       href="smallpond.logical.dataset.CsvDataSet.html"
       title="next page">
      <div class="prev-next-info">
        <p class="prev-next-subtitle">next</p>
        <p class="prev-next-title">smallpond.logical.dataset.CsvDataSet</p>
      </div>
      <!-- Decorative chevron, hidden from assistive technology for the same reason as above. -->
      <i class="fa-solid fa-angle-right" aria-hidden="true"></i>
    </a>
  </div>
</footer>
</div>
<!-- Secondary sidebar: in-page table of contents followed by a link to the page's reStructuredText source. -->
<div class="bd-sidebar-secondary bd-toc">
  <div class="sidebar-secondary-items sidebar-secondary__inner">
    <div class="sidebar-secondary-item">
      <div class="page-toc tocsection onthispage">
        <!-- Decorative icon; the adjacent text carries the meaning. -->
        <i class="fa-solid fa-list" aria-hidden="true"></i> On this page
      </div>
      <!-- aria-label gives this navigation landmark an accessible name so it can be told apart from other nav regions. -->
      <nav class="bd-toc-nav page-toc" aria-label="On this page">
        <ul class="visible nav section-nav flex-column">
          <li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#smallpond.logical.dataset.ParquetDataSet"><code class="docutils literal notranslate"><span class="pre">ParquetDataSet</span></code></a>
            <ul class="nav section-nav flex-column">
              <li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#smallpond.logical.dataset.ParquetDataSet.__init__"><code class="docutils literal notranslate"><span class="pre">ParquetDataSet.__init__()</span></code></a></li>
            </ul>
          </li>
        </ul>
      </nav>
    </div>
    <div class="sidebar-secondary-item">
      <div class="tocsection sourcelink">
        <a href="../_sources/generated/smallpond.logical.dataset.ParquetDataSet.rst.txt">
          <!-- Decorative icon; "Show Source" is the link's accessible name. -->
          <i class="fa-solid fa-file-lines" aria-hidden="true"></i> Show Source
        </a>
      </div>
    </div>
  </div>
</div>
</div>
<!-- Footer of the main content area; it has no content on this page. -->
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Bootstrap and theme scripts are placed after the page content, near the end of the body, so that fetching them does not block parsing of the content above. -->
<script src="../_static/scripts/bootstrap.js?digest=5b4479735964841361fd"></script>
<script src="../_static/scripts/pydata-sphinx-theme.js?digest=5b4479735964841361fd"></script>
<!-- Site footer: a start group (copyright, Sphinx version) and an end group (theme version). -->
<footer class="bd-footer">
  <div class="bd-footer__inner bd-page-width">
    <!-- Start group -->
    <div class="footer-items__start">
      <div class="footer-item">
        <p class="copyright">
          © Copyright 2025, deepseek.
          <br/>
        </p>
      </div>
      <div class="footer-item">
        <p class="sphinx-version">
          Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.1.2.
          <br/>
        </p>
      </div>
    </div>
    <!-- End group -->
    <div class="footer-items__end">
      <div class="footer-item">
        <p class="theme-version">
          Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.14.4.
        </p>
      </div>
    </div>
  </div>
</footer>
</body>
</html>