mirror of
https://github.com/deepseek-ai/smallpond
synced 2025-06-26 18:27:45 +00:00
713 lines
43 KiB
HTML
713 lines
43 KiB
HTML
|
|
<!DOCTYPE html>
|
|
|
|
|
|
<html lang="en" data-content_root="" >
|
|
|
|
<head>
|
|
<meta charset="utf-8" />
|
|
<!-- A document needs only one viewport declaration. -->
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
|
|
<title>API Reference — smallpond documentation</title>
|
|
|
|
|
|
|
|
<script data-cfasync="false">
  // Copy the saved colour mode and theme onto <html> as data-mode / data-theme.
  // This script sits ahead of the stylesheet links in <head>, presumably so the
  // right theme is applied before first paint (confirm against the theme docs).
  (function () {
    var root = document.documentElement;
    var mode = "";
    var theme = "light";
    try {
      mode = localStorage.getItem("mode") || mode;
      theme = localStorage.getItem("theme") || theme;
    } catch (err) {
      // Accessing localStorage can throw (for example when storage is blocked
      // by the browser's privacy settings); fall back to the defaults above.
    }
    root.dataset.mode = mode;
    root.dataset.theme = theme;
  })();
</script>
|
|
|
|
<!-- Loaded before other Sphinx assets -->
|
|
<link href="_static/styles/theme.css?digest=5b4479735964841361fd" rel="stylesheet" />
|
|
<link href="_static/styles/bootstrap.css?digest=5b4479735964841361fd" rel="stylesheet" />
|
|
<link href="_static/styles/pydata-sphinx-theme.css?digest=5b4479735964841361fd" rel="stylesheet" />
|
|
|
|
|
|
<link href="_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=5b4479735964841361fd" rel="stylesheet" />
|
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" />
|
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" />
|
|
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" />
|
|
|
|
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=8f2a1f02" />
|
|
|
|
<!-- Pre-loaded scripts that we'll load fully later -->
|
|
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=5b4479735964841361fd" />
|
|
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=5b4479735964841361fd" />
|
|
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=5b4479735964841361fd"></script>
|
|
|
|
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js?v=b3ba4146"></script>
|
|
<script src="_static/doctools.js?v=888ff710"></script>
|
|
<script src="_static/sphinx_highlight.js?v=4825356b"></script>
|
|
<script>DOCUMENTATION_OPTIONS.pagename = 'api';</script>
|
|
<link rel="index" title="Index" href="genindex.html" />
|
|
<link rel="search" title="Search" href="search.html" />
|
|
<link rel="next" title="DataFrame" href="api/dataframe.html" />
|
|
<link rel="prev" title="Internals" href="internals.html" />
|
|
<!-- The viewport is already declared near the top of <head>; it is not repeated here. -->
<meta name="docsearch:language" content="en"/>
|
|
</head>
|
|
|
|
|
|
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
|
|
|
|
|
|
|
|
<a class="skip-link" href="#main-content">Skip to main content</a>
|
|
|
|
<div id="pst-scroll-pixel-helper"></div>
|
|
|
|
|
|
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
|
|
<i class="fa-solid fa-arrow-up"></i>
|
|
Back to top
|
|
</button>
|
|
|
|
|
|
<input type="checkbox"
|
|
class="sidebar-toggle"
|
|
name="__primary"
|
|
id="__primary"/>
|
|
<label class="overlay overlay-primary" for="__primary"></label>
|
|
|
|
<input type="checkbox"
|
|
class="sidebar-toggle"
|
|
name="__secondary"
|
|
id="__secondary"/>
|
|
<label class="overlay overlay-secondary" for="__secondary"></label>
|
|
|
|
<div class="search-button__wrapper">
|
|
<div class="search-button__overlay"></div>
|
|
<div class="search-button__search-container">
|
|
<form class="bd-search d-flex align-items-center"
|
|
action="search.html"
|
|
method="get">
|
|
<i class="fa-solid fa-magnifying-glass"></i>
|
|
<input type="search"
|
|
class="form-control"
|
|
name="q"
|
|
id="search-input"
|
|
placeholder="Search the docs ..."
|
|
aria-label="Search the docs ..."
|
|
autocomplete="off"
|
|
autocorrect="off"
|
|
autocapitalize="off"
|
|
spellcheck="false"/>
|
|
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
|
|
</form></div>
|
|
</div>
|
|
|
|
<nav class="bd-header navbar navbar-expand-lg bd-navbar">
|
|
<div class="bd-header__inner bd-page-width">
|
|
<label class="sidebar-toggle primary-toggle" for="__primary">
|
|
<span class="fa-solid fa-bars"></span>
|
|
</label>
|
|
|
|
|
|
<div class="col-lg-3 navbar-header-items__start">
|
|
|
|
<div class="navbar-item">
|
|
|
|
|
|
|
|
<a class="navbar-brand logo" href="index.html">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<p class="title logo__title">smallpond documentation</p>
|
|
|
|
</a></div>
|
|
|
|
</div>
|
|
|
|
<div class="col-lg-9 navbar-header-items">
|
|
|
|
<div class="me-auto navbar-header-items__center">
|
|
|
|
<div class="navbar-item">
|
|
<nav class="navbar-nav">
|
|
<p class="sidebar-header-items__title"
|
|
role="heading"
|
|
aria-level="1"
|
|
aria-label="Site Navigation">
|
|
Site Navigation
|
|
</p>
|
|
<ul class="bd-navbar-elements navbar-nav">
|
|
|
|
<li class="nav-item">
|
|
<a class="nav-link nav-internal" href="getstarted.html">
|
|
Getting Started
|
|
</a>
|
|
</li>
|
|
|
|
|
|
<li class="nav-item">
|
|
<a class="nav-link nav-internal" href="internals.html">
|
|
Internals
|
|
</a>
|
|
</li>
|
|
|
|
|
|
<!-- Current page: aria-current exposes to assistive technology what the "current active" classes show visually. -->
<li class="nav-item current active">
  <a class="nav-link nav-internal" href="#" aria-current="page">
    API Reference
  </a>
</li>
|
|
|
|
</ul>
|
|
</nav></div>
|
|
|
|
</div>
|
|
|
|
|
|
<div class="navbar-header-items__end">
|
|
|
|
<div class="navbar-item navbar-persistent--container">
|
|
|
|
|
|
<script>
  // Emitted from script, so the search button exists only when JavaScript runs.
  document.write(`
    <button type="button" class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
      <i class="fa-solid fa-magnifying-glass"></i>
      <span class="search-button__default-text">Search</span>
      <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
    </button>
  `);
</script>
|
|
</div>
|
|
|
|
|
|
<div class="navbar-item">
|
|
|
|
<script>
  // Emitted from script, so the theme switcher exists only when JavaScript runs.
  // One <span> per selectable mode: light, dark, auto.
  document.write(`
    <button type="button" class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
      <span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
      <span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
      <span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
    </button>
  `);
</script></div>
|
|
|
|
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
|
|
aria-label="Icon Links">
|
|
<li class="nav-item">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://github.com/deepseek-ai/smallpond" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
|
|
<span class="sr-only">GitHub</span></a>
|
|
</li>
|
|
</ul></div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
|
|
<div class="navbar-persistent--mobile">
|
|
|
|
<script>
  // Emitted from script, so the mobile search button exists only when JavaScript runs.
  document.write(`
    <button type="button" class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
      <i class="fa-solid fa-magnifying-glass"></i>
      <span class="search-button__default-text">Search</span>
      <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
    </button>
  `);
</script>
|
|
</div>
|
|
|
|
|
|
|
|
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
|
|
<span class="fa-solid fa-outdent"></span>
|
|
</label>
|
|
|
|
</div>
|
|
|
|
</nav>
|
|
|
|
<div class="bd-container">
|
|
<div class="bd-container__inner bd-page-width">
|
|
|
|
<div class="bd-sidebar-primary bd-sidebar">
|
|
|
|
|
|
|
|
<div class="sidebar-header-items sidebar-primary__section">
|
|
|
|
|
|
<div class="sidebar-header-items__center">
|
|
|
|
<div class="navbar-item">
|
|
<nav class="navbar-nav">
|
|
<p class="sidebar-header-items__title"
|
|
role="heading"
|
|
aria-level="1"
|
|
aria-label="Site Navigation">
|
|
Site Navigation
|
|
</p>
|
|
<ul class="bd-navbar-elements navbar-nav">
|
|
|
|
<li class="nav-item">
|
|
<a class="nav-link nav-internal" href="getstarted.html">
|
|
Getting Started
|
|
</a>
|
|
</li>
|
|
|
|
|
|
<li class="nav-item">
|
|
<a class="nav-link nav-internal" href="internals.html">
|
|
Internals
|
|
</a>
|
|
</li>
|
|
|
|
|
|
<!-- Current page: aria-current exposes to assistive technology what the "current active" classes show visually. -->
<li class="nav-item current active">
  <a class="nav-link nav-internal" href="#" aria-current="page">
    API Reference
  </a>
</li>
|
|
|
|
</ul>
|
|
</nav></div>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
<div class="sidebar-header-items__end">
|
|
|
|
<div class="navbar-item">
|
|
|
|
<script>
  // Emitted from script, so the sidebar theme switcher exists only when JavaScript runs.
  // One <span> per selectable mode: light, dark, auto.
  document.write(`
    <button type="button" class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
      <span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
      <span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
      <span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
    </button>
  `);
</script></div>
|
|
|
|
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
|
|
aria-label="Icon Links">
|
|
<li class="nav-item">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
<a href="https://github.com/deepseek-ai/smallpond" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
|
|
<span class="sr-only">GitHub</span></a>
|
|
</li>
|
|
</ul></div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
<div class="sidebar-primary-items__start sidebar-primary__section">
|
|
<div class="sidebar-primary-item">
|
|
<nav class="bd-docs-nav bd-links"
|
|
aria-label="Section Navigation">
|
|
<p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
|
|
<div class="bd-toc-item navbar-nav"><ul class="nav bd-sidenav">
|
|
<li class="toctree-l1 has-children"><a class="reference internal" href="api/dataframe.html">DataFrame</a><input class="toctree-checkbox" id="toctree-checkbox-1" name="toctree-checkbox-1" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-1"><i class="fa-solid fa-chevron-down"></i></label><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.init.html">smallpond.init</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.Session.from_items.html">smallpond.dataframe.Session.from_items</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.Session.from_arrow.html">smallpond.dataframe.Session.from_arrow</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.Session.from_pandas.html">smallpond.dataframe.Session.from_pandas</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.Session.read_csv.html">smallpond.dataframe.Session.read_csv</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.Session.read_json.html">smallpond.dataframe.Session.read_json</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.Session.read_parquet.html">smallpond.dataframe.Session.read_parquet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.repartition.html">smallpond.dataframe.DataFrame.repartition</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.Session.partial_sql.html">smallpond.dataframe.Session.partial_sql</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.map.html">smallpond.dataframe.DataFrame.map</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.map_batches.html">smallpond.dataframe.DataFrame.map_batches</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.flat_map.html">smallpond.dataframe.DataFrame.flat_map</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.filter.html">smallpond.dataframe.DataFrame.filter</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.limit.html">smallpond.dataframe.DataFrame.limit</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.partial_sort.html">smallpond.dataframe.DataFrame.partial_sort</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.random_shuffle.html">smallpond.dataframe.DataFrame.random_shuffle</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.count.html">smallpond.dataframe.DataFrame.count</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.take.html">smallpond.dataframe.DataFrame.take</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.take_all.html">smallpond.dataframe.DataFrame.take_all</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.to_arrow.html">smallpond.dataframe.DataFrame.to_arrow</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.to_pandas.html">smallpond.dataframe.DataFrame.to_pandas</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.write_parquet.html">smallpond.dataframe.DataFrame.write_parquet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.write_parquet_lazy.html">smallpond.dataframe.DataFrame.write_parquet_lazy</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.compute.html">smallpond.dataframe.DataFrame.compute</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.is_computed.html">smallpond.dataframe.DataFrame.is_computed</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.DataFrame.recompute.html">smallpond.dataframe.DataFrame.recompute</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.dataframe.Session.wait.html">smallpond.dataframe.Session.wait</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
<ul class="nav bd-sidenav">
|
|
<li class="toctree-l1 has-children"><a class="reference internal" href="api/dataset.html">Dataset</a><input class="toctree-checkbox" id="toctree-checkbox-2" name="toctree-checkbox-2" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-2"><i class="fa-solid fa-chevron-down"></i></label><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.DataSet.html">smallpond.logical.dataset.DataSet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.FileSet.html">smallpond.logical.dataset.FileSet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.ParquetDataSet.html">smallpond.logical.dataset.ParquetDataSet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.CsvDataSet.html">smallpond.logical.dataset.CsvDataSet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.JsonDataSet.html">smallpond.logical.dataset.JsonDataSet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.ArrowTableDataSet.html">smallpond.logical.dataset.ArrowTableDataSet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.PandasDataSet.html">smallpond.logical.dataset.PandasDataSet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.PartitionedDataSet.html">smallpond.logical.dataset.PartitionedDataSet</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.dataset.SqlQueryDataSet.html">smallpond.logical.dataset.SqlQueryDataSet</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1 has-children"><a class="reference internal" href="api/nodes.html">Nodes</a><input class="toctree-checkbox" id="toctree-checkbox-3" name="toctree-checkbox-3" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-3"><i class="fa-solid fa-chevron-down"></i></label><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.Context.html">smallpond.logical.node.Context</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.NodeId.html">smallpond.logical.node.NodeId</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.LogicalPlan.html">smallpond.logical.node.LogicalPlan</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.LogicalPlanVisitor.html">smallpond.logical.node.LogicalPlanVisitor</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.Node.html">smallpond.logical.node.Node</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.DataSetPartitionNode.html">smallpond.logical.node.DataSetPartitionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.ArrowBatchNode.html">smallpond.logical.node.ArrowBatchNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.ArrowComputeNode.html">smallpond.logical.node.ArrowComputeNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.ArrowStreamNode.html">smallpond.logical.node.ArrowStreamNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.ConsolidateNode.html">smallpond.logical.node.ConsolidateNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.DataSinkNode.html">smallpond.logical.node.DataSinkNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.DataSourceNode.html">smallpond.logical.node.DataSourceNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.EvenlyDistributedPartitionNode.html">smallpond.logical.node.EvenlyDistributedPartitionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.HashPartitionNode.html">smallpond.logical.node.HashPartitionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.LimitNode.html">smallpond.logical.node.LimitNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.LoadPartitionedDataSetNode.html">smallpond.logical.node.LoadPartitionedDataSetNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.PandasBatchNode.html">smallpond.logical.node.PandasBatchNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.PandasComputeNode.html">smallpond.logical.node.PandasComputeNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.PartitionNode.html">smallpond.logical.node.PartitionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.ProjectionNode.html">smallpond.logical.node.ProjectionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.PythonScriptNode.html">smallpond.logical.node.PythonScriptNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.RangePartitionNode.html">smallpond.logical.node.RangePartitionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.RepeatPartitionNode.html">smallpond.logical.node.RepeatPartitionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.RootNode.html">smallpond.logical.node.RootNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.ShuffleNode.html">smallpond.logical.node.ShuffleNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.SqlEngineNode.html">smallpond.logical.node.SqlEngineNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.UnionNode.html">smallpond.logical.node.UnionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.UserDefinedPartitionNode.html">smallpond.logical.node.UserDefinedPartitionNode</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.logical.node.UserPartitionedDataSourceNode.html">smallpond.logical.node.UserPartitionedDataSourceNode</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1 has-children"><a class="reference internal" href="api/tasks.html">Tasks</a><input class="toctree-checkbox" id="toctree-checkbox-4" name="toctree-checkbox-4" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-4"><i class="fa-solid fa-chevron-down"></i></label><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.RuntimeContext.html">smallpond.execution.task.RuntimeContext</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.JobId.html">smallpond.execution.task.JobId</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.TaskId.html">smallpond.execution.task.TaskId</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.TaskRuntimeId.html">smallpond.execution.task.TaskRuntimeId</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.PartitionInfo.html">smallpond.execution.task.PartitionInfo</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.PerfStats.html">smallpond.execution.task.PerfStats</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.ExecutionPlan.html">smallpond.execution.task.ExecutionPlan</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.Task.html">smallpond.execution.task.Task</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.ArrowBatchTask.html">smallpond.execution.task.ArrowBatchTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.ArrowComputeTask.html">smallpond.execution.task.ArrowComputeTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.ArrowStreamTask.html">smallpond.execution.task.ArrowStreamTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.DataSinkTask.html">smallpond.execution.task.DataSinkTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.DataSourceTask.html">smallpond.execution.task.DataSourceTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.EvenlyDistributedPartitionProducerTask.html">smallpond.execution.task.EvenlyDistributedPartitionProducerTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.HashPartitionArrowTask.html">smallpond.execution.task.HashPartitionArrowTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.HashPartitionDuckDbTask.html">smallpond.execution.task.HashPartitionDuckDbTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.HashPartitionTask.html">smallpond.execution.task.HashPartitionTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.LoadPartitionedDataSetProducerTask.html">smallpond.execution.task.LoadPartitionedDataSetProducerTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.MergeDataSetsTask.html">smallpond.execution.task.MergeDataSetsTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.PandasBatchTask.html">smallpond.execution.task.PandasBatchTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.PandasComputeTask.html">smallpond.execution.task.PandasComputeTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.PartitionConsumerTask.html">smallpond.execution.task.PartitionConsumerTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.PartitionProducerTask.html">smallpond.execution.task.PartitionProducerTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.ProjectionTask.html">smallpond.execution.task.ProjectionTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.PythonScriptTask.html">smallpond.execution.task.PythonScriptTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.RangePartitionTask.html">smallpond.execution.task.RangePartitionTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.RepeatPartitionProducerTask.html">smallpond.execution.task.RepeatPartitionProducerTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.RootTask.html">smallpond.execution.task.RootTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.SplitDataSetTask.html">smallpond.execution.task.SplitDataSetTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.SqlEngineTask.html">smallpond.execution.task.SqlEngineTask</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.task.UserDefinedPartitionProducerTask.html">smallpond.execution.task.UserDefinedPartitionProducerTask</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1 has-children"><a class="reference internal" href="api/execution.html">Execution</a><input class="toctree-checkbox" id="toctree-checkbox-5" name="toctree-checkbox-5" type="checkbox"/><label class="toctree-toggle" for="toctree-checkbox-5"><i class="fa-solid fa-chevron-down"></i></label><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.driver.Driver.html">smallpond.execution.driver.Driver</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.manager.JobManager.html">smallpond.execution.manager.JobManager</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.scheduler.Scheduler.html">smallpond.execution.scheduler.Scheduler</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.execution.executor.Executor.html">smallpond.execution.executor.Executor</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.platform.Platform.html">smallpond.platform.Platform</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="generated/smallpond.platform.MPI.html">smallpond.platform.MPI</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</nav></div>
|
|
</div>
|
|
|
|
|
|
<div class="sidebar-primary-items__end sidebar-primary__section">
|
|
</div>
|
|
|
|
<div id="rtd-footer-container"></div>
|
|
|
|
|
|
</div>
|
|
|
|
<main id="main-content" class="bd-main">
|
|
|
|
|
|
<div class="bd-content">
|
|
<div class="bd-article-container">
|
|
|
|
<div class="bd-header-article">
|
|
<div class="header-article-items header-article__inner">
|
|
|
|
<div class="header-article-items__start">
|
|
|
|
<div class="header-article-item">
|
|
|
|
|
|
|
|
<nav aria-label="Breadcrumb">
|
|
<ul class="bd-breadcrumbs">
|
|
|
|
<li class="breadcrumb-item breadcrumb-home">
|
|
<a href="index.html" class="nav-link" aria-label="Home">
|
|
<i class="fa-solid fa-home"></i>
|
|
</a>
|
|
</li>
|
|
<li class="breadcrumb-item active" aria-current="page">API Reference</li>
|
|
</ul>
|
|
</nav>
|
|
</div>
|
|
|
|
</div>
|
|
|
|
|
|
</div>
|
|
</div>
|
|
|
|
|
|
|
|
|
|
<div id="searchbox"></div>
|
|
<article class="bd-article" role="main">
|
|
|
|
<section id="api-reference">
|
|
<h1>API Reference<a class="headerlink" href="#api-reference" title="Permalink to this heading">#</a></h1>
|
|
<p>Smallpond provides both high-level and low-level APIs.</p>
|
|
<div class="admonition note">
|
|
<p class="admonition-title">Note</p>
|
|
<p>Currently, smallpond provides two different APIs, supporting dynamic and static construction of data flow graphs respectively. Due to historical reasons, these two APIs use different scheduler backends and support different configuration options.</p>
|
|
<ul class="simple">
|
|
<li><p>The high-level API currently uses Ray as the backend, supporting dynamic construction and execution of data flow graphs.</p></li>
|
|
<li><p>The low-level API uses a built-in scheduler and only supports one-time execution of static data flow graphs. However, it offers more performance optimizations and richer configuration options.</p></li>
|
|
</ul>
|
|
<p>We are working to merge them so that in the future, you can use a unified high-level API and freely choose between Ray or the built-in scheduler.</p>
|
|
</div>
|
|
<section id="high-level-api">
|
|
<h2>High-level API<a class="headerlink" href="#high-level-api" title="Permalink to this heading">#</a></h2>
|
|
<p>The high-level API is centered around <a class="reference internal" href="api/dataframe.html#dataframe"><span class="std std-ref">DataFrame</span></a>. It allows dynamic construction of data flow graphs, execution, and result retrieval.</p>
|
|
<p>A typical workflow looks like this:</p>
|
|
<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">import</span><span class="w"> </span><span class="nn">smallpond</span>
|
|
|
|
<span class="n">sp</span> <span class="o">=</span> <span class="n">smallpond</span><span class="o">.</span><span class="n">init</span><span class="p">()</span>
|
|
|
|
<span class="n">df</span> <span class="o">=</span> <span class="n">sp</span><span class="o">.</span><span class="n">read_parquet</span><span class="p">(</span><span class="s2">"path/to/dataset/*.parquet"</span><span class="p">)</span>
|
|
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">repartition</span><span class="p">(</span><span class="mi">10</span><span class="p">)</span>
|
|
<span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">map</span><span class="p">(</span><span class="s2">"x + 1"</span><span class="p">)</span>
|
|
<span class="n">df</span><span class="o">.</span><span class="n">write_parquet</span><span class="p">(</span><span class="s2">"path/to/output"</span><span class="p">)</span>
|
|
</pre></div>
|
|
</div>
|
|
<div class="toctree-wrapper compound">
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="api/dataframe.html">DataFrame</a><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/dataframe.html#initialization">Initialization</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/dataframe.html#loading-data">Loading Data</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/dataframe.html#partitioning-data">Partitioning Data</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/dataframe.html#transformations">Transformations</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/dataframe.html#consuming-data">Consuming Data</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/dataframe.html#execution">Execution</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<p>It is recommended to use the DataFrame API.</p>
|
|
</section>
|
|
<section id="low-level-api">
|
|
<h2>Low-level API<a class="headerlink" href="#low-level-api" title="Permalink to this heading">#</a></h2>
|
|
<p>In the low-level API, users manually create <a class="reference internal" href="api/nodes.html#nodes"><span class="std std-ref">Nodes</span></a> to construct static data flow graphs, then submit them to smallpond to generate <a class="reference internal" href="api/tasks.html#tasks"><span class="std std-ref">Tasks</span></a> and wait for all tasks to complete.</p>
|
|
<p>A complete example is shown below.</p>
|
|
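<div class="highlight-python notranslate"><div class="highlight"><pre><span></span><span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="n">List</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">smallpond.logical.dataset</span><span class="w"> </span><span class="kn">import</span> <span class="n">ParquetDataSet</span>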
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">smallpond.logical.node</span><span class="w"> </span><span class="kn">import</span> <span class="n">Context</span><span class="p">,</span> <span class="n">DataSourceNode</span><span class="p">,</span> <span class="n">DataSetPartitionNode</span><span class="p">,</span> <span class="n">SqlEngineNode</span><span class="p">,</span> <span class="n">LogicalPlan</span>
|
|
<span class="kn">from</span><span class="w"> </span><span class="nn">smallpond.execution.driver</span><span class="w"> </span><span class="kn">import</span> <span class="n">Driver</span>
|
|
|
|
<span class="k">def</span><span class="w"> </span><span class="nf">my_pipeline</span><span class="p">(</span><span class="n">input_paths</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="nb">str</span><span class="p">],</span> <span class="n">npartitions</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
|
|
<span class="n">ctx</span> <span class="o">=</span> <span class="n">Context</span><span class="p">()</span>
|
|
<span class="n">dataset</span> <span class="o">=</span> <span class="n">ParquetDataSet</span><span class="p">(</span><span class="n">input_paths</span><span class="p">)</span>
|
|
<span class="n">node</span> <span class="o">=</span> <span class="n">DataSourceNode</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span> <span class="n">dataset</span><span class="p">)</span>
|
|
<span class="n">node</span> <span class="o">=</span> <span class="n">DataSetPartitionNode</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span> <span class="p">(</span><span class="n">node</span><span class="p">,),</span> <span class="n">npartitions</span><span class="o">=</span><span class="n">npartitions</span><span class="p">)</span>
|
|
<span class="n">node</span> <span class="o">=</span> <span class="n">SqlEngineNode</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span> <span class="p">(</span><span class="n">node</span><span class="p">,),</span> <span class="s2">"SELECT * FROM </span><span class="si">{0}</span><span class="s2">"</span><span class="p">)</span>
|
|
<span class="k">return</span> <span class="n">LogicalPlan</span><span class="p">(</span><span class="n">ctx</span><span class="p">,</span> <span class="n">node</span><span class="p">)</span>
|
|
|
|
<span class="k">if</span> <span class="vm">__name__</span> <span class="o">==</span> <span class="s2">"__main__"</span><span class="p">:</span>
|
|
<span class="n">driver</span> <span class="o">=</span> <span class="n">Driver</span><span class="p">()</span>
|
|
<span class="n">driver</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-i"</span><span class="p">,</span> <span class="s2">"--input_paths"</span><span class="p">,</span> <span class="n">nargs</span><span class="o">=</span><span class="s2">"+"</span><span class="p">)</span>
|
|
<span class="n">driver</span><span class="o">.</span><span class="n">add_argument</span><span class="p">(</span><span class="s2">"-n"</span><span class="p">,</span> <span class="s2">"--npartitions"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="nb">int</span><span class="p">,</span> <span class="n">default</span><span class="o">=</span><span class="mi">10</span><span class="p">)</span>
|
|
|
|
<span class="n">plan</span> <span class="o">=</span> <span class="n">my_pipeline</span><span class="p">(</span><span class="o">**</span><span class="n">driver</span><span class="o">.</span><span class="n">get_arguments</span><span class="p">())</span>
|
|
<span class="n">driver</span><span class="o">.</span><span class="n">run</span><span class="p">(</span><span class="n">plan</span><span class="p">)</span>
|
|
</pre></div>
|
|
</div>
|
|
<p>To run this script:</p>
|
|
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>python<span class="w"> </span>script.py<span class="w"> </span>-i<span class="w"> </span><span class="s2">"path/to/*.parquet"</span><span class="w"> </span>-n<span class="w"> </span><span class="m">10</span>
|
|
</pre></div>
|
|
</div>
|
|
<div class="toctree-wrapper compound">
|
|
<ul>
|
|
<li class="toctree-l1"><a class="reference internal" href="api/dataset.html">Dataset</a><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/dataset.html#datasets">DataSets</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1"><a class="reference internal" href="api/nodes.html">Nodes</a><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/nodes.html#context">Context</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/nodes.html#logicalplan">LogicalPlan</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/nodes.html#id2">Nodes</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1"><a class="reference internal" href="api/tasks.html">Tasks</a><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/tasks.html#runtimecontext">RuntimeContext</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/tasks.html#executionplan">ExecutionPlan</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/tasks.html#id2">Tasks</a></li>
|
|
</ul>
|
|
</li>
|
|
<li class="toctree-l1"><a class="reference internal" href="api/execution.html">Execution</a><ul>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/execution.html#submit-a-job">Submit a Job</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/execution.html#scheduler-and-executor">Scheduler and Executor</a></li>
|
|
<li class="toctree-l2"><a class="reference internal" href="api/execution.html#customize-platform">Customize Platform</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</section>
|
|
</section>
|
|
|
|
|
|
</article>
|
|
|
|
|
|
|
|
|
|
|
|
<footer class="prev-next-footer">
|
|
|
|
<div class="prev-next-area">
|
|
<a class="left-prev"
|
|
href="internals.html"
|
|
title="previous page">
|
|
<i class="fa-solid fa-angle-left"></i>
|
|
<div class="prev-next-info">
|
|
<p class="prev-next-subtitle">previous</p>
|
|
<p class="prev-next-title">Internals</p>
|
|
</div>
|
|
</a>
|
|
<a class="right-next"
|
|
href="api/dataframe.html"
|
|
title="next page">
|
|
<div class="prev-next-info">
|
|
<p class="prev-next-subtitle">next</p>
|
|
<p class="prev-next-title">DataFrame</p>
|
|
</div>
|
|
<i class="fa-solid fa-angle-right"></i>
|
|
</a>
|
|
</div>
|
|
</footer>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
|
|
|
|
<div class="sidebar-secondary-item">
|
|
|
|
<div class="page-toc tocsection onthispage">
|
|
<i class="fa-solid fa-list"></i> On this page
|
|
</div>
|
|
<nav class="bd-toc-nav page-toc">
|
|
<ul class="visible nav section-nav flex-column">
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#high-level-api">High-level API</a></li>
|
|
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#low-level-api">Low-level API</a></li>
|
|
</ul>
|
|
</nav></div>
|
|
|
|
<div class="sidebar-secondary-item">
|
|
|
|
<div class="tocsection sourcelink">
|
|
<a href="_sources/api.rst.txt">
|
|
<i class="fa-solid fa-file-lines"></i> Show Source
|
|
</a>
|
|
</div>
|
|
</div>
|
|
|
|
</div></div>
|
|
|
|
|
|
</div>
|
|
<footer class="bd-footer-content">
|
|
|
|
</footer>
|
|
|
|
</main>
|
|
</div>
|
|
</div>
|
|
|
|
<!-- Scripts loaded after <body> so the DOM is not blocked -->
|
|
<script src="_static/scripts/bootstrap.js?digest=5b4479735964841361fd"></script>
|
|
<script src="_static/scripts/pydata-sphinx-theme.js?digest=5b4479735964841361fd"></script>
|
|
|
|
<footer class="bd-footer">
|
|
<div class="bd-footer__inner bd-page-width">
|
|
|
|
<div class="footer-items__start">
|
|
|
|
<div class="footer-item">
|
|
|
|
<p class="copyright">
|
|
|
|
© Copyright 2025, deepseek.
|
|
<br/>
|
|
|
|
</p>
|
|
</div>
|
|
|
|
<div class="footer-item">
|
|
|
|
<p class="sphinx-version">
|
|
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.1.2.
|
|
<br/>
|
|
</p>
|
|
</div>
|
|
|
|
</div>
|
|
|
|
|
|
|
|
<div class="footer-items__end">
|
|
|
|
<div class="footer-item">
|
|
<p class="theme-version">
|
|
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.14.4.
|
|
</p></div>
|
|
|
|
</div>
|
|
|
|
</div>
|
|
|
|
</footer>
|
|
</body>
|
|
</html> |