Files
smallpond/internals.html
2025-03-06 02:23:52 +08:00

519 lines
17 KiB
HTML

<!DOCTYPE html>
<html lang="en" data-content_root="" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Internals &#8212; smallpond documentation</title>
<script data-cfasync="false">
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "light";
</script>
<!-- Loaded before other Sphinx assets -->
<link href="_static/styles/theme.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link href="_static/styles/bootstrap.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link href="_static/styles/pydata-sphinx-theme.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link href="_static/vendor/fontawesome/6.1.2/css/all.min.css?digest=5b4479735964841361fd" rel="stylesheet" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-solid-900.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-brands-400.woff2" />
<link rel="preload" as="font" type="font/woff2" crossorigin href="_static/vendor/fontawesome/6.1.2/webfonts/fa-regular-400.woff2" />
<link rel="stylesheet" type="text/css" href="_static/pygments.css?v=8f2a1f02" />
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="_static/scripts/bootstrap.js?digest=5b4479735964841361fd" />
<link rel="preload" as="script" href="_static/scripts/pydata-sphinx-theme.js?digest=5b4479735964841361fd" />
<script src="_static/vendor/fontawesome/6.1.2/js/all.min.js?digest=5b4479735964841361fd"></script>
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js?v=b3ba4146"></script>
<script src="_static/doctools.js?v=888ff710"></script>
<script src="_static/sphinx_highlight.js?v=4825356b"></script>
<script>DOCUMENTATION_OPTIONS.pagename = 'internals';</script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="API Reference" href="api.html" />
<link rel="prev" title="Getting Started" href="getstarted.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
</head>
<body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">
<a class="skip-link" href="#main-content">Skip to main content</a>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>
Back to top
</button>
<input type="checkbox"
class="sidebar-toggle"
name="__primary"
id="__primary"/>
<label class="overlay overlay-primary" for="__primary"></label>
<input type="checkbox"
class="sidebar-toggle"
name="__secondary"
id="__secondary"/>
<label class="overlay overlay-secondary" for="__secondary"></label>
<div class="search-button__wrapper">
<div class="search-button__overlay"></div>
<div class="search-button__search-container">
<form class="bd-search d-flex align-items-center"
action="search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
id="search-input"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form></div>
</div>
<nav class="bd-header navbar navbar-expand-lg bd-navbar">
<div class="bd-header__inner bd-page-width">
<label class="sidebar-toggle primary-toggle" for="__primary">
<span class="fa-solid fa-bars"></span>
</label>
<div class="col-lg-3 navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="index.html">
<p class="title logo__title">smallpond documentation</p>
</a></div>
</div>
<div class="col-lg-9 navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="getstarted.html">
Getting Started
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="#">
Internals
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="api.html">
API Reference
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/deepseek-ai/smallpond" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<script>
document.write(`
<button class="btn navbar-btn search-button-field search-button__button" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
`);
</script>
</div>
<label class="sidebar-toggle secondary-toggle" for="__secondary" tabindex="0">
<span class="fa-solid fa-outdent"></span>
</label>
</div>
</nav>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<div class="bd-sidebar-primary bd-sidebar hide-on-wide">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav class="navbar-nav">
<p class="sidebar-header-items__title"
role="heading"
aria-level="1"
aria-label="Site Navigation">
Site Navigation
</p>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item">
<a class="nav-link nav-internal" href="getstarted.html">
Getting Started
</a>
</li>
<li class="nav-item current active">
<a class="nav-link nav-internal" href="#">
Internals
</a>
</li>
<li class="nav-item">
<a class="nav-link nav-internal" href="api.html">
API Reference
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item">
<script>
document.write(`
<button class="btn btn-sm navbar-btn theme-switch-button" title="light/dark" aria-label="light/dark" data-bs-placement="bottom" data-bs-toggle="tooltip">
<span class="theme-switch nav-link" data-mode="light"><i class="fa-solid fa-sun fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="dark"><i class="fa-solid fa-moon fa-lg"></i></span>
<span class="theme-switch nav-link" data-mode="auto"><i class="fa-solid fa-circle-half-stroke fa-lg"></i></span>
</button>
`);
</script></div>
<div class="navbar-item"><ul class="navbar-icon-links navbar-nav"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/deepseek-ai/smallpond" title="GitHub" class="nav-link" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><span><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i></span>
<span class="sr-only">GitHub</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
</div>
<div id="rtd-footer-container"></div>
</div>
<main id="main-content" class="bd-main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item active" aria-current="page">Internals</li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article" role="main">
<section id="internals">
<h1>Internals<a class="headerlink" href="#internals" title="Permalink to this heading">#</a></h1>
<section id="data-root">
<h2>Data Root<a class="headerlink" href="#data-root" title="Permalink to this heading">#</a></h2>
<p>Smallpond stores all data in a single directory called data root.</p>
<p>This directory has the following structure:</p>
<div class="highlight-bash notranslate"><div class="highlight"><pre><span></span>data_root
└──<span class="w"> </span><span class="m">2024</span>-12-11-12-00-28.2cc39990-296f-48a3-8063-78cf6dca460b<span class="w"> </span><span class="c1"># job_time.job_id</span>
<span class="w"> </span>├──<span class="w"> </span>config<span class="w"> </span><span class="c1"># configuration and state</span>
<span class="w"> </span><span class="w"> </span>├──<span class="w"> </span>exec_plan.pickle
<span class="w"> </span><span class="w"> </span>├──<span class="w"> </span>logical_plan.pickle
<span class="w"> </span><span class="w"> </span>└──<span class="w"> </span>runtime_ctx.pickle
<span class="w"> </span>├──<span class="w"> </span>log<span class="w"> </span><span class="c1"># logs</span>
<span class="w"> </span><span class="w"> </span>├──<span class="w"> </span>graph.png
<span class="w"> </span><span class="w"> </span>└──<span class="w"> </span>scheduler.log
<span class="w"> </span>├──<span class="w"> </span>queue<span class="w"> </span><span class="c1"># message queue between scheduler and workers</span>
<span class="w"> </span>├──<span class="w"> </span>output<span class="w"> </span><span class="c1"># output data</span>
<span class="w"> </span>├──<span class="w"> </span>staging<span class="w"> </span><span class="c1"># intermediate data</span>
<span class="w"> </span><span class="w"> </span>├──<span class="w"> </span>DataSourceTask.000001
<span class="w"> </span><span class="w"> </span>├──<span class="w"> </span>EvenlyDistributedPartitionProducerTask.000002
<span class="w"> </span><span class="w"> </span>├──<span class="w"> </span>completed_tasks<span class="w"> </span><span class="c1"># output dataset of completed tasks</span>
<span class="w"> </span><span class="w"> </span>└──<span class="w"> </span>started_tasks<span class="w"> </span><span class="c1"># used for checkpoint</span>
<span class="w"> </span>└──<span class="w"> </span>temp<span class="w"> </span><span class="c1"># temporary data</span>
<span class="w"> </span>├──<span class="w"> </span>DataSourceTask.000001
<span class="w"> </span>└──<span class="w"> </span>EvenlyDistributedPartitionProducerTask.000002
</pre></div>
</div>
</section>
<section id="failure-recovery">
<h2>Failure Recovery<a class="headerlink" href="#failure-recovery" title="Permalink to this heading">#</a></h2>
<p>Smallpond can recover from failure and resume execution from the last checkpoint.
Checkpoint is task-level. A few tasks, such as <cite>ArrowBatchTask</cite>, support checkpointing at the batch level.</p>
</section>
</section>
</article>
<footer class="prev-next-footer">
<div class="prev-next-area">
<a class="left-prev"
href="getstarted.html"
title="previous page">
<i class="fa-solid fa-angle-left"></i>
<div class="prev-next-info">
<p class="prev-next-subtitle">previous</p>
<p class="prev-next-title">Getting Started</p>
</div>
</a>
<a class="right-next"
href="api.html"
title="next page">
<div class="prev-next-info">
<p class="prev-next-subtitle">next</p>
<p class="prev-next-title">API Reference</p>
</div>
<i class="fa-solid fa-angle-right"></i>
</a>
</div>
</footer>
</div>
<div class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">
<div class="sidebar-secondary-item">
<div class="page-toc tocsection onthispage">
<i class="fa-solid fa-list"></i> On this page
</div>
<nav class="bd-toc-nav page-toc">
<ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#data-root">Data Root</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#failure-recovery">Failure Recovery</a></li>
</ul>
</nav></div>
<div class="sidebar-secondary-item">
<div class="tocsection sourcelink">
<a href="_sources/internals.rst.txt">
<i class="fa-solid fa-file-lines"></i> Show Source
</a>
</div>
</div>
</div></div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script src="_static/scripts/bootstrap.js?digest=5b4479735964841361fd"></script>
<script src="_static/scripts/pydata-sphinx-theme.js?digest=5b4479735964841361fd"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2025, deepseek.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 7.1.2.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.14.4.
</p></div>
</div>
</div>
</footer>
</body>
</html>