mirror of
https://github.com/deepseek-ai/3FS
synced 2025-06-03 19:36:20 +00:00
140 lines
4.6 KiB
Python
140 lines
4.6 KiB
Python
import hf3fs_py_usrbio as h3fio
|
||
from hf3fs_py_usrbio import register_fd, deregister_fd, force_fsync, extract_mount_point, hardlink, punch_hole
|
||
|
||
import multiprocessing.shared_memory
|
||
import os
|
||
import os.path
|
||
from uuid import uuid4
|
||
|
||
class iovec:
|
||
def __init__(self, iov, link):
|
||
self.iov = iov
|
||
self.link = link
|
||
|
||
def __del__(self):
|
||
os.unlink(self.link)
|
||
|
||
def __getitem__(self, slice):
|
||
return self.iov[slice]
|
||
|
||
def __setitem__(self, slice, val):
|
||
self.iov[slice] = val
|
||
|
||
class ioring:
|
||
def __init__(self, ior):
|
||
self.ior = ior
|
||
|
||
@staticmethod
|
||
def size_for_entries(entries):
|
||
return h3fio.ioring.size_for_entries(entries)
|
||
|
||
def prepare(self, iov, *args, **kwargs):
|
||
if type(iov) == iovec:
|
||
return self.ior.prepare(iov.iov, *args, **kwargs)
|
||
else:
|
||
return self.ior.prepare(iov, *args, **kwargs)
|
||
|
||
def submit(self):
|
||
return self.ior.submit()
|
||
|
||
def wait(self, *args, **kwargs):
|
||
return self.ior.wait(*args, **kwargs)
|
||
|
||
class IorPriority(object):
|
||
HIGH = -1
|
||
NORMAL = 0
|
||
LOW = 1
|
||
|
||
def make_iovec(shm, hf3fs_mount_point, block_size=0, numa=-1):
|
||
'''
|
||
创建 iovec 对象
|
||
|
||
Args:
|
||
shm: Python multiprocessing.shared_memory.SharedMemory 对象
|
||
hf3fs_mount_point: hf3fs 挂载点
|
||
block_size: 默认为 0,代表整体视为一个 block,系统会按照 block_size 按块分配内存,防止触发 IB 注册驱动问题
|
||
numa: 默认为 -1,代表不进行 numa 绑定,指定此参数可以指定内存绑定到固定 numa
|
||
'''
|
||
id = str(uuid4())
|
||
target = os.path.normpath(f'/dev/shm/{shm.name}')
|
||
link = f'{hf3fs_mount_point}/3fs-virt/iovs/{id}{f".b{block_size}" if block_size > 0 else ""}'
|
||
|
||
os.symlink(target, link)
|
||
|
||
return iovec(h3fio.iovec(shm.buf, id, hf3fs_mount_point, block_size, numa), link)
|
||
|
||
def make_ioring(hf3fs_mount_point, entries, for_read=True, io_depth=0, priority=None, timeout=None, numa=-1, flags=0):
|
||
'''
|
||
创建 ioring 对象
|
||
可以用 io_depth 参数来控制读取策略,有以下三种情况:
|
||
io_depth = 0 时,ioring 每次后台扫描任务或被通知有后台任务时,提交全部 io
|
||
io_depth > 0 时,ioring 每次提交 io_depth 个 io,用户需保证最终有足量任务,否则 ioring 在 wait 时会卡住
|
||
io_depth < 0 时,ioring 每次后台扫描任务或被通知有后台任务时,提交最多 -io_depth 个 io
|
||
|
||
Args:
|
||
hf3fs_mount_point: hf3fs 挂载点
|
||
entries: ioring 最大存放 io 操作的个数
|
||
for_read: 指定该 ioring 执行的操作,读为 True,写为 False
|
||
io_depth: 指定读取策略
|
||
numa: 默认为 -1,代表不进行 numa 绑定,指定此参数可以指定将 ioring 通信使用的内存绑定到固定 numa
|
||
flags: 读写操作的一些额外选项,现在有用取值的主要是2可以在读到洞的时候报错而不是填0
|
||
'''
|
||
return ioring(h3fio.ioring(hf3fs_mount_point, entries, for_read, io_depth, priority, timeout, numa, flags))
|
||
|
||
# @param cb if set, will callback with read data and current offset, will return None
|
||
# suggested use is to use cb
|
||
def read_file(fn, hf3fs_mount_point=None, block_size=1 << 30, off=0, priority=None, cb=None):
|
||
if hf3fs_mount_point is None:
|
||
hf3fs_mount_point = extract_mount_point(fn)
|
||
|
||
bufs = []
|
||
|
||
try:
|
||
fd = os.open(fn, os.O_RDONLY)
|
||
register_fd(fd)
|
||
shm = multiprocessing.shared_memory.SharedMemory(size=block_size, create=True)
|
||
iov = make_iovec(shm, hf3fs_mount_point)
|
||
ior = make_ioring(hf3fs_mount_point, 1, priority=priority)
|
||
|
||
i = 0
|
||
roff = off
|
||
while True:
|
||
ior.prepare(iov[:], True, fd, roff)
|
||
done = ior.submit().wait(min_results=1)[0]
|
||
if done.result < 0:
|
||
raise OSError(-done.result)
|
||
|
||
if done.result == 0:
|
||
break
|
||
|
||
if cb is None:
|
||
bufs.append(bytes(shm.buf[:done.result]))
|
||
else:
|
||
res = cb(shm.buf[:done.result], roff)
|
||
if type(res) == int:
|
||
roff = res
|
||
continue
|
||
elif res:
|
||
return
|
||
|
||
if done.result < block_size:
|
||
break
|
||
|
||
i += 1
|
||
roff += block_size
|
||
|
||
if cb is not None:
|
||
return
|
||
|
||
if len(bufs) == 1:
|
||
return bufs[0]
|
||
else:
|
||
return b''.join(bufs)
|
||
finally:
|
||
deregister_fd(fd)
|
||
os.close(fd)
|
||
del ior
|
||
del iov
|
||
shm.close()
|
||
shm.unlink()
|