mirror of
https://github.com/deepseek-ai/DeepEP
synced 2025-06-26 18:28:11 +00:00
Merge pull request #222 from deepseek-ai/set_dev_id
Set `device_id` to suppress the PyTorch warning.
This commit is contained in:
commit
a2d2354e1d
@ -4,6 +4,7 @@ import numpy as np
|
|||||||
import torch
|
import torch
|
||||||
import torch.distributed as dist
|
import torch.distributed as dist
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
|
||||||
def init_dist(local_rank: int, num_local_ranks: int):
|
def init_dist(local_rank: int, num_local_ranks: int):
|
||||||
@ -14,12 +15,16 @@ def init_dist(local_rank: int, num_local_ranks: int):
|
|||||||
node_rank = int(os.getenv('RANK', 0))
|
node_rank = int(os.getenv('RANK', 0))
|
||||||
assert (num_local_ranks < 8 and num_nodes == 1) or num_local_ranks == 8
|
assert (num_local_ranks < 8 and num_nodes == 1) or num_local_ranks == 8
|
||||||
|
|
||||||
dist.init_process_group(
|
sig = inspect.signature(dist.init_process_group)
|
||||||
backend='nccl',
|
params = {
|
||||||
init_method=f'tcp://{ip}:{port}',
|
'backend': 'nccl',
|
||||||
world_size=num_nodes * num_local_ranks,
|
'init_method': f'tcp://{ip}:{port}',
|
||||||
rank=node_rank * num_local_ranks + local_rank
|
'world_size': num_nodes * num_local_ranks,
|
||||||
)
|
'rank': node_rank * num_local_ranks + local_rank,
|
||||||
|
}
|
||||||
|
if 'device_id' in sig.parameters:
|
||||||
|
params['device_id'] = torch.device(f"cuda:{local_rank}")
|
||||||
|
dist.init_process_group(**params)
|
||||||
torch.set_default_dtype(torch.bfloat16)
|
torch.set_default_dtype(torch.bfloat16)
|
||||||
torch.set_default_device('cuda')
|
torch.set_default_device('cuda')
|
||||||
torch.cuda.set_device(local_rank)
|
torch.cuda.set_device(local_rank)
|
||||||
|
Loading…
Reference in New Issue
Block a user