mirror of
https://github.com/deepseek-ai/ESFT
synced 2025-06-26 18:15:50 +00:00
add training code
This commit is contained in:
12
scripts/train.sh
Normal file
12
scripts/train.sh
Normal file
@@ -0,0 +1,12 @@
|
||||
|
||||
# Launch ESFT training on the translation dataset with plain `python`.
# train.py, configs/ and results/ are referenced by relative path, so run
# this script from the directory that contains them.

# Exported so the training process inherits it; presumably silences the
# Hugging Face tokenizers fork-parallelism warning -- TODO confirm.
export TOKENIZERS_PARALLELISM=false

# Experiment name; becomes the sub-directory under results/checkpoints/.
exp_name="test/eval_translation"
# Base model location, forwarded verbatim as --base_model_path.
base_model_path="/hf3fs-jd/prod/deepseek/shared/wangzihan/models/huggingface/vanilla_model"

# TODO (original author's note): turn the settings above into a for loop.
# Expansions are double-quoted so a value containing spaces or glob
# characters still reaches train.py as exactly one argument.
python train.py \
    --base_model_path="${base_model_path}" \
    --expert_config=results/expert_configs/translation.json \
    --train_dataset=translation \
    --train_config=configs/base.yaml \
    --output_dir="results/checkpoints/${exp_name}"
|
||||
11
scripts/train_ep.sh
Normal file
11
scripts/train_ep.sh
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
# Launch ESFT training on the translation dataset through torchrun, using
# train_ep.py (presumably the expert-parallel variant -- TODO confirm).
# train_ep.py, configs/ and results/ are referenced by relative path, so
# run this script from the directory that contains them.

# Exported so every worker inherits it; presumably silences the Hugging
# Face tokenizers fork-parallelism warning -- TODO confirm.
export TOKENIZERS_PARALLELISM=false

# Experiment name; becomes the sub-directory under results/checkpoints/.
exp_name="test/eval_translation"
# Base model location, forwarded verbatim as --base_model_path.
base_model_path="/hf3fs-jd/prod/deepseek/shared/wangzihan/models/huggingface/vanilla_model"

# --nproc-per-node=8 asks torchrun for eight worker processes on this node.
# Expansions are double-quoted so a value containing spaces or glob
# characters still reaches train_ep.py as exactly one argument.
torchrun --nproc-per-node=8 train_ep.py \
    --base_model_path="${base_model_path}" \
    --expert_config=results/expert_configs/translation.json \
    --train_dataset=translation \
    --train_config=configs/base.yaml \
    --output_dir="results/checkpoints/${exp_name}"
|
||||
Reference in New Issue
Block a user