update eval and readme

2025-06-26 18:15:50 +00:00 · 2024-08-09 18:06:57 +08:00
parent 809d0e377e
commit 36c6194349
10 changed files with 333 additions and 152 deletions
--- a/scripts/eval_expert.sh
+++ b/scripts/eval_expert.sh
@@ -0,0 +1,16 @@
+# Expert evaluation pipeline, step 1 of 2.
+# Runs get_expert_scores.py with eval_dataset=translation and
+# output_dir=results/expert_scores/translation.
+#
+# Stop at the first failing command. The config generation step later in
+# this script reads from results/expert_scores, so it must not run when
+# scoring has failed and left that directory missing or incomplete.
+set -e
+
+# NOTE: world_size=4 with gpus_per_rank=2 presumably requires 8 GPUs in
+# total. TODO confirm against get_expert_scores.py.
+python scripts/expert/get_expert_scores.py \
+    --eval_dataset=translation \
+    --base_model_path=deepseek-ai/ESFT-vanilla-lite \
+    --output_dir=results/expert_scores/translation \
+    --n_sample_tokens=131072 \
+    --world_size=4 \
+    --gpus_per_rank=2
+
+python scripts/expert/generate_expert_config.py \
+    --eval_datasets=intent,summary,law,translation \
+    --expert_scores_dir=results/expert_scores \
+    --output_dir=results/expert_configs \
+    --score_function=token \
+    --top_p=0.2 # the scoring function and top_p are hyperparameters
+    # --train_shared_experts
+    # --train_non_expert_modules