mirror of
https://github.com/deepseek-ai/ESFT
synced 2025-06-26 18:15:50 +00:00
first commit
update readme update readme update readme Update benchmarks.py Update download_adapters.sh Update esft.py
This commit is contained in:
53
scripts/eval.py
Normal file
53
scripts/eval.py
Normal file
@@ -0,0 +1,53 @@
|
||||
import json
|
||||
import argparse
|
||||
from benchmarks import *
|
||||
import os
|
||||
from esft import load_base_model, add_adapter
|
||||
|
||||
# Number of examples kept per dataset when --debug is passed.
DEBUG_DATASET_SIZE = 16


def parse_args(argv=None):
    """Parse the command-line options of the evaluation script.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            the arguments from ``sys.argv``.

    Returns:
        The populated ``argparse.Namespace``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--adapter_dir", type=str, required=True)
    parser.add_argument("--base_model_path", type=str, required=True)
    parser.add_argument("--eval_datasets", type=str, required=True)
    parser.add_argument("--output_dir", type=str, required=True)
    parser.add_argument("--max_new_tokens", type=int, default=128)
    parser.add_argument("--eval_batch_size", type=int, default=16)
    parser.add_argument("--openai_api_key", type=str, required=True)
    parser.add_argument("--debug", action='store_true')
    return parser.parse_args(argv)


def load_eval_dataset(path, limit=None):
    """Read a JSON Lines file into a list of parsed records.

    Args:
        path: Path of the ``.jsonl`` file to read.
        limit: When not None, keep only the first ``limit`` records.

    Returns:
        A list with one parsed JSON value per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Explicit UTF-8 so the result does not depend on the platform locale;
    # the context manager closes the handle, and blank lines are skipped
    # instead of crashing json.loads.
    with open(path, encoding="utf-8") as f:
        records = [json.loads(line) for line in f if line.strip()]
    if limit is not None:
        records = records[:limit]
    return records


def save_eval_results(output_dir, dataset_name, results, metrics):
    """Write paired results and scores to ``<output_dir>/<dataset_name>.jsonl``.

    Each output line is a JSON object ``{"example": ..., "score": ...}``.
    The output directory is created if it does not exist.

    Args:
        output_dir: Directory that receives the output file.
        dataset_name: Base name (without extension) of the output file.
        results: Iterable of JSON-serializable per-example results.
        metrics: Iterable of JSON-serializable per-example scores, paired
            with ``results`` by position.

    Returns:
        The path of the file that was written.
    """
    os.makedirs(output_dir, exist_ok=True)
    out_path = os.path.join(output_dir, dataset_name + ".jsonl")
    # ensure_ascii=False emits raw non-ASCII characters, so the file must be
    # opened as UTF-8 rather than with the locale's default encoding.
    with open(out_path, "w", encoding="utf-8") as f:
        for res, m in zip(results, metrics):
            obj = {
                "example": res,
                "score": m
            }
            f.write(json.dumps(obj, ensure_ascii=False) + "\n")
    return out_path


def main():
    """Evaluate each requested dataset with its adapter and save the scores."""
    args = parse_args()

    base_model_path = args.base_model_path
    adapter_dir = args.adapter_dir
    # Tolerate stray spaces and empty entries in the comma-separated list.
    eval_datasets = [name.strip() for name in args.eval_datasets.split(",") if name.strip()]

    config = {"max_new_tokens": args.max_new_tokens, "eval_batch_size": args.eval_batch_size, "openai_api_key": args.openai_api_key}

    evaluator_map = {"intent": IntentEvaluator, "summary": SummaryEvaluator, "law": LawEvaluator, "translation": TranslationEvaluator}

    # Reject unsupported dataset names before the base model is loaded.
    unknown = [name for name in eval_datasets if name not in evaluator_map]
    if unknown:
        raise SystemExit(
            f"Unknown eval dataset(s): {', '.join(unknown)}. "
            f"Supported: {', '.join(evaluator_map)}"
        )

    print("Loading base model...")
    model, tokenizer = load_base_model(base_model_path)

    for dataset_name in eval_datasets:
        print(f"Running evaluation on {dataset_name}...")
        if args.debug:
            print("Debugging. Shortening the dataset length")
        dataset = load_eval_dataset(
            f"datasets/eval/{dataset_name}.jsonl",
            limit=DEBUG_DATASET_SIZE if args.debug else None,
        )

        evaluator = evaluator_map[dataset_name](dataset, config)
        print("Adding adapter...")
        # add adapter to model and convert original states to buffer.
        model.model, original_state_dict = add_adapter(model.model, os.path.join(adapter_dir, dataset_name), return_original_states=True)
        results, metrics = evaluator.evaluate(model, tokenizer)

        save_eval_results(args.output_dir, dataset_name, results, metrics)

        # convert to original model
        model.model.load_state_dict(original_state_dict)


if __name__ == "__main__":
    main()
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user