import argparse
import json
import os
from multiprocessing import Pool

import numpy as np


def parse_line(line):
    # Each line holds one token's routed expert ids and their gate weights,
    # separated by a double tab; ids and weights are themselves tab-separated.
    expert_ids, expert_weights = line.split("\t\t")
    expert_ids = [int(i) for i in expert_ids.split("\t")]
    expert_weights = [float(i) for i in expert_weights.split("\t")]
    return expert_ids, expert_weights
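
# parse_line expects score-file lines shaped like the following hypothetical
# example (assuming TOP_K = 6 routing; the concrete ids and weights are
# illustrative only):
#   "5\t17\t42\t3\t61\t9\t\t0.31\t0.22\t0.18\t0.12\t0.10\t0.07"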


def get_summary(files):
    TOP_K = 6
    N_EXPERTS = 64
    N_LAYERS = 26  # 27 layers in total; the first layer is not MoE

    # Accumulators over (layer, expert): total gate weight and routed-token
    # count (each routing event contributes 1 / TOP_K of a token).
    gate_scores = np.zeros((N_LAYERS, N_EXPERTS))
    token_scores = np.zeros((N_LAYERS, N_EXPERTS))

    print("loading files")
    for rank, file in files:
        # The layer id is parsed from the third underscore-separated field of the file name.
        layer_id = int(file.split(".")[0].split("_")[2]) - 1

        # Relies on the global `args` parsed in the __main__ block below.
        with open(os.path.join(args.expert_scores_dir, rank, file)) as f:
            data = f.readlines()
            for line in data:
                expert_ids, expert_weights = parse_line(line)
                np.add.at(gate_scores[layer_id], expert_ids, expert_weights)
                np.add.at(token_scores[layer_id], expert_ids, np.ones_like(expert_weights) / TOP_K)

    gate_scores = gate_scores / np.sum(gate_scores, axis=0)
    token_scores = token_scores / np.sum(token_scores, axis=0)

    # Convert the arrays to nested dicts: layer keys are 1-based strings,
    # expert keys are 0-based strings, values rounded to 4 decimals.
    summary = {"token_scores": token_scores, "gate_scores": gate_scores}
    summary = {k: {str(i + 1): {str(j): round(v, 4) for j, v in enumerate(l)} for i, l in enumerate(v)} for k, v in summary.items()}

    return summary


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--eval_dataset", type=str, required=True)
    parser.add_argument("--expert_scores_dir", type=str, required=True)
    parser.add_argument("--output_path", type=str, required=True)
    parser.add_argument("--score_function", type=str, required=True)
    parser.add_argument("--top_p", type=float, required=True)
    parser.add_argument("--train_shared_experts", action="store_true")
    parser.add_argument("--train_non_expert_modules", action="store_true")

    args = parser.parse_args()
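
    # A hypothetical invocation (script name, paths, and dataset name are
    # placeholders; --score_function must be "gate" or "token" to match the
    # summary keys built in get_summary; --eval_dataset is required but not
    # otherwise used in this script):
    #   python <this_script>.py \
    #       --eval_dataset=intent \
    #       --expert_scores_dir=results/expert_scores/intent \
    #       --output_path=results/expert_configs/intent.json \
    #       --score_function=token --top_p=0.2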

    expert_cfg = {  # initialize expert config
        "experts": {},
        "shared_experts": args.train_shared_experts,
        "non_expert_modules": args.train_non_expert_modules
    }

    # Walk args.expert_scores_dir and collect (rank, file) pairs.
    file_names = []
    for rank in [i for i in os.listdir(args.expert_scores_dir) if 'rank' in i]:
        for file in os.listdir(os.path.join(args.expert_scores_dir, rank)):
            file_names.append([rank, file])
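
    # Assumed on-disk layout (names illustrative): one sub-directory per rank,
    # each holding per-layer score files whose third underscore-separated
    # field is the layer number, e.g.
    #   <expert_scores_dir>/rank_0/expert_scores_7.txt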

    summary_file = os.path.join(args.expert_scores_dir, "summary.json")
    summary = get_summary(file_names)

    with open(summary_file, "w") as f:
        f.write(json.dumps(summary))
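
    # `summary` is a nested dict roughly of this shape (values illustrative):
    #   {"token_scores": {"1": {"0": 0.0123, ..., "63": 0.0088}, ..., "26": {...}},
    #    "gate_scores":  {"1": {...}, ..., "26": {...}}}
    # Layer keys are 1-based strings, expert keys are 0-based strings.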

    # For each layer, pick experts in descending score order until their
    # cumulative score reaches --top_p.
    scores = summary[f"{args.score_function}_scores"]
    for layer, l_score in scores.items():
        l_score = [(int(k), v) for k, v in l_score.items()]
        l_score = sorted(l_score, key=lambda x: x[1], reverse=True)
        selected_experts = []
        current_score = 0
        for expert, score in l_score:
            if current_score >= args.top_p:
                break
            selected_experts.append(expert)
            current_score += score
        expert_cfg["experts"][layer] = selected_experts

    top_p = args.top_p
    train_shared_experts = args.train_shared_experts
    train_non_expert_modules = args.train_non_expert_modules

    os.makedirs(os.path.dirname(args.output_path), exist_ok=True)
    with open(args.output_path, "w") as f:
        json.dump(expert_cfg, f)
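
    # Written out as JSON, expert_cfg looks roughly like this (layer keys and
    # expert ids are illustrative):
    #   {"experts": {"1": [7, 23, 41], ..., "26": [2, 14]},
    #    "shared_experts": false,
    #    "non_expert_modules": false}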