refac/fix: feedback leaderboard

2026-01-09 18:24:09 +04:00
parent 3a57233dd4
commit 10838b3654
6 changed files with 425 additions and 526 deletions
--- a/backend/open_webui/models/feedbacks.py
+++ b/backend/open_webui/models/feedbacks.py
@@ -68,6 +68,13 @@ class FeedbackIdResponse(BaseModel):
    updated_at: int


+class LeaderboardFeedbackData(BaseModel):
+    """Minimal feedback data for leaderboard computation (excludes snapshot/meta)."""
+
+    # Identifier of the feedback record.
+    id: str
+    # Raw feedback payload; may be None when the record carries no data.
+    data: Optional[dict] = None
+
+
 class RatingData(BaseModel):
    rating: Optional[str | int] = None
    model_id: Optional[str] = None
@@ -271,6 +278,16 @@ class FeedbackTable:
                .all()
            ]

+    def get_feedbacks_for_leaderboard(
+        self, db: Optional[Session] = None
+    ) -> list[LeaderboardFeedbackData]:
+        """Fetch only id and data for leaderboard computation (excludes snapshot/meta)."""
+        with get_db_context(db) as db:
+            return [
+                LeaderboardFeedbackData(id=row.id, data=row.data)
+                for row in db.query(Feedback.id, Feedback.data).all()
+            ]
+
    def get_feedbacks_by_type(
        self, type: str, db: Optional[Session] = None
    ) -> list[FeedbackModel]:
--- a/backend/open_webui/routers/evaluations.py
+++ b/backend/open_webui/routers/evaluations.py
@@ -1,5 +1,7 @@
 from typing import Optional
+import logging
 from fastapi import APIRouter, Depends, HTTPException, status, Request
+from fastapi.concurrency import run_in_threadpool
 from pydantic import BaseModel

 from open_webui.models.users import Users, UserModel
@@ -10,6 +12,7 @@ from open_webui.models.feedbacks import (
    FeedbackForm,
    FeedbackUserResponse,
    FeedbackListResponse,
+    LeaderboardFeedbackData,
    Feedbacks,
 )

@@ -18,9 +21,239 @@ from open_webui.utils.auth import get_admin_user, get_verified_user
 from open_webui.internal.db import get_session
 from sqlalchemy.orm import Session

+log = logging.getLogger(__name__)
+
+
 router = APIRouter()


+# Leaderboard Elo Rating Computation
+#
+# How it works:
+# 1. Each model starts with a rating of 1000
+# 2. When a user picks a winner between two models, ratings are adjusted:
+#    - Winner gains points, loser loses points
+#    - The amount depends on expected outcome (upset = bigger change)
+# 3. The Elo formula: new_rating = old_rating + K * (actual - expected)
+#    - K=32 controls how much ratings can change per match
+#    - expected = probability of winning based on current ratings
+#
+# Query-based re-ranking (optional):
+#    When a user searches for a topic (e.g., "coding"), we want to show
+#    which models perform best FOR THAT TOPIC. We do this by:
+#    1. Computing semantic similarity between the query and each feedback's tags
+#    2. Using that similarity as a weight in the Elo calculation
+#    3. Feedbacks about "coding" contribute more to the final ranking
+#    4. Feedbacks about unrelated topics (e.g., "cooking") contribute less
+#    This gives topic-specific leaderboards without needing separate data.
+
+EMBEDDING_MODEL_NAME = "TaylorAI/bge-micro-v2"
+_embedding_model = None
+
+
+def _get_embedding_model():
+    global _embedding_model
+    if _embedding_model is None:
+        try:
+            from sentence_transformers import SentenceTransformer
+
+            _embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
+        except Exception as e:
+            log.error(f"Embedding model load failed: {e}")
+    return _embedding_model
+
+
+def _calculate_elo(
+    feedbacks: list[LeaderboardFeedbackData], similarities: dict = None
+) -> dict:
+    """
+    Calculate Elo ratings for models based on user feedback.
+
+    Each feedback represents a comparison where a user rated one model
+    against its opponents (sibling_model_ids). Rating=1 means the model won,
+    rating=-1 means it lost.
+
+    The Elo system adjusts ratings based on:
+    - Current rating difference (upsets cause bigger swings)
+    - Optional similarity weights (for query-based filtering)
+
+    Returns: {model_id: {"rating": float, "won": int, "lost": int}}
+    """
+    K_FACTOR = 32  # Standard Elo K-factor for rating volatility
+    model_stats = {}
+
+    def get_or_create_stats(model_id):
+        if model_id not in model_stats:
+            model_stats[model_id] = {"rating": 1000.0, "won": 0, "lost": 0}
+        return model_stats[model_id]
+
+    for feedback in feedbacks:
+        data = feedback.data or {}
+        winner_id = data.get("model_id")
+        rating_value = str(data.get("rating", ""))
+        if not winner_id or rating_value not in ("1", "-1"):
+            continue
+
+        won = rating_value == "1"
+        weight = similarities.get(feedback.id, 1.0) if similarities else 1.0
+
+        for opponent_id in data.get("sibling_model_ids") or []:
+            winner = get_or_create_stats(winner_id)
+            opponent = get_or_create_stats(opponent_id)
+            expected = 1 / (1 + 10 ** ((opponent["rating"] - winner["rating"]) / 400))
+
+            winner["rating"] += K_FACTOR * ((1 if won else 0) - expected) * weight
+            opponent["rating"] += (
+                K_FACTOR * ((0 if won else 1) - (1 - expected)) * weight
+            )
+
+            if won:
+                winner["won"] += 1
+                opponent["lost"] += 1
+            else:
+                winner["lost"] += 1
+                opponent["won"] += 1
+
+    return model_stats
+
+
+def _get_top_tags(feedbacks: list[LeaderboardFeedbackData], limit: int = 5) -> dict:
+    """
+    Count tag occurrences per model and return the most frequent ones.
+
+    Each feedback can have tags describing the conversation topic.
+    This aggregates those tags per model to show what topics each model
+    is commonly used for.
+
+    Returns: {model_id: [{"tag": str, "count": int}, ...]}
+    """
+    from collections import defaultdict
+
+    tag_counts = defaultdict(lambda: defaultdict(int))
+
+    for feedback in feedbacks:
+        data = feedback.data or {}
+        model_id = data.get("model_id")
+        if model_id:
+            for tag in data.get("tags", []):
+                tag_counts[model_id][tag] += 1
+
+    return {
+        model_id: [
+            {"tag": tag, "count": count}
+            for tag, count in sorted(tags.items(), key=lambda x: -x[1])[:limit]
+        ]
+        for model_id, tags in tag_counts.items()
+    }
+
+
+def _compute_similarities(feedbacks: list[LeaderboardFeedbackData], query: str) -> dict:
+    """
+    Compute how relevant each feedback is to a search query.
+
+    Uses embeddings to find semantic similarity between the query and
+    each feedback's tags. Higher similarity means the feedback is more
+    relevant to what the user searched for.
+
+    This is used to weight Elo calculations - feedbacks matching the
+    query have more influence on the final rankings.
+
+    Returns: {feedback_id: similarity_score (0-1)}
+    """
+    import numpy as np
+
+    embedding_model = _get_embedding_model()
+    if not embedding_model:
+        return {}
+
+    all_tags = list(
+        {
+            tag
+            for feedback in feedbacks
+            if feedback.data
+            for tag in feedback.data.get("tags", [])
+        }
+    )
+    if not all_tags:
+        return {}
+
+    try:
+        tag_embeddings = embedding_model.encode(all_tags)
+        query_embedding = embedding_model.encode([query])[0]
+    except Exception as e:
+        log.error(f"Embedding error: {e}")
+        return {}
+
+    # Vectorized cosine similarity
+    tag_norms = np.linalg.norm(tag_embeddings, axis=1)
+    query_norm = np.linalg.norm(query_embedding)
+    similarities = np.dot(tag_embeddings, query_embedding) / (
+        tag_norms * query_norm + 1e-9
+    )
+    tag_similarity_map = dict(zip(all_tags, similarities.tolist()))
+
+    return {
+        feedback.id: max(
+            (
+                tag_similarity_map.get(tag, 0)
+                for tag in (feedback.data or {}).get("tags", [])
+            ),
+            default=0,
+        )
+        for feedback in feedbacks
+    }
+
+
+class LeaderboardEntry(BaseModel):
+    """One model's row in the leaderboard response."""
+
+    # Identifier of the model this row describes.
+    model_id: str
+    # Elo rating, rounded to the nearest integer by the endpoint.
+    rating: int
+    # Number of comparisons the model won.
+    won: int
+    # Number of comparisons the model lost.
+    lost: int
+    # Total comparisons; the endpoint fills this with won + lost.
+    count: int
+    # Most frequent tags for the model, as {"tag": ..., "count": ...} dicts.
+    top_tags: list[dict]
+
+
+class LeaderboardResponse(BaseModel):
+    """Response body of the leaderboard endpoint."""
+
+    # Leaderboard rows; the endpoint returns them sorted by rating, highest first.
+    entries: list[LeaderboardEntry]
+
+
+@router.get("/leaderboard", response_model=LeaderboardResponse)
+async def get_leaderboard(
+    query: Optional[str] = None,
+    user=Depends(get_admin_user),
+    db: Session = Depends(get_session),
+):
+    """Get model leaderboard with Elo ratings. Query filters by tag similarity."""
+    feedbacks = Feedbacks.get_feedbacks_for_leaderboard(db=db)
+
+    similarities = None
+    if query and query.strip():
+        similarities = await run_in_threadpool(
+            _compute_similarities, feedbacks, query.strip()
+        )
+
+    elo_stats = _calculate_elo(feedbacks, similarities)
+    tags_by_model = _get_top_tags(feedbacks)
+
+    entries = sorted(
+        [
+            LeaderboardEntry(
+                model_id=mid,
+                rating=round(s["rating"]),
+                won=s["won"],
+                lost=s["lost"],
+                count=s["won"] + s["lost"],
+                top_tags=tags_by_model.get(mid, []),
+            )
+            for mid, s in elo_stats.items()
+        ],
+        key=lambda e: e.rating,
+        reverse=True,
+    )
+
+    return LeaderboardResponse(entries=entries)
+
+
 ############################
 # GetConfig
 ############################
--- a/src/lib/apis/evaluations/index.ts
+++ b/src/lib/apis/evaluations/index.ts
@@ -93,6 +93,40 @@ export const getAllFeedbacks = async (token: string = '') => {
 	return res;
 };

+/**
+ * Fetch the model leaderboard from the backend.
+ *
+ * @param token Bearer token sent in the `authorization` header.
+ * @param query Optional search text; sent as the `query` parameter only when non-empty.
+ * @returns The parsed JSON response body.
+ * @throws The `detail` of an error response, or the underlying error when the
+ *         request fails without one (e.g. a network failure).
+ */
+export const getLeaderboard = async (token: string = '', query: string = '') => {
+	let error = null;
+
+	const searchParams = new URLSearchParams();
+	if (query) searchParams.append('query', query);
+
+	const res = await fetch(
+		`${WEBUI_API_BASE_URL}/evaluations/leaderboard?${searchParams.toString()}`,
+		{
+			method: 'GET',
+			headers: {
+				Accept: 'application/json',
+				'Content-Type': 'application/json',
+				authorization: `Bearer ${token}`
+			}
+		}
+	)
+		.then(async (res) => {
+			if (!res.ok) throw await res.json();
+			return res.json();
+		})
+		.catch((err) => {
+			// A network-level rejection has no `detail`; fall back to the error
+			// itself so the failure is not silently turned into a `null` result.
+			error = err?.detail ?? err;
+			console.error(err);
+			return null;
+		});
+
+	if (error) {
+		throw error;
+	}
+
+	return res;
+};
+
 export const getFeedbackItems = async (token: string = '', orderBy, direction, page) => {
 	let error = null;

--- a/src/lib/components/admin/Evaluations.svelte
+++ b/src/lib/components/admin/Evaluations.svelte
@@ -6,8 +6,7 @@
 	import Leaderboard from './Evaluations/Leaderboard.svelte';
 	import Feedbacks from './Evaluations/Feedbacks.svelte';

-	import { getAllFeedbacks } from '$lib/apis/evaluations';
-
+	
 	const i18n = getContext('i18n');

 	let selectedTab;
@@ -30,12 +29,8 @@
 	};

 	let loaded = false;
-	let feedbacks = [];

 	onMount(async () => {
-		// TODO: feedbacks elo rating calculation should be done in the backend; remove below line later
-		feedbacks = await getAllFeedbacks(localStorage.token);
-
 		loaded = true;

 		const containerElement = document.getElementById('users-tabs-container');
@@ -117,7 +112,7 @@

 		<div class="flex-1 mt-1 lg:mt-0 px-[16px] lg:pr-[16px] lg:pl-0 overflow-y-scroll">
 			{#if selectedTab === 'leaderboard'}
-				<Leaderboard {feedbacks} />
+				<Leaderboard />
 			{:else if selectedTab === 'feedback'}
 				<Feedbacks />
 			{/if}
--- a/src/lib/components/admin/Evaluations/Leaderboard.svelte
+++ b/src/lib/components/admin/Evaluations/Leaderboard.svelte
@@ -1,559 +1,198 @@
 <script lang="ts">
 	import { onMount, getContext } from 'svelte';
 	import { models } from '$lib/stores';
-
+	import { getLeaderboard } from '$lib/apis/evaluations';
 	import ModelModal from './LeaderboardModal.svelte';
-
 	import Spinner from '$lib/components/common/Spinner.svelte';
 	import Tooltip from '$lib/components/common/Tooltip.svelte';
 	import Search from '$lib/components/icons/Search.svelte';
-
 	import ChevronUp from '$lib/components/icons/ChevronUp.svelte';
 	import ChevronDown from '$lib/components/icons/ChevronDown.svelte';
-	import { WEBUI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
+	import { WEBUI_API_BASE_URL } from '$lib/constants';

 	const i18n = getContext('i18n');

-	const EMBEDDING_MODEL = 'TaylorAI/bge-micro-v2';
-
-	let tokenizer = null;
-	let model = null;
-
-	export let feedbacks = [];
-
 	let rankedModels = [];
-
 	let query = '';
+	let loading = true;
+	let debounceTimer: ReturnType<typeof setTimeout>;
+	let orderBy = 'rating';
+	let direction: 'asc' | 'desc' = 'desc';

-	let tagEmbeddings = new Map();
-	let loadingLeaderboard = true;
-	let debounceTimer;
+	let showModal = false;
+	let selectedModel = null;

-	let orderBy: string = 'rating'; // default sort column
-	let direction: 'asc' | 'desc' = 'desc'; // default sort order
-
-	type Feedback = {
-		id: string;
-		data: {
-			rating: number;
-			model_id: string;
-			sibling_model_ids: string[] | null;
-			reason: string;
-			comment: string;
-			tags: string[];
-		};
-		user: {
-			name: string;
-			profile_image_url: string;
-		};
-		updated_at: number;
-	};
-
-	type ModelStats = {
-		rating: number;
-		won: number;
-		lost: number;
-	};
-
-	function setSortKey(key) {
+	const toggleSort = (key: string) => {
 		if (orderBy === key) {
 			direction = direction === 'asc' ? 'desc' : 'asc';
 		} else {
 			orderBy = key;
 			direction = key === 'name' ? 'asc' : 'desc';
 		}
-	}
+	};

-	//////////////////////
-	//
-	// Aggregate Level Modal
-	//
-	//////////////////////
-
-	let showLeaderboardModal = false;
-	let selectedModel = null;
-
-	const openLeaderboardModelModal = (model) => {
-		showLeaderboardModal = true;
+	const openModal = (model) => {
 		selectedModel = model;
+		showModal = true;
 	};

-	const closeLeaderboardModal = () => {
-		showLeaderboardModal = false;
+	const closeModal = () => {
 		selectedModel = null;
+		showModal = false;
 	};

-	//////////////////////
-	//
-	// Rank models by Elo rating
-	//
-	//////////////////////
+	const loadLeaderboard = async (searchQuery = '') => {
+		loading = true;
+		try {
+			const result = await getLeaderboard(localStorage.token, searchQuery);
+			const statsMap = new Map(
+				(result?.entries ?? []).map((e) => [e.model_id, e])
+			);

-	const rankHandler = async (similarities: Map<string, number> = new Map()) => {
-		const modelStats = calculateModelStats(feedbacks, similarities);
-
-		rankedModels = $models
-			.filter((m) => m?.owned_by !== 'arena' && (m?.info?.meta?.hidden ?? false) !== true)
-			.map((model) => {
-				const stats = modelStats.get(model.id);
-				return {
-					...model,
-					rating: stats ? Math.round(stats.rating) : '-',
-					stats: {
-						count: stats ? stats.won + stats.lost : 0,
-						won: stats ? stats.won.toString() : '-',
-						lost: stats ? stats.lost.toString() : '-'
-					}
-				};
-			})
-			.sort((a, b) => {
-				if (a.rating === '-' && b.rating !== '-') return 1;
-				if (b.rating === '-' && a.rating !== '-') return -1;
-				if (a.rating !== '-' && b.rating !== '-') return b.rating - a.rating;
-				return (a?.name ?? a?.id ?? '').localeCompare(b?.name ?? b?.id ?? '');
-			});
-
-		loadingLeaderboard = false;
+			rankedModels = $models
+				.filter((m) => m?.owned_by !== 'arena' && !m?.info?.meta?.hidden)
+				.map((model) => {
+					const s = statsMap.get(model.id);
+					return {
+						...model,
+						rating: s?.rating ?? '-',
+						stats: {
+							count: s ? s.won + s.lost : 0,
+							won: s?.won?.toString() ?? '-',
+							lost: s?.lost?.toString() ?? '-'
+						},
+						top_tags: s?.top_tags ?? []
+					};
+				})
+				.sort((a, b) => {
+					if (a.rating === '-') return 1;
+					if (b.rating === '-') return -1;
+					return b.rating - a.rating;
+				});
+		} catch (err) {
+			console.error('Leaderboard load failed:', err);
+		}
+		loading = false;
 	};

-	function calculateModelStats(
-		feedbacks: Feedback[],
-		similarities: Map<string, number>
-	): Map<string, ModelStats> {
-		const stats = new Map<string, ModelStats>();
-		const K = 32;
-
-		function getOrDefaultStats(modelId: string): ModelStats {
-			return stats.get(modelId) || { rating: 1000, won: 0, lost: 0 };
-		}
-
-		function updateStats(modelId: string, ratingChange: number, outcome: number) {
-			const currentStats = getOrDefaultStats(modelId);
-			currentStats.rating += ratingChange;
-			if (outcome === 1) currentStats.won++;
-			else if (outcome === 0) currentStats.lost++;
-			stats.set(modelId, currentStats);
-		}
-
-		function calculateEloChange(
-			ratingA: number,
-			ratingB: number,
-			outcome: number,
-			similarity: number
-		): number {
-			const expectedScore = 1 / (1 + Math.pow(10, (ratingB - ratingA) / 400));
-			return K * (outcome - expectedScore) * similarity;
-		}
-
-		feedbacks.forEach((feedback) => {
-			if (!feedback?.data?.model_id || !feedback?.data?.rating) return;
-
-			const modelA = feedback.data.model_id;
-			const statsA = getOrDefaultStats(modelA);
-			let outcome: number;
-
-			switch (feedback.data.rating.toString()) {
-				case '1':
-					outcome = 1;
-					break;
-				case '-1':
-					outcome = 0;
-					break;
-				default:
-					return; // Skip invalid ratings
-			}
-
-			// If the query is empty, set similarity to 1, else get the similarity from the map
-			const similarity = query !== '' ? similarities.get(feedback.id) || 0 : 1;
-			const opponents = feedback.data.sibling_model_ids || [];
-
-			opponents.forEach((modelB) => {
-				const statsB = getOrDefaultStats(modelB);
-				const changeA = calculateEloChange(statsA.rating, statsB.rating, outcome, similarity);
-				const changeB = calculateEloChange(statsB.rating, statsA.rating, 1 - outcome, similarity);
-
-				updateStats(modelA, changeA, outcome);
-				updateStats(modelB, changeB, 1 - outcome);
-			});
-		});
-
-		return stats;
-	}
-
-	//////////////////////
-	//
-	// Calculate cosine similarity
-	//
-	//////////////////////
-
-	const cosineSimilarity = (vecA, vecB) => {
-		// Ensure the lengths of the vectors are the same
-		if (vecA.length !== vecB.length) {
-			throw new Error('Vectors must be the same length');
-		}
-
-		// Calculate the dot product
-		let dotProduct = 0;
-		let normA = 0;
-		let normB = 0;
-
-		for (let i = 0; i < vecA.length; i++) {
-			dotProduct += vecA[i] * vecB[i];
-			normA += vecA[i] ** 2;
-			normB += vecB[i] ** 2;
-		}
-
-		// Calculate the magnitudes
-		normA = Math.sqrt(normA);
-		normB = Math.sqrt(normB);
-
-		// Avoid division by zero
-		if (normA === 0 || normB === 0) {
-			return 0;
-		}
-
-		// Return the cosine similarity
-		return dotProduct / (normA * normB);
-	};
-
-	const calculateMaxSimilarity = (queryEmbedding, tagEmbeddings: Map<string, number[]>) => {
-		let maxSimilarity = 0;
-		for (const tagEmbedding of tagEmbeddings.values()) {
-			const similarity = cosineSimilarity(queryEmbedding, tagEmbedding);
-			maxSimilarity = Math.max(maxSimilarity, similarity);
-		}
-		return maxSimilarity;
-	};
-
-	//////////////////////
-	//
-	// Embedding functions
-	//
-	//////////////////////
-
-	const loadEmbeddingModel = async () => {
-		const { env, AutoModel, AutoTokenizer } = await import('@huggingface/transformers');
-		if (env.backends.onnx.wasm) {
-			env.backends.onnx.wasm.wasmPaths = '/wasm/';
-		}
-
-		// Check if the tokenizer and model are already loaded and stored in the window object
-		if (!window.tokenizer) {
-			window.tokenizer = await AutoTokenizer.from_pretrained(EMBEDDING_MODEL);
-		}
-
-		if (!window.model) {
-			window.model = await AutoModel.from_pretrained(EMBEDDING_MODEL);
-		}
-
-		// Use the tokenizer and model from the window object
-		tokenizer = window.tokenizer;
-		model = window.model;
-
-		// Pre-compute embeddings for all unique tags
-		const allTags = new Set(feedbacks.flatMap((feedback) => feedback.data.tags || []));
-		await getTagEmbeddings(Array.from(allTags));
-	};
-
-	const getEmbeddings = async (text: string) => {
-		const tokens = await tokenizer(text);
-		const output = await model(tokens);
-
-		// Perform mean pooling on the last hidden states
-		const embeddings = output.last_hidden_state.mean(1);
-		return embeddings.ort_tensor.data;
-	};
-
-	const getTagEmbeddings = async (tags: string[]) => {
-		const embeddings = new Map();
-		for (const tag of tags) {
-			if (!tagEmbeddings.has(tag)) {
-				tagEmbeddings.set(tag, await getEmbeddings(tag));
-			}
-			embeddings.set(tag, tagEmbeddings.get(tag));
-		}
-		return embeddings;
-	};
-
-	const debouncedQueryHandler = async () => {
-		loadingLeaderboard = true;
-
-		if (query.trim() === '') {
-			rankHandler();
-			return;
-		}
-
+	const debouncedLoad = () => {
+		loading = true;
 		clearTimeout(debounceTimer);
-
-		debounceTimer = setTimeout(async () => {
-			const queryEmbedding = await getEmbeddings(query);
-			const similarities = new Map<string, number>();
-
-			for (const feedback of feedbacks) {
-				const feedbackTags = feedback.data.tags || [];
-				const tagEmbeddings = await getTagEmbeddings(feedbackTags);
-				const maxSimilarity = calculateMaxSimilarity(queryEmbedding, tagEmbeddings);
-				similarities.set(feedback.id, maxSimilarity);
-			}
-
-			rankHandler(similarities);
-		}, 1500); // Debounce for 1.5 seconds
+		debounceTimer = setTimeout(() => loadLeaderboard(query), 500);
 	};

-	$: query, debouncedQueryHandler();
+	$: query, debouncedLoad();

-	onMount(async () => {
-		rankHandler();
-	});
+	onMount(() => loadLeaderboard());

 	$: sortedModels = [...rankedModels].sort((a, b) => {
-		let aVal, bVal;
+		const getValue = (m, key) => {
+			if (key === 'name') return m.name ?? m.id ?? '';
+			if (key === 'rating') return m.rating === '-' ? -Infinity : m.rating;
+			if (key === 'won' || key === 'lost') {
+				const v = m.stats[key];
+				return v === '-' ? -Infinity : Number(v);
+			}
+			return 0;
+		};
+		const aVal = getValue(a, orderBy);
+		const bVal = getValue(b, orderBy);
 		if (orderBy === 'name') {
-			aVal = a.name;
-			bVal = b.name;
 			return direction === 'asc' ? aVal.localeCompare(bVal) : bVal.localeCompare(aVal);
-		} else if (orderBy === 'rating') {
-			aVal = a.rating === '-' ? -Infinity : a.rating;
-			bVal = b.rating === '-' ? -Infinity : b.rating;
-			return direction === 'asc' ? aVal - bVal : bVal - aVal;
-		} else if (orderBy === 'won') {
-			aVal = a.stats.won === '-' ? -Infinity : Number(a.stats.won);
-			bVal = b.stats.won === '-' ? -Infinity : Number(b.stats.won);
-			return direction === 'asc' ? aVal - bVal : bVal - aVal;
-		} else if (orderBy === 'lost') {
-			aVal = a.stats.lost === '-' ? -Infinity : Number(a.stats.lost);
-			bVal = b.stats.lost === '-' ? -Infinity : Number(b.stats.lost);
-			return direction === 'asc' ? aVal - bVal : bVal - aVal;
 		}
-		return 0;
+		return direction === 'asc' ? aVal - bVal : bVal - aVal;
 	});
 </script>

-<ModelModal
-	bind:show={showLeaderboardModal}
-	model={selectedModel}
-	{feedbacks}
-	onClose={closeLeaderboardModal}
-/>
+<ModelModal bind:show={showModal} model={selectedModel} onClose={closeModal} />

-<div
-	class="pt-0.5 pb-1 gap-1 flex flex-col md:flex-row justify-between sticky top-0 z-10 bg-white dark:bg-gray-900"
->
-	<div class="flex items-center md:self-center text-xl font-medium px-0.5 gap-2 shrink-0">
-		<div>
-			{$i18n.t('Leaderboard')}
-		</div>
-
-		<div class="text-lg font-medium text-gray-500 dark:text-gray-500">
-			{rankedModels.length}
-		</div>
-	</div>
-
-	<div class=" flex space-x-2">
-		<Tooltip content={$i18n.t('Re-rank models by topic similarity')}>
-			<div class="flex flex-1">
-				<div class=" self-center ml-1 mr-3">
-					<Search className="size-3" />
-				</div>
-				<input
-					class=" w-full text-sm pr-4 py-1 rounded-r-xl outline-hidden bg-transparent"
-					bind:value={query}
-					placeholder={$i18n.t('Search')}
-					on:focus={() => {
-						loadEmbeddingModel();
-					}}
-				/>
-			</div>
-		</Tooltip>
+<div class="pt-0.5 pb-1 gap-1 flex flex-col md:flex-row justify-between sticky top-0 z-10 bg-white dark:bg-gray-900">
+	<div class="flex items-center text-xl font-medium px-0.5 gap-2 shrink-0">
+		{$i18n.t('Leaderboard')}
+		<span class="text-lg text-gray-500">{rankedModels.length}</span>
 	</div>
+	<Tooltip content={$i18n.t('Re-rank models by topic similarity')}>
+		<div class="flex flex-1">
+			<Search className="size-3 ml-1 mr-3 self-center" />
+			<input
+				class="w-full text-sm pr-4 py-1 rounded-r-xl outline-hidden bg-transparent"
+				bind:value={query}
+				placeholder={$i18n.t('Search')}
+			/>
+		</div>
+	</Tooltip>
 </div>

-<div class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded-sm">
-	{#if loadingLeaderboard}
-		<div class=" absolute top-0 bottom-0 left-0 right-0 flex">
-			<div class="m-auto">
-				<Spinner className="size-5" />
-			</div>
+<div class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded-sm min-h-[100px]">
+	{#if loading}
+		<div class="absolute inset-0 flex items-center justify-center z-10 bg-white/50 dark:bg-gray-900/50">
+			<Spinner className="size-5" />
 		</div>
 	{/if}
-	{#if (rankedModels ?? []).length === 0}
-		<div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
-			{$i18n.t('No models found')}
-		</div>
-	{:else}
-		<table
-			class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full {loadingLeaderboard
-				? 'opacity-20'
-				: ''}"
-		>
+
+	{#if !rankedModels.length && !loading}
+		<div class="text-center text-xs text-gray-500 py-1">{$i18n.t('No models found')}</div>
+	{:else if rankedModels.length}
+		<table class="w-full text-sm text-left text-gray-500 dark:text-gray-400 {loading ? 'opacity-20' : ''}">
 			<thead class="text-xs text-gray-800 uppercase bg-transparent dark:text-gray-200">
-				<tr class=" border-b-[1.5px] border-gray-50 dark:border-gray-850/30">
-					<th
-						scope="col"
-						class="px-2.5 py-2 cursor-pointer select-none w-3"
-						on:click={() => setSortKey('rating')}
-					>
-						<div class="flex gap-1.5 items-center">
-							{$i18n.t('RK')}
-							{#if orderBy === 'rating'}
-								<span class="font-normal">
-									{#if direction === 'asc'}
-										<ChevronUp className="size-2" />
-									{:else}
-										<ChevronDown className="size-2" />
-									{/if}
-								</span>
-							{:else}
-								<span class="invisible">
-									<ChevronUp className="size-2" />
-								</span>
-							{/if}
-						</div>
-					</th>
-					<th
-						scope="col"
-						class="px-2.5 py-2 cursor-pointer select-none"
-						on:click={() => setSortKey('name')}
-					>
-						<div class="flex gap-1.5 items-center">
-							{$i18n.t('Model')}
-							{#if orderBy === 'name'}
-								<span class="font-normal">
-									{#if direction === 'asc'}
-										<ChevronUp className="size-2" />
-									{:else}
-										<ChevronDown className="size-2" />
-									{/if}
-								</span>
-							{:else}
-								<span class="invisible">
-									<ChevronUp className="size-2" />
-								</span>
-							{/if}
-						</div>
-					</th>
-					<th
-						scope="col"
-						class="px-2.5 py-2 text-right cursor-pointer select-none w-fit"
-						on:click={() => setSortKey('rating')}
-					>
-						<div class="flex gap-1.5 items-center justify-end">
-							{$i18n.t('Rating')}
-							{#if orderBy === 'rating'}
-								<span class="font-normal">
-									{#if direction === 'asc'}
-										<ChevronUp className="size-2" />
-									{:else}
-										<ChevronDown className="size-2" />
-									{/if}
-								</span>
-							{:else}
-								<span class="invisible">
-									<ChevronUp className="size-2" />
-								</span>
-							{/if}
-						</div>
-					</th>
-					<th
-						scope="col"
-						class="px-2.5 py-2 text-right cursor-pointer select-none w-5"
-						on:click={() => setSortKey('won')}
-					>
-						<div class="flex gap-1.5 items-center justify-end">
-							{$i18n.t('Won')}
-							{#if orderBy === 'won'}
-								<span class="font-normal">
-									{#if direction === 'asc'}
-										<ChevronUp className="size-2" />
-									{:else}
-										<ChevronDown className="size-2" />
-									{/if}
-								</span>
-							{:else}
-								<span class="invisible">
-									<ChevronUp className="size-2" />
-								</span>
-							{/if}
-						</div>
-					</th>
-					<th
-						scope="col"
-						class="px-2.5 py-2 text-right cursor-pointer select-none w-5"
-						on:click={() => setSortKey('lost')}
-					>
-						<div class="flex gap-1.5 items-center justify-end">
-							{$i18n.t('Lost')}
-							{#if orderBy === 'lost'}
-								<span class="font-normal">
-									{#if direction === 'asc'}
-										<ChevronUp className="size-2" />
-									{:else}
-										<ChevronDown className="size-2" />
-									{/if}
-								</span>
-							{:else}
-								<span class="invisible">
-									<ChevronUp className="size-2" />
-								</span>
-							{/if}
-						</div>
-					</th>
+				<tr class="border-b-[1.5px] border-gray-50 dark:border-gray-850/30">
+					{#each [
+						{ key: 'rating', label: 'RK', class: 'w-3' },
+						{ key: 'name', label: 'Model', class: '' },
+						{ key: 'rating', label: 'Rating', class: 'text-right w-fit' },
+						{ key: 'won', label: 'Won', class: 'text-right w-5' },
+						{ key: 'lost', label: 'Lost', class: 'text-right w-5' }
+					] as col}
+						<th
+							scope="col"
+							class="px-2.5 py-2 cursor-pointer select-none {col.class}"
+							on:click={() => toggleSort(col.key)}
+						>
+							<div class="flex gap-1.5 items-center {col.class.includes('right') ? 'justify-end' : ''}">
+								{$i18n.t(col.label)}
+								{#if orderBy === col.key}
+									{#if direction === 'asc'}<ChevronUp className="size-2" />{:else}<ChevronDown className="size-2" />{/if}
+								{:else}
+									<span class="invisible"><ChevronUp className="size-2" /></span>
+								{/if}
+							</div>
+						</th>
+					{/each}
 				</tr>
 			</thead>
-			<tbody class="">
-				{#each sortedModels as model, modelIdx (model.id)}
+			<tbody>
+				{#each sortedModels as model, idx (model.id)}
 					<tr
-						class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs group cursor-pointer hover:bg-gray-50 dark:hover:bg-gray-850/50 transition"
-						on:click={() => openLeaderboardModelModal(model)}
+						class="bg-white dark:bg-gray-900 text-xs group cursor-pointer hover:bg-gray-50 dark:hover:bg-gray-850/50 transition"
+						on:click={() => openModal(model)}
 					>
-						<td class="px-3 py-1.5 text-left font-medium text-gray-900 dark:text-white w-fit">
-							<div class=" line-clamp-1">
-								{model?.rating !== '-' ? modelIdx + 1 : '-'}
-							</div>
+						<td class="px-3 py-1.5 font-medium text-gray-900 dark:text-white">
+							{model.rating !== '-' ? idx + 1 : '-'}
 						</td>
-						<td class="px-3 py-1.5 flex flex-col justify-center">
+						<td class="px-3 py-1.5">
 							<div class="flex items-center gap-2">
-								<div class="shrink-0">
-									<img
-										src={`${WEBUI_API_BASE_URL}/models/model/profile/image?id=${model.id}`}
-										alt={model.name}
-										class="size-5 rounded-full object-cover shrink-0"
-									/>
-								</div>
-
-								<div class="font-medium text-gray-800 dark:text-gray-200 pr-4">
-									{model.name}
-								</div>
+								<img
+									src="{WEBUI_API_BASE_URL}/models/model/profile/image?id={model.id}"
+									alt={model.name}
+									class="size-5 rounded-full object-cover"
+								/>
+								<span class="font-medium text-gray-800 dark:text-gray-200">{model.name}</span>
 							</div>
 						</td>
-						<td class="px-3 py-1.5 text-right font-medium text-gray-900 dark:text-white w-max">
+						<td class="px-3 py-1.5 text-right font-medium text-gray-900 dark:text-white">
 							{model.rating}
 						</td>
-
-						<td class=" px-3 py-1.5 text-right font-medium text-green-500">
-							<div class=" w-10">
-								{#if model.stats.won === '-'}
-									-
-								{:else}
-									<span class="hidden group-hover:inline"
-										>{((model.stats.won / model.stats.count) * 100).toFixed(1)}%</span
-									>
-									<span class=" group-hover:hidden">{model.stats.won}</span>
-								{/if}
-							</div>
+						<td class="px-3 py-1.5 text-right font-medium text-green-500 w-10">
+							{#if model.stats.won === '-'}-{:else}
+								<span class="hidden group-hover:inline">{((Number(model.stats.won) / model.stats.count) * 100).toFixed(1)}%</span>
+								<span class="group-hover:hidden">{model.stats.won}</span>
+							{/if}
 						</td>
-
-						<td class="px-3 py-1.5 text-right font-medium text-red-500">
-							<div class=" w-10">
-								{#if model.stats.lost === '-'}
-									-
-								{:else}
-									<span class="hidden group-hover:inline"
-										>{((model.stats.lost / model.stats.count) * 100).toFixed(1)}%</span
-									>
-									<span class=" group-hover:hidden">{model.stats.lost}</span>
-								{/if}
-							</div>
+						<td class="px-3 py-1.5 text-right font-medium text-red-500 w-10">
+							{#if model.stats.lost === '-'}-{:else}
+								<span class="hidden group-hover:inline">{((Number(model.stats.lost) / model.stats.count) * 100).toFixed(1)}%</span>
+								<span class="group-hover:hidden">{model.stats.lost}</span>
+							{/if}
 						</td>
 					</tr>
 				{/each}
@@ -562,15 +201,11 @@
 	{/if}
 </div>

-<div class=" text-gray-500 text-xs mt-1.5 w-full flex justify-end">
-	<div class=" text-right">
+<div class="text-gray-500 text-xs mt-1.5 w-full flex justify-end">
+	<div class="text-right">
 		<div class="line-clamp-1">
-			ⓘ {$i18n.t(
-				'The evaluation leaderboard is based on the Elo rating system and is updated in real-time.'
-			)}
+			ⓘ {$i18n.t('The evaluation leaderboard is based on the Elo rating system and is updated in real-time.')}
 		</div>
-		{$i18n.t(
-			'The leaderboard is currently in beta, and we may adjust the rating calculations as we refine the algorithm.'
-		)}
+		{$i18n.t('The leaderboard is currently in beta, and we may adjust the rating calculations as we refine the algorithm.')}
 	</div>
 </div>
--- a/src/lib/components/admin/Evaluations/LeaderboardModal.svelte
+++ b/src/lib/components/admin/Evaluations/LeaderboardModal.svelte
@@ -3,7 +3,6 @@
 	import { getContext } from 'svelte';
 	export let show = false;
 	export let model = null;
-	export let feedbacks = [];
 	export let onClose: () => void = () => {};
 	const i18n = getContext('i18n');
 	import XMark from '$lib/components/icons/XMark.svelte';
@@ -13,22 +12,8 @@
 		onClose();
 	};

-	$: topTags = model ? getTopTagsForModel(model.id, feedbacks) : [];
-
-	const getTopTagsForModel = (modelId: string, feedbacks: any[], topN = 5) => {
-		const tagCounts = new Map();
-		feedbacks
-			.filter((fb) => fb.data.model_id === modelId)
-			.forEach((fb) => {
-				(fb.data.tags || []).forEach((tag) => {
-					tagCounts.set(tag, (tagCounts.get(tag) || 0) + 1);
-				});
-			});
-		return Array.from(tagCounts.entries())
-			.sort((a, b) => b[1] - a[1])
-			.slice(0, topN)
-			.map(([tag, count]) => ({ tag, count }));
-	};
+	// Use top_tags from backend response (already computed)
+	$: topTags = model?.top_tags ?? [];
 </script>

 <Modal size="sm" bind:show>