refac/fix: feedback leaderboard
This commit is contained in:
@@ -68,6 +68,13 @@ class FeedbackIdResponse(BaseModel):
|
||||
updated_at: int
|
||||
|
||||
|
||||
class LeaderboardFeedbackData(BaseModel):
|
||||
"""Minimal feedback data for leaderboard computation (excludes snapshot/meta)."""
|
||||
|
||||
id: str
|
||||
data: Optional[dict] = None
|
||||
|
||||
|
||||
class RatingData(BaseModel):
|
||||
rating: Optional[str | int] = None
|
||||
model_id: Optional[str] = None
|
||||
@@ -271,6 +278,16 @@ class FeedbackTable:
|
||||
.all()
|
||||
]
|
||||
|
||||
def get_feedbacks_for_leaderboard(
|
||||
self, db: Optional[Session] = None
|
||||
) -> list[LeaderboardFeedbackData]:
|
||||
"""Fetch only id and data for leaderboard computation (excludes snapshot/meta)."""
|
||||
with get_db_context(db) as db:
|
||||
return [
|
||||
LeaderboardFeedbackData(id=row.id, data=row.data)
|
||||
for row in db.query(Feedback.id, Feedback.data).all()
|
||||
]
|
||||
|
||||
def get_feedbacks_by_type(
|
||||
self, type: str, db: Optional[Session] = None
|
||||
) -> list[FeedbackModel]:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
from typing import Optional
|
||||
import logging
|
||||
from fastapi import APIRouter, Depends, HTTPException, status, Request
|
||||
from fastapi.concurrency import run_in_threadpool
|
||||
from pydantic import BaseModel
|
||||
|
||||
from open_webui.models.users import Users, UserModel
|
||||
@@ -10,6 +12,7 @@ from open_webui.models.feedbacks import (
|
||||
FeedbackForm,
|
||||
FeedbackUserResponse,
|
||||
FeedbackListResponse,
|
||||
LeaderboardFeedbackData,
|
||||
Feedbacks,
|
||||
)
|
||||
|
||||
@@ -18,9 +21,239 @@ from open_webui.utils.auth import get_admin_user, get_verified_user
|
||||
from open_webui.internal.db import get_session
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
router = APIRouter()
|
||||
|
||||
|
||||
# Leaderboard Elo Rating Computation
|
||||
#
|
||||
# How it works:
|
||||
# 1. Each model starts with a rating of 1000
|
||||
# 2. When a user picks a winner between two models, ratings are adjusted:
|
||||
# - Winner gains points, loser loses points
|
||||
# - The amount depends on expected outcome (upset = bigger change)
|
||||
# 3. The Elo formula: new_rating = old_rating + K * (actual - expected)
|
||||
# - K=32 controls how much ratings can change per match
|
||||
# - expected = probability of winning based on current ratings
|
||||
#
|
||||
# Query-based re-ranking (optional):
|
||||
# When a user searches for a topic (e.g., "coding"), we want to show
|
||||
# which models perform best FOR THAT TOPIC. We do this by:
|
||||
# 1. Computing semantic similarity between the query and each feedback's tags
|
||||
# 2. Using that similarity as a weight in the Elo calculation
|
||||
# 3. Feedbacks about "coding" contribute more to the final ranking
|
||||
# 4. Feedbacks about unrelated topics (e.g., "cooking") contribute less
|
||||
# This gives topic-specific leaderboards without needing separate data.
|
||||
|
||||
EMBEDDING_MODEL_NAME = "TaylorAI/bge-micro-v2"
|
||||
_embedding_model = None
|
||||
|
||||
|
||||
def _get_embedding_model():
|
||||
global _embedding_model
|
||||
if _embedding_model is None:
|
||||
try:
|
||||
from sentence_transformers import SentenceTransformer
|
||||
|
||||
_embedding_model = SentenceTransformer(EMBEDDING_MODEL_NAME)
|
||||
except Exception as e:
|
||||
log.error(f"Embedding model load failed: {e}")
|
||||
return _embedding_model
|
||||
|
||||
|
||||
def _calculate_elo(
|
||||
feedbacks: list[LeaderboardFeedbackData], similarities: dict = None
|
||||
) -> dict:
|
||||
"""
|
||||
Calculate Elo ratings for models based on user feedback.
|
||||
|
||||
Each feedback represents a comparison where a user rated one model
|
||||
against its opponents (sibling_model_ids). Rating=1 means the model won,
|
||||
rating=-1 means it lost.
|
||||
|
||||
The Elo system adjusts ratings based on:
|
||||
- Current rating difference (upsets cause bigger swings)
|
||||
- Optional similarity weights (for query-based filtering)
|
||||
|
||||
Returns: {model_id: {"rating": float, "won": int, "lost": int}}
|
||||
"""
|
||||
K_FACTOR = 32 # Standard Elo K-factor for rating volatility
|
||||
model_stats = {}
|
||||
|
||||
def get_or_create_stats(model_id):
|
||||
if model_id not in model_stats:
|
||||
model_stats[model_id] = {"rating": 1000.0, "won": 0, "lost": 0}
|
||||
return model_stats[model_id]
|
||||
|
||||
for feedback in feedbacks:
|
||||
data = feedback.data or {}
|
||||
winner_id = data.get("model_id")
|
||||
rating_value = str(data.get("rating", ""))
|
||||
if not winner_id or rating_value not in ("1", "-1"):
|
||||
continue
|
||||
|
||||
won = rating_value == "1"
|
||||
weight = similarities.get(feedback.id, 1.0) if similarities else 1.0
|
||||
|
||||
for opponent_id in data.get("sibling_model_ids") or []:
|
||||
winner = get_or_create_stats(winner_id)
|
||||
opponent = get_or_create_stats(opponent_id)
|
||||
expected = 1 / (1 + 10 ** ((opponent["rating"] - winner["rating"]) / 400))
|
||||
|
||||
winner["rating"] += K_FACTOR * ((1 if won else 0) - expected) * weight
|
||||
opponent["rating"] += (
|
||||
K_FACTOR * ((0 if won else 1) - (1 - expected)) * weight
|
||||
)
|
||||
|
||||
if won:
|
||||
winner["won"] += 1
|
||||
opponent["lost"] += 1
|
||||
else:
|
||||
winner["lost"] += 1
|
||||
opponent["won"] += 1
|
||||
|
||||
return model_stats
|
||||
|
||||
|
||||
def _get_top_tags(feedbacks: list[LeaderboardFeedbackData], limit: int = 5) -> dict:
|
||||
"""
|
||||
Count tag occurrences per model and return the most frequent ones.
|
||||
|
||||
Each feedback can have tags describing the conversation topic.
|
||||
This aggregates those tags per model to show what topics each model
|
||||
is commonly used for.
|
||||
|
||||
Returns: {model_id: [{"tag": str, "count": int}, ...]}
|
||||
"""
|
||||
from collections import defaultdict
|
||||
|
||||
tag_counts = defaultdict(lambda: defaultdict(int))
|
||||
|
||||
for feedback in feedbacks:
|
||||
data = feedback.data or {}
|
||||
model_id = data.get("model_id")
|
||||
if model_id:
|
||||
for tag in data.get("tags", []):
|
||||
tag_counts[model_id][tag] += 1
|
||||
|
||||
return {
|
||||
model_id: [
|
||||
{"tag": tag, "count": count}
|
||||
for tag, count in sorted(tags.items(), key=lambda x: -x[1])[:limit]
|
||||
]
|
||||
for model_id, tags in tag_counts.items()
|
||||
}
|
||||
|
||||
|
||||
def _compute_similarities(feedbacks: list[LeaderboardFeedbackData], query: str) -> dict:
|
||||
"""
|
||||
Compute how relevant each feedback is to a search query.
|
||||
|
||||
Uses embeddings to find semantic similarity between the query and
|
||||
each feedback's tags. Higher similarity means the feedback is more
|
||||
relevant to what the user searched for.
|
||||
|
||||
This is used to weight Elo calculations - feedbacks matching the
|
||||
query have more influence on the final rankings.
|
||||
|
||||
Returns: {feedback_id: similarity_score (0-1)}
|
||||
"""
|
||||
import numpy as np
|
||||
|
||||
embedding_model = _get_embedding_model()
|
||||
if not embedding_model:
|
||||
return {}
|
||||
|
||||
all_tags = list(
|
||||
{
|
||||
tag
|
||||
for feedback in feedbacks
|
||||
if feedback.data
|
||||
for tag in feedback.data.get("tags", [])
|
||||
}
|
||||
)
|
||||
if not all_tags:
|
||||
return {}
|
||||
|
||||
try:
|
||||
tag_embeddings = embedding_model.encode(all_tags)
|
||||
query_embedding = embedding_model.encode([query])[0]
|
||||
except Exception as e:
|
||||
log.error(f"Embedding error: {e}")
|
||||
return {}
|
||||
|
||||
# Vectorized cosine similarity
|
||||
tag_norms = np.linalg.norm(tag_embeddings, axis=1)
|
||||
query_norm = np.linalg.norm(query_embedding)
|
||||
similarities = np.dot(tag_embeddings, query_embedding) / (
|
||||
tag_norms * query_norm + 1e-9
|
||||
)
|
||||
tag_similarity_map = dict(zip(all_tags, similarities.tolist()))
|
||||
|
||||
return {
|
||||
feedback.id: max(
|
||||
(
|
||||
tag_similarity_map.get(tag, 0)
|
||||
for tag in (feedback.data or {}).get("tags", [])
|
||||
),
|
||||
default=0,
|
||||
)
|
||||
for feedback in feedbacks
|
||||
}
|
||||
|
||||
|
||||
class LeaderboardEntry(BaseModel):
|
||||
model_id: str
|
||||
rating: int
|
||||
won: int
|
||||
lost: int
|
||||
count: int
|
||||
top_tags: list[dict]
|
||||
|
||||
|
||||
class LeaderboardResponse(BaseModel):
|
||||
entries: list[LeaderboardEntry]
|
||||
|
||||
|
||||
@router.get("/leaderboard", response_model=LeaderboardResponse)
|
||||
async def get_leaderboard(
|
||||
query: Optional[str] = None,
|
||||
user=Depends(get_admin_user),
|
||||
db: Session = Depends(get_session),
|
||||
):
|
||||
"""Get model leaderboard with Elo ratings. Query filters by tag similarity."""
|
||||
feedbacks = Feedbacks.get_feedbacks_for_leaderboard(db=db)
|
||||
|
||||
similarities = None
|
||||
if query and query.strip():
|
||||
similarities = await run_in_threadpool(
|
||||
_compute_similarities, feedbacks, query.strip()
|
||||
)
|
||||
|
||||
elo_stats = _calculate_elo(feedbacks, similarities)
|
||||
tags_by_model = _get_top_tags(feedbacks)
|
||||
|
||||
entries = sorted(
|
||||
[
|
||||
LeaderboardEntry(
|
||||
model_id=mid,
|
||||
rating=round(s["rating"]),
|
||||
won=s["won"],
|
||||
lost=s["lost"],
|
||||
count=s["won"] + s["lost"],
|
||||
top_tags=tags_by_model.get(mid, []),
|
||||
)
|
||||
for mid, s in elo_stats.items()
|
||||
],
|
||||
key=lambda e: e.rating,
|
||||
reverse=True,
|
||||
)
|
||||
|
||||
return LeaderboardResponse(entries=entries)
|
||||
|
||||
|
||||
############################
|
||||
# GetConfig
|
||||
############################
|
||||
|
||||
@@ -93,6 +93,40 @@ export const getAllFeedbacks = async (token: string = '') => {
|
||||
return res;
|
||||
};
|
||||
|
||||
export const getLeaderboard = async (token: string = '', query: string = '') => {
|
||||
let error = null;
|
||||
|
||||
const searchParams = new URLSearchParams();
|
||||
if (query) searchParams.append('query', query);
|
||||
|
||||
const res = await fetch(
|
||||
`${WEBUI_API_BASE_URL}/evaluations/leaderboard?${searchParams.toString()}`,
|
||||
{
|
||||
method: 'GET',
|
||||
headers: {
|
||||
Accept: 'application/json',
|
||||
'Content-Type': 'application/json',
|
||||
authorization: `Bearer ${token}`
|
||||
}
|
||||
}
|
||||
)
|
||||
.then(async (res) => {
|
||||
if (!res.ok) throw await res.json();
|
||||
return res.json();
|
||||
})
|
||||
.catch((err) => {
|
||||
error = err.detail;
|
||||
console.error(err);
|
||||
return null;
|
||||
});
|
||||
|
||||
if (error) {
|
||||
throw error;
|
||||
}
|
||||
|
||||
return res;
|
||||
};
|
||||
|
||||
export const getFeedbackItems = async (token: string = '', orderBy, direction, page) => {
|
||||
let error = null;
|
||||
|
||||
|
||||
@@ -6,8 +6,7 @@
|
||||
import Leaderboard from './Evaluations/Leaderboard.svelte';
|
||||
import Feedbacks from './Evaluations/Feedbacks.svelte';
|
||||
|
||||
import { getAllFeedbacks } from '$lib/apis/evaluations';
|
||||
|
||||
|
||||
const i18n = getContext('i18n');
|
||||
|
||||
let selectedTab;
|
||||
@@ -30,12 +29,8 @@
|
||||
};
|
||||
|
||||
let loaded = false;
|
||||
let feedbacks = [];
|
||||
|
||||
onMount(async () => {
|
||||
// TODO: feedbacks elo rating calculation should be done in the backend; remove below line later
|
||||
feedbacks = await getAllFeedbacks(localStorage.token);
|
||||
|
||||
loaded = true;
|
||||
|
||||
const containerElement = document.getElementById('users-tabs-container');
|
||||
@@ -117,7 +112,7 @@
|
||||
|
||||
<div class="flex-1 mt-1 lg:mt-0 px-[16px] lg:pr-[16px] lg:pl-0 overflow-y-scroll">
|
||||
{#if selectedTab === 'leaderboard'}
|
||||
<Leaderboard {feedbacks} />
|
||||
<Leaderboard />
|
||||
{:else if selectedTab === 'feedback'}
|
||||
<Feedbacks />
|
||||
{/if}
|
||||
|
||||
@@ -1,559 +1,198 @@
|
||||
<script lang="ts">
|
||||
import { onMount, getContext } from 'svelte';
|
||||
import { models } from '$lib/stores';
|
||||
|
||||
import { getLeaderboard } from '$lib/apis/evaluations';
|
||||
import ModelModal from './LeaderboardModal.svelte';
|
||||
|
||||
import Spinner from '$lib/components/common/Spinner.svelte';
|
||||
import Tooltip from '$lib/components/common/Tooltip.svelte';
|
||||
import Search from '$lib/components/icons/Search.svelte';
|
||||
|
||||
import ChevronUp from '$lib/components/icons/ChevronUp.svelte';
|
||||
import ChevronDown from '$lib/components/icons/ChevronDown.svelte';
|
||||
import { WEBUI_API_BASE_URL, WEBUI_BASE_URL } from '$lib/constants';
|
||||
import { WEBUI_API_BASE_URL } from '$lib/constants';
|
||||
|
||||
const i18n = getContext('i18n');
|
||||
|
||||
const EMBEDDING_MODEL = 'TaylorAI/bge-micro-v2';
|
||||
|
||||
let tokenizer = null;
|
||||
let model = null;
|
||||
|
||||
export let feedbacks = [];
|
||||
|
||||
let rankedModels = [];
|
||||
|
||||
let query = '';
|
||||
let loading = true;
|
||||
let debounceTimer: ReturnType<typeof setTimeout>;
|
||||
let orderBy = 'rating';
|
||||
let direction: 'asc' | 'desc' = 'desc';
|
||||
|
||||
let tagEmbeddings = new Map();
|
||||
let loadingLeaderboard = true;
|
||||
let debounceTimer;
|
||||
let showModal = false;
|
||||
let selectedModel = null;
|
||||
|
||||
let orderBy: string = 'rating'; // default sort column
|
||||
let direction: 'asc' | 'desc' = 'desc'; // default sort order
|
||||
|
||||
type Feedback = {
|
||||
id: string;
|
||||
data: {
|
||||
rating: number;
|
||||
model_id: string;
|
||||
sibling_model_ids: string[] | null;
|
||||
reason: string;
|
||||
comment: string;
|
||||
tags: string[];
|
||||
};
|
||||
user: {
|
||||
name: string;
|
||||
profile_image_url: string;
|
||||
};
|
||||
updated_at: number;
|
||||
};
|
||||
|
||||
type ModelStats = {
|
||||
rating: number;
|
||||
won: number;
|
||||
lost: number;
|
||||
};
|
||||
|
||||
function setSortKey(key) {
|
||||
const toggleSort = (key: string) => {
|
||||
if (orderBy === key) {
|
||||
direction = direction === 'asc' ? 'desc' : 'asc';
|
||||
} else {
|
||||
orderBy = key;
|
||||
direction = key === 'name' ? 'asc' : 'desc';
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
//////////////////////
|
||||
//
|
||||
// Aggregate Level Modal
|
||||
//
|
||||
//////////////////////
|
||||
|
||||
let showLeaderboardModal = false;
|
||||
let selectedModel = null;
|
||||
|
||||
const openLeaderboardModelModal = (model) => {
|
||||
showLeaderboardModal = true;
|
||||
const openModal = (model) => {
|
||||
selectedModel = model;
|
||||
showModal = true;
|
||||
};
|
||||
|
||||
const closeLeaderboardModal = () => {
|
||||
showLeaderboardModal = false;
|
||||
const closeModal = () => {
|
||||
selectedModel = null;
|
||||
showModal = false;
|
||||
};
|
||||
|
||||
//////////////////////
|
||||
//
|
||||
// Rank models by Elo rating
|
||||
//
|
||||
//////////////////////
|
||||
const loadLeaderboard = async (searchQuery = '') => {
|
||||
loading = true;
|
||||
try {
|
||||
const result = await getLeaderboard(localStorage.token, searchQuery);
|
||||
const statsMap = new Map(
|
||||
(result?.entries ?? []).map((e) => [e.model_id, e])
|
||||
);
|
||||
|
||||
const rankHandler = async (similarities: Map<string, number> = new Map()) => {
|
||||
const modelStats = calculateModelStats(feedbacks, similarities);
|
||||
|
||||
rankedModels = $models
|
||||
.filter((m) => m?.owned_by !== 'arena' && (m?.info?.meta?.hidden ?? false) !== true)
|
||||
.map((model) => {
|
||||
const stats = modelStats.get(model.id);
|
||||
return {
|
||||
...model,
|
||||
rating: stats ? Math.round(stats.rating) : '-',
|
||||
stats: {
|
||||
count: stats ? stats.won + stats.lost : 0,
|
||||
won: stats ? stats.won.toString() : '-',
|
||||
lost: stats ? stats.lost.toString() : '-'
|
||||
}
|
||||
};
|
||||
})
|
||||
.sort((a, b) => {
|
||||
if (a.rating === '-' && b.rating !== '-') return 1;
|
||||
if (b.rating === '-' && a.rating !== '-') return -1;
|
||||
if (a.rating !== '-' && b.rating !== '-') return b.rating - a.rating;
|
||||
return (a?.name ?? a?.id ?? '').localeCompare(b?.name ?? b?.id ?? '');
|
||||
});
|
||||
|
||||
loadingLeaderboard = false;
|
||||
rankedModels = $models
|
||||
.filter((m) => m?.owned_by !== 'arena' && !m?.info?.meta?.hidden)
|
||||
.map((model) => {
|
||||
const s = statsMap.get(model.id);
|
||||
return {
|
||||
...model,
|
||||
rating: s?.rating ?? '-',
|
||||
stats: {
|
||||
count: s ? s.won + s.lost : 0,
|
||||
won: s?.won?.toString() ?? '-',
|
||||
lost: s?.lost?.toString() ?? '-'
|
||||
},
|
||||
top_tags: s?.top_tags ?? []
|
||||
};
|
||||
})
|
||||
.sort((a, b) => {
|
||||
if (a.rating === '-') return 1;
|
||||
if (b.rating === '-') return -1;
|
||||
return b.rating - a.rating;
|
||||
});
|
||||
} catch (err) {
|
||||
console.error('Leaderboard load failed:', err);
|
||||
}
|
||||
loading = false;
|
||||
};
|
||||
|
||||
function calculateModelStats(
|
||||
feedbacks: Feedback[],
|
||||
similarities: Map<string, number>
|
||||
): Map<string, ModelStats> {
|
||||
const stats = new Map<string, ModelStats>();
|
||||
const K = 32;
|
||||
|
||||
function getOrDefaultStats(modelId: string): ModelStats {
|
||||
return stats.get(modelId) || { rating: 1000, won: 0, lost: 0 };
|
||||
}
|
||||
|
||||
function updateStats(modelId: string, ratingChange: number, outcome: number) {
|
||||
const currentStats = getOrDefaultStats(modelId);
|
||||
currentStats.rating += ratingChange;
|
||||
if (outcome === 1) currentStats.won++;
|
||||
else if (outcome === 0) currentStats.lost++;
|
||||
stats.set(modelId, currentStats);
|
||||
}
|
||||
|
||||
function calculateEloChange(
|
||||
ratingA: number,
|
||||
ratingB: number,
|
||||
outcome: number,
|
||||
similarity: number
|
||||
): number {
|
||||
const expectedScore = 1 / (1 + Math.pow(10, (ratingB - ratingA) / 400));
|
||||
return K * (outcome - expectedScore) * similarity;
|
||||
}
|
||||
|
||||
feedbacks.forEach((feedback) => {
|
||||
if (!feedback?.data?.model_id || !feedback?.data?.rating) return;
|
||||
|
||||
const modelA = feedback.data.model_id;
|
||||
const statsA = getOrDefaultStats(modelA);
|
||||
let outcome: number;
|
||||
|
||||
switch (feedback.data.rating.toString()) {
|
||||
case '1':
|
||||
outcome = 1;
|
||||
break;
|
||||
case '-1':
|
||||
outcome = 0;
|
||||
break;
|
||||
default:
|
||||
return; // Skip invalid ratings
|
||||
}
|
||||
|
||||
// If the query is empty, set similarity to 1, else get the similarity from the map
|
||||
const similarity = query !== '' ? similarities.get(feedback.id) || 0 : 1;
|
||||
const opponents = feedback.data.sibling_model_ids || [];
|
||||
|
||||
opponents.forEach((modelB) => {
|
||||
const statsB = getOrDefaultStats(modelB);
|
||||
const changeA = calculateEloChange(statsA.rating, statsB.rating, outcome, similarity);
|
||||
const changeB = calculateEloChange(statsB.rating, statsA.rating, 1 - outcome, similarity);
|
||||
|
||||
updateStats(modelA, changeA, outcome);
|
||||
updateStats(modelB, changeB, 1 - outcome);
|
||||
});
|
||||
});
|
||||
|
||||
return stats;
|
||||
}
|
||||
|
||||
//////////////////////
|
||||
//
|
||||
// Calculate cosine similarity
|
||||
//
|
||||
//////////////////////
|
||||
|
||||
const cosineSimilarity = (vecA, vecB) => {
|
||||
// Ensure the lengths of the vectors are the same
|
||||
if (vecA.length !== vecB.length) {
|
||||
throw new Error('Vectors must be the same length');
|
||||
}
|
||||
|
||||
// Calculate the dot product
|
||||
let dotProduct = 0;
|
||||
let normA = 0;
|
||||
let normB = 0;
|
||||
|
||||
for (let i = 0; i < vecA.length; i++) {
|
||||
dotProduct += vecA[i] * vecB[i];
|
||||
normA += vecA[i] ** 2;
|
||||
normB += vecB[i] ** 2;
|
||||
}
|
||||
|
||||
// Calculate the magnitudes
|
||||
normA = Math.sqrt(normA);
|
||||
normB = Math.sqrt(normB);
|
||||
|
||||
// Avoid division by zero
|
||||
if (normA === 0 || normB === 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Return the cosine similarity
|
||||
return dotProduct / (normA * normB);
|
||||
};
|
||||
|
||||
const calculateMaxSimilarity = (queryEmbedding, tagEmbeddings: Map<string, number[]>) => {
|
||||
let maxSimilarity = 0;
|
||||
for (const tagEmbedding of tagEmbeddings.values()) {
|
||||
const similarity = cosineSimilarity(queryEmbedding, tagEmbedding);
|
||||
maxSimilarity = Math.max(maxSimilarity, similarity);
|
||||
}
|
||||
return maxSimilarity;
|
||||
};
|
||||
|
||||
//////////////////////
|
||||
//
|
||||
// Embedding functions
|
||||
//
|
||||
//////////////////////
|
||||
|
||||
const loadEmbeddingModel = async () => {
|
||||
const { env, AutoModel, AutoTokenizer } = await import('@huggingface/transformers');
|
||||
if (env.backends.onnx.wasm) {
|
||||
env.backends.onnx.wasm.wasmPaths = '/wasm/';
|
||||
}
|
||||
|
||||
// Check if the tokenizer and model are already loaded and stored in the window object
|
||||
if (!window.tokenizer) {
|
||||
window.tokenizer = await AutoTokenizer.from_pretrained(EMBEDDING_MODEL);
|
||||
}
|
||||
|
||||
if (!window.model) {
|
||||
window.model = await AutoModel.from_pretrained(EMBEDDING_MODEL);
|
||||
}
|
||||
|
||||
// Use the tokenizer and model from the window object
|
||||
tokenizer = window.tokenizer;
|
||||
model = window.model;
|
||||
|
||||
// Pre-compute embeddings for all unique tags
|
||||
const allTags = new Set(feedbacks.flatMap((feedback) => feedback.data.tags || []));
|
||||
await getTagEmbeddings(Array.from(allTags));
|
||||
};
|
||||
|
||||
const getEmbeddings = async (text: string) => {
|
||||
const tokens = await tokenizer(text);
|
||||
const output = await model(tokens);
|
||||
|
||||
// Perform mean pooling on the last hidden states
|
||||
const embeddings = output.last_hidden_state.mean(1);
|
||||
return embeddings.ort_tensor.data;
|
||||
};
|
||||
|
||||
const getTagEmbeddings = async (tags: string[]) => {
|
||||
const embeddings = new Map();
|
||||
for (const tag of tags) {
|
||||
if (!tagEmbeddings.has(tag)) {
|
||||
tagEmbeddings.set(tag, await getEmbeddings(tag));
|
||||
}
|
||||
embeddings.set(tag, tagEmbeddings.get(tag));
|
||||
}
|
||||
return embeddings;
|
||||
};
|
||||
|
||||
const debouncedQueryHandler = async () => {
|
||||
loadingLeaderboard = true;
|
||||
|
||||
if (query.trim() === '') {
|
||||
rankHandler();
|
||||
return;
|
||||
}
|
||||
|
||||
const debouncedLoad = () => {
|
||||
loading = true;
|
||||
clearTimeout(debounceTimer);
|
||||
|
||||
debounceTimer = setTimeout(async () => {
|
||||
const queryEmbedding = await getEmbeddings(query);
|
||||
const similarities = new Map<string, number>();
|
||||
|
||||
for (const feedback of feedbacks) {
|
||||
const feedbackTags = feedback.data.tags || [];
|
||||
const tagEmbeddings = await getTagEmbeddings(feedbackTags);
|
||||
const maxSimilarity = calculateMaxSimilarity(queryEmbedding, tagEmbeddings);
|
||||
similarities.set(feedback.id, maxSimilarity);
|
||||
}
|
||||
|
||||
rankHandler(similarities);
|
||||
}, 1500); // Debounce for 1.5 seconds
|
||||
debounceTimer = setTimeout(() => loadLeaderboard(query), 500);
|
||||
};
|
||||
|
||||
$: query, debouncedQueryHandler();
|
||||
$: query, debouncedLoad();
|
||||
|
||||
onMount(async () => {
|
||||
rankHandler();
|
||||
});
|
||||
onMount(() => loadLeaderboard());
|
||||
|
||||
$: sortedModels = [...rankedModels].sort((a, b) => {
|
||||
let aVal, bVal;
|
||||
const getValue = (m, key) => {
|
||||
if (key === 'name') return m.name ?? m.id ?? '';
|
||||
if (key === 'rating') return m.rating === '-' ? -Infinity : m.rating;
|
||||
if (key === 'won' || key === 'lost') {
|
||||
const v = m.stats[key];
|
||||
return v === '-' ? -Infinity : Number(v);
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
const aVal = getValue(a, orderBy);
|
||||
const bVal = getValue(b, orderBy);
|
||||
if (orderBy === 'name') {
|
||||
aVal = a.name;
|
||||
bVal = b.name;
|
||||
return direction === 'asc' ? aVal.localeCompare(bVal) : bVal.localeCompare(aVal);
|
||||
} else if (orderBy === 'rating') {
|
||||
aVal = a.rating === '-' ? -Infinity : a.rating;
|
||||
bVal = b.rating === '-' ? -Infinity : b.rating;
|
||||
return direction === 'asc' ? aVal - bVal : bVal - aVal;
|
||||
} else if (orderBy === 'won') {
|
||||
aVal = a.stats.won === '-' ? -Infinity : Number(a.stats.won);
|
||||
bVal = b.stats.won === '-' ? -Infinity : Number(b.stats.won);
|
||||
return direction === 'asc' ? aVal - bVal : bVal - aVal;
|
||||
} else if (orderBy === 'lost') {
|
||||
aVal = a.stats.lost === '-' ? -Infinity : Number(a.stats.lost);
|
||||
bVal = b.stats.lost === '-' ? -Infinity : Number(b.stats.lost);
|
||||
return direction === 'asc' ? aVal - bVal : bVal - aVal;
|
||||
}
|
||||
return 0;
|
||||
return direction === 'asc' ? aVal - bVal : bVal - aVal;
|
||||
});
|
||||
</script>
|
||||
|
||||
<ModelModal
|
||||
bind:show={showLeaderboardModal}
|
||||
model={selectedModel}
|
||||
{feedbacks}
|
||||
onClose={closeLeaderboardModal}
|
||||
/>
|
||||
<ModelModal bind:show={showModal} model={selectedModel} onClose={closeModal} />
|
||||
|
||||
<div
|
||||
class="pt-0.5 pb-1 gap-1 flex flex-col md:flex-row justify-between sticky top-0 z-10 bg-white dark:bg-gray-900"
|
||||
>
|
||||
<div class="flex items-center md:self-center text-xl font-medium px-0.5 gap-2 shrink-0">
|
||||
<div>
|
||||
{$i18n.t('Leaderboard')}
|
||||
</div>
|
||||
|
||||
<div class="text-lg font-medium text-gray-500 dark:text-gray-500">
|
||||
{rankedModels.length}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class=" flex space-x-2">
|
||||
<Tooltip content={$i18n.t('Re-rank models by topic similarity')}>
|
||||
<div class="flex flex-1">
|
||||
<div class=" self-center ml-1 mr-3">
|
||||
<Search className="size-3" />
|
||||
</div>
|
||||
<input
|
||||
class=" w-full text-sm pr-4 py-1 rounded-r-xl outline-hidden bg-transparent"
|
||||
bind:value={query}
|
||||
placeholder={$i18n.t('Search')}
|
||||
on:focus={() => {
|
||||
loadEmbeddingModel();
|
||||
}}
|
||||
/>
|
||||
</div>
|
||||
</Tooltip>
|
||||
<div class="pt-0.5 pb-1 gap-1 flex flex-col md:flex-row justify-between sticky top-0 z-10 bg-white dark:bg-gray-900">
|
||||
<div class="flex items-center text-xl font-medium px-0.5 gap-2 shrink-0">
|
||||
{$i18n.t('Leaderboard')}
|
||||
<span class="text-lg text-gray-500">{rankedModels.length}</span>
|
||||
</div>
|
||||
<Tooltip content={$i18n.t('Re-rank models by topic similarity')}>
|
||||
<div class="flex flex-1">
|
||||
<Search className="size-3 ml-1 mr-3 self-center" />
|
||||
<input
|
||||
class="w-full text-sm pr-4 py-1 rounded-r-xl outline-hidden bg-transparent"
|
||||
bind:value={query}
|
||||
placeholder={$i18n.t('Search')}
|
||||
/>
|
||||
</div>
|
||||
</Tooltip>
|
||||
</div>
|
||||
|
||||
<div class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded-sm">
|
||||
{#if loadingLeaderboard}
|
||||
<div class=" absolute top-0 bottom-0 left-0 right-0 flex">
|
||||
<div class="m-auto">
|
||||
<Spinner className="size-5" />
|
||||
</div>
|
||||
<div class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded-sm min-h-[100px]">
|
||||
{#if loading}
|
||||
<div class="absolute inset-0 flex items-center justify-center z-10 bg-white/50 dark:bg-gray-900/50">
|
||||
<Spinner className="size-5" />
|
||||
</div>
|
||||
{/if}
|
||||
{#if (rankedModels ?? []).length === 0}
|
||||
<div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
|
||||
{$i18n.t('No models found')}
|
||||
</div>
|
||||
{:else}
|
||||
<table
|
||||
class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full {loadingLeaderboard
|
||||
? 'opacity-20'
|
||||
: ''}"
|
||||
>
|
||||
|
||||
{#if !rankedModels.length && !loading}
|
||||
<div class="text-center text-xs text-gray-500 py-1">{$i18n.t('No models found')}</div>
|
||||
{:else if rankedModels.length}
|
||||
<table class="w-full text-sm text-left text-gray-500 dark:text-gray-400 {loading ? 'opacity-20' : ''}">
|
||||
<thead class="text-xs text-gray-800 uppercase bg-transparent dark:text-gray-200">
|
||||
<tr class=" border-b-[1.5px] border-gray-50 dark:border-gray-850/30">
|
||||
<th
|
||||
scope="col"
|
||||
class="px-2.5 py-2 cursor-pointer select-none w-3"
|
||||
on:click={() => setSortKey('rating')}
|
||||
>
|
||||
<div class="flex gap-1.5 items-center">
|
||||
{$i18n.t('RK')}
|
||||
{#if orderBy === 'rating'}
|
||||
<span class="font-normal">
|
||||
{#if direction === 'asc'}
|
||||
<ChevronUp className="size-2" />
|
||||
{:else}
|
||||
<ChevronDown className="size-2" />
|
||||
{/if}
|
||||
</span>
|
||||
{:else}
|
||||
<span class="invisible">
|
||||
<ChevronUp className="size-2" />
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
</th>
|
||||
<th
|
||||
scope="col"
|
||||
class="px-2.5 py-2 cursor-pointer select-none"
|
||||
on:click={() => setSortKey('name')}
|
||||
>
|
||||
<div class="flex gap-1.5 items-center">
|
||||
{$i18n.t('Model')}
|
||||
{#if orderBy === 'name'}
|
||||
<span class="font-normal">
|
||||
{#if direction === 'asc'}
|
||||
<ChevronUp className="size-2" />
|
||||
{:else}
|
||||
<ChevronDown className="size-2" />
|
||||
{/if}
|
||||
</span>
|
||||
{:else}
|
||||
<span class="invisible">
|
||||
<ChevronUp className="size-2" />
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
</th>
|
||||
<th
|
||||
scope="col"
|
||||
class="px-2.5 py-2 text-right cursor-pointer select-none w-fit"
|
||||
on:click={() => setSortKey('rating')}
|
||||
>
|
||||
<div class="flex gap-1.5 items-center justify-end">
|
||||
{$i18n.t('Rating')}
|
||||
{#if orderBy === 'rating'}
|
||||
<span class="font-normal">
|
||||
{#if direction === 'asc'}
|
||||
<ChevronUp className="size-2" />
|
||||
{:else}
|
||||
<ChevronDown className="size-2" />
|
||||
{/if}
|
||||
</span>
|
||||
{:else}
|
||||
<span class="invisible">
|
||||
<ChevronUp className="size-2" />
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
</th>
|
||||
<th
|
||||
scope="col"
|
||||
class="px-2.5 py-2 text-right cursor-pointer select-none w-5"
|
||||
on:click={() => setSortKey('won')}
|
||||
>
|
||||
<div class="flex gap-1.5 items-center justify-end">
|
||||
{$i18n.t('Won')}
|
||||
{#if orderBy === 'won'}
|
||||
<span class="font-normal">
|
||||
{#if direction === 'asc'}
|
||||
<ChevronUp className="size-2" />
|
||||
{:else}
|
||||
<ChevronDown className="size-2" />
|
||||
{/if}
|
||||
</span>
|
||||
{:else}
|
||||
<span class="invisible">
|
||||
<ChevronUp className="size-2" />
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
</th>
|
||||
<th
|
||||
scope="col"
|
||||
class="px-2.5 py-2 text-right cursor-pointer select-none w-5"
|
||||
on:click={() => setSortKey('lost')}
|
||||
>
|
||||
<div class="flex gap-1.5 items-center justify-end">
|
||||
{$i18n.t('Lost')}
|
||||
{#if orderBy === 'lost'}
|
||||
<span class="font-normal">
|
||||
{#if direction === 'asc'}
|
||||
<ChevronUp className="size-2" />
|
||||
{:else}
|
||||
<ChevronDown className="size-2" />
|
||||
{/if}
|
||||
</span>
|
||||
{:else}
|
||||
<span class="invisible">
|
||||
<ChevronUp className="size-2" />
|
||||
</span>
|
||||
{/if}
|
||||
</div>
|
||||
</th>
|
||||
<tr class="border-b-[1.5px] border-gray-50 dark:border-gray-850/30">
|
||||
{#each [
|
||||
{ key: 'rating', label: 'RK', class: 'w-3' },
|
||||
{ key: 'name', label: 'Model', class: '' },
|
||||
{ key: 'rating', label: 'Rating', class: 'text-right w-fit' },
|
||||
{ key: 'won', label: 'Won', class: 'text-right w-5' },
|
||||
{ key: 'lost', label: 'Lost', class: 'text-right w-5' }
|
||||
] as col}
|
||||
<th
|
||||
scope="col"
|
||||
class="px-2.5 py-2 cursor-pointer select-none {col.class}"
|
||||
on:click={() => toggleSort(col.key)}
|
||||
>
|
||||
<div class="flex gap-1.5 items-center {col.class.includes('right') ? 'justify-end' : ''}">
|
||||
{$i18n.t(col.label)}
|
||||
{#if orderBy === col.key}
|
||||
{#if direction === 'asc'}<ChevronUp className="size-2" />{:else}<ChevronDown className="size-2" />{/if}
|
||||
{:else}
|
||||
<span class="invisible"><ChevronUp className="size-2" /></span>
|
||||
{/if}
|
||||
</div>
|
||||
</th>
|
||||
{/each}
|
||||
</tr>
|
||||
</thead>
|
||||
<tbody class="">
|
||||
{#each sortedModels as model, modelIdx (model.id)}
|
||||
<tbody>
|
||||
{#each sortedModels as model, idx (model.id)}
|
||||
<tr
|
||||
class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs group cursor-pointer hover:bg-gray-50 dark:hover:bg-gray-850/50 transition"
|
||||
on:click={() => openLeaderboardModelModal(model)}
|
||||
class="bg-white dark:bg-gray-900 text-xs group cursor-pointer hover:bg-gray-50 dark:hover:bg-gray-850/50 transition"
|
||||
on:click={() => openModal(model)}
|
||||
>
|
||||
<td class="px-3 py-1.5 text-left font-medium text-gray-900 dark:text-white w-fit">
|
||||
<div class=" line-clamp-1">
|
||||
{model?.rating !== '-' ? modelIdx + 1 : '-'}
|
||||
</div>
|
||||
<td class="px-3 py-1.5 font-medium text-gray-900 dark:text-white">
|
||||
{model.rating !== '-' ? idx + 1 : '-'}
|
||||
</td>
|
||||
<td class="px-3 py-1.5 flex flex-col justify-center">
|
||||
<td class="px-3 py-1.5">
|
||||
<div class="flex items-center gap-2">
|
||||
<div class="shrink-0">
|
||||
<img
|
||||
src={`${WEBUI_API_BASE_URL}/models/model/profile/image?id=${model.id}`}
|
||||
alt={model.name}
|
||||
class="size-5 rounded-full object-cover shrink-0"
|
||||
/>
|
||||
</div>
|
||||
|
||||
<div class="font-medium text-gray-800 dark:text-gray-200 pr-4">
|
||||
{model.name}
|
||||
</div>
|
||||
<img
|
||||
src="{WEBUI_API_BASE_URL}/models/model/profile/image?id={model.id}"
|
||||
alt={model.name}
|
||||
class="size-5 rounded-full object-cover"
|
||||
/>
|
||||
<span class="font-medium text-gray-800 dark:text-gray-200">{model.name}</span>
|
||||
</div>
|
||||
</td>
|
||||
<td class="px-3 py-1.5 text-right font-medium text-gray-900 dark:text-white w-max">
|
||||
<td class="px-3 py-1.5 text-right font-medium text-gray-900 dark:text-white">
|
||||
{model.rating}
|
||||
</td>
|
||||
|
||||
<td class=" px-3 py-1.5 text-right font-medium text-green-500">
|
||||
<div class=" w-10">
|
||||
{#if model.stats.won === '-'}
|
||||
-
|
||||
{:else}
|
||||
<span class="hidden group-hover:inline"
|
||||
>{((model.stats.won / model.stats.count) * 100).toFixed(1)}%</span
|
||||
>
|
||||
<span class=" group-hover:hidden">{model.stats.won}</span>
|
||||
{/if}
|
||||
</div>
|
||||
<td class="px-3 py-1.5 text-right font-medium text-green-500 w-10">
|
||||
{#if model.stats.won === '-'}-{:else}
|
||||
<span class="hidden group-hover:inline">{((Number(model.stats.won) / model.stats.count) * 100).toFixed(1)}%</span>
|
||||
<span class="group-hover:hidden">{model.stats.won}</span>
|
||||
{/if}
|
||||
</td>
|
||||
|
||||
<td class="px-3 py-1.5 text-right font-medium text-red-500">
|
||||
<div class=" w-10">
|
||||
{#if model.stats.lost === '-'}
|
||||
-
|
||||
{:else}
|
||||
<span class="hidden group-hover:inline"
|
||||
>{((model.stats.lost / model.stats.count) * 100).toFixed(1)}%</span
|
||||
>
|
||||
<span class=" group-hover:hidden">{model.stats.lost}</span>
|
||||
{/if}
|
||||
</div>
|
||||
<td class="px-3 py-1.5 text-right font-medium text-red-500 w-10">
|
||||
{#if model.stats.lost === '-'}-{:else}
|
||||
<span class="hidden group-hover:inline">{((Number(model.stats.lost) / model.stats.count) * 100).toFixed(1)}%</span>
|
||||
<span class="group-hover:hidden">{model.stats.lost}</span>
|
||||
{/if}
|
||||
</td>
|
||||
</tr>
|
||||
{/each}
|
||||
@@ -562,15 +201,11 @@
|
||||
{/if}
|
||||
</div>
|
||||
|
||||
<div class=" text-gray-500 text-xs mt-1.5 w-full flex justify-end">
|
||||
<div class=" text-right">
|
||||
<div class="text-gray-500 text-xs mt-1.5 w-full flex justify-end">
|
||||
<div class="text-right">
|
||||
<div class="line-clamp-1">
|
||||
ⓘ {$i18n.t(
|
||||
'The evaluation leaderboard is based on the Elo rating system and is updated in real-time.'
|
||||
)}
|
||||
ⓘ {$i18n.t('The evaluation leaderboard is based on the Elo rating system and is updated in real-time.')}
|
||||
</div>
|
||||
{$i18n.t(
|
||||
'The leaderboard is currently in beta, and we may adjust the rating calculations as we refine the algorithm.'
|
||||
)}
|
||||
{$i18n.t('The leaderboard is currently in beta, and we may adjust the rating calculations as we refine the algorithm.')}
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
import { getContext } from 'svelte';
|
||||
export let show = false;
|
||||
export let model = null;
|
||||
export let feedbacks = [];
|
||||
export let onClose: () => void = () => {};
|
||||
const i18n = getContext('i18n');
|
||||
import XMark from '$lib/components/icons/XMark.svelte';
|
||||
@@ -13,22 +12,8 @@
|
||||
onClose();
|
||||
};
|
||||
|
||||
$: topTags = model ? getTopTagsForModel(model.id, feedbacks) : [];
|
||||
|
||||
const getTopTagsForModel = (modelId: string, feedbacks: any[], topN = 5) => {
|
||||
const tagCounts = new Map();
|
||||
feedbacks
|
||||
.filter((fb) => fb.data.model_id === modelId)
|
||||
.forEach((fb) => {
|
||||
(fb.data.tags || []).forEach((tag) => {
|
||||
tagCounts.set(tag, (tagCounts.get(tag) || 0) + 1);
|
||||
});
|
||||
});
|
||||
return Array.from(tagCounts.entries())
|
||||
.sort((a, b) => b[1] - a[1])
|
||||
.slice(0, topN)
|
||||
.map(([tag, count]) => ({ tag, count }));
|
||||
};
|
||||
// Use top_tags from backend response (already computed)
|
||||
$: topTags = model?.top_tags ?? [];
|
||||
</script>
|
||||
|
||||
<Modal size="sm" bind:show>
|
||||
|
||||
Reference in New Issue
Block a user