<!-- open-webui/src/lib/components/admin/Evaluations.svelte -->
<script lang="ts">
import { onMount, getContext } from 'svelte';
2024-10-23 06:24:49 +00:00
import dayjs from 'dayjs';
import relativeTime from 'dayjs/plugin/relativeTime';
dayjs.extend(relativeTime);
2024-10-24 05:35:12 +00:00
import * as ort from 'onnxruntime-web';
import { AutoModel, AutoTokenizer } from '@huggingface/transformers';
2024-10-24 05:38:58 +00:00
// Identifier of the pretrained embedding model passed to
// AutoTokenizer.from_pretrained / AutoModel.from_pretrained in onMount.
const EMBEDDING_MODEL = 'TaylorAI/bge-micro-v2';

// Both stay null until onMount has finished loading the pretrained assets;
// getEmbeddings calls them as functions.
let tokenizer = null;
let model = null;
2024-10-23 03:14:10 +00:00
import { models } from '$lib/stores';
2024-10-23 08:05:45 +00:00
import { deleteFeedbackById, getAllFeedbacks } from '$lib/apis/evaluations';
2024-10-23 06:24:49 +00:00
2024-10-23 05:55:34 +00:00
import FeedbackMenu from './Evaluations/FeedbackMenu.svelte';
import EllipsisHorizontal from '../icons/EllipsisHorizontal.svelte';
2024-10-23 06:24:49 +00:00
import Tooltip from '../common/Tooltip.svelte';
import Badge from '../common/Badge.svelte';
2024-10-23 07:51:27 +00:00
import Pagination from '../common/Pagination.svelte';
2024-10-24 05:35:12 +00:00
import MagnifyingGlass from '../icons/MagnifyingGlass.svelte';
2024-10-23 06:24:49 +00:00
2024-10-22 10:16:48 +00:00
// i18n store obtained from component context; the template reads it as $i18n.t(...).
const i18n = getContext('i18n');

// Leaderboard rows built by rankHandler, already in display order.
let rankedModels = [];
// Feedback records assigned in onMount from getAllFeedbacks.
let feedbacks = [];

// Text bound to the search input; a blank value means "no topic filter".
let query = '';
// 1-based page of the feedback history table (bound to <Pagination>).
let page = 1;

// Cache of tag -> embedding, filled by getTagEmbeddings so a tag is embedded once.
let tagEmbeddings = new Map();
// Set to true in onMount after the feedback list is fetched; gates the whole template.
let loaded = false;
// Handle of the pending setTimeout created by debouncedQueryHandler.
let debounceTimer;

// Rows of the feedback table for the current page, 10 per page.
$: paginatedFeedbacks = feedbacks.slice((page - 1) * 10, page * 10);
2024-10-23 06:24:49 +00:00
// One feedback record as consumed by this component.
type Feedback = {
	id: string;
	data: {
		// 1 is shown as "Won", 0 as "Draw", -1 as "Lost"; only 1 and -1 affect ratings.
		rating: number;
		// Model the feedback was given for.
		model_id: string;
		// Models it is scored against when computing ratings; null when there are none.
		sibling_model_ids: string[] | null;
		reason: string;
		comment: string;
		// Labels compared against the search query to weight this feedback.
		tags: string[];
	};
	user: {
		name: string;
		profile_image_url: string;
	};
	// Multiplied by 1000 before being handed to dayjs — presumably Unix seconds; confirm with the API.
	updated_at: number;
};

// Running totals kept per model while ratings are computed.
type ModelStats = {
	// Current Elo rating (starts at 1000).
	rating: number;
	// Number of pairwise matches won.
	won: number;
	// Number of pairwise matches lost.
	lost: number;
};
2024-10-24 05:35:12 +00:00
//////////////////////
//
// Rank models by Elo rating
//
//////////////////////
// Recomputes `rankedModels` from the current `feedbacks`.
//
// `similarities` maps feedback id -> weight and is forwarded unchanged to
// calculateModelStats. Models owned by 'arena' or flagged hidden are left out.
// Rated models come first, highest rating on top; unrated models follow,
// ordered by name.
const rankHandler = async (similarities: Map<string, number> = new Map()) => {
	const statsByModel = calculateModelStats(feedbacks, similarities);

	const visibleModels = $models.filter((m) => {
		const ownedByArena = m?.owned_by === 'arena';
		const hidden = (m?.info?.meta?.hidden ?? false) === true;
		return !ownedByArena && !hidden;
	});

	const rows = visibleModels.map((entry) => {
		const found = statsByModel.get(entry.id);
		if (!found) {
			// No feedback touched this model: show placeholders instead of numbers.
			return {
				...entry,
				rating: '-',
				stats: { count: 0, won: '-', lost: '-' }
			};
		}
		return {
			...entry,
			rating: Math.round(found.rating),
			stats: {
				count: found.won + found.lost,
				won: found.won.toString(),
				lost: found.lost.toString()
			}
		};
	});

	rows.sort((left, right) => {
		const leftRated = left.rating !== '-';
		const rightRated = right.rating !== '-';
		if (leftRated && rightRated) return right.rating - left.rating;
		if (leftRated) return -1;
		if (rightRated) return 1;
		return left.name.localeCompare(right.name);
	});

	rankedModels = rows;
};
// Replays `feedbacks` in order as pairwise Elo matches and returns the
// resulting rating and win/loss counts per model id.
//
// Each feedback with rating 1 (win) or -1 (loss) is scored as one match between
// `model_id` and every entry of `sibling_model_ids`; other ratings are skipped.
// Every rating change is scaled by the feedback's weight: 1 when the module-level
// `query` is blank, otherwise `similarities.get(feedback.id)` (0 when absent).
// Only models that took part in at least one match appear in the result.
function calculateModelStats(
	feedbacks: Feedback[],
	similarities: Map<string, number>
): Map<string, ModelStats> {
	const stats = new Map<string, ModelStats>();
	const K = 32; // largest rating change a single match can cause

	// Trimmed so a whitespace-only query is treated exactly like an empty one,
	// matching the check in debouncedQueryHandler.
	const hasQuery = query.trim() !== '';

	// Returns the live entry for a model, registering a default one on first sight
	// so that every later read within the same feedback sees earlier updates.
	function getOrCreateStats(modelId: string): ModelStats {
		let entry = stats.get(modelId);
		if (!entry) {
			entry = { rating: 1000, won: 0, lost: 0 };
			stats.set(modelId, entry);
		}
		return entry;
	}

	// Applies one match result to an entry.
	function applyResult(entry: ModelStats, ratingChange: number, outcome: number) {
		entry.rating += ratingChange;
		if (outcome === 1) entry.won++;
		else if (outcome === 0) entry.lost++;
	}

	// Standard Elo update for player A, scaled by the feedback weight.
	function calculateEloChange(
		ratingA: number,
		ratingB: number,
		outcome: number,
		similarity: number
	): number {
		const expectedScore = 1 / (1 + Math.pow(10, (ratingB - ratingA) / 400));
		return K * (outcome - expectedScore) * similarity;
	}

	for (const feedback of feedbacks) {
		let outcome: number;
		// String() rather than .toString() so a missing rating is skipped instead of throwing.
		switch (String(feedback.data?.rating)) {
			case '1':
				outcome = 1;
				break;
			case '-1':
				outcome = 0;
				break;
			default:
				continue; // Skip invalid ratings
		}

		const similarity = hasQuery ? similarities.get(feedback.id) || 0 : 1;
		const modelA = feedback.data.model_id;
		const opponents = feedback.data.sibling_model_ids || [];

		for (const modelB of opponents) {
			const statsA = getOrCreateStats(modelA);
			const statsB = getOrCreateStats(modelB);

			// Both deltas are derived from the pre-match ratings before either is applied.
			const changeA = calculateEloChange(statsA.rating, statsB.rating, outcome, similarity);
			const changeB = calculateEloChange(statsB.rating, statsA.rating, 1 - outcome, similarity);

			applyResult(statsA, changeA, outcome);
			applyResult(statsB, changeB, 1 - outcome);
		}
	}

	return stats;
}
2024-10-24 05:35:12 +00:00
//////////////////////
//
// Calculate cosine similarity
//
//////////////////////
// Cosine of the angle between two numeric vectors of equal length.
// Throws when the lengths differ; yields 0 when either vector has zero magnitude.
const cosineSimilarity = (vecA, vecB) => {
	const size = vecA.length;
	if (size !== vecB.length) {
		throw new Error('Vectors must be the same length');
	}

	// Single pass accumulating the dot product and both squared magnitudes.
	let dot = 0;
	let squaredA = 0;
	let squaredB = 0;
	let idx = 0;
	while (idx < size) {
		const a = vecA[idx];
		const b = vecB[idx];
		dot += a * b;
		squaredA += a ** 2;
		squaredB += b ** 2;
		idx += 1;
	}

	const magnitudeA = Math.sqrt(squaredA);
	const magnitudeB = Math.sqrt(squaredB);

	// A zero-length vector has no direction, so report "no similarity" rather than divide by zero.
	const degenerate = magnitudeA === 0 || magnitudeB === 0;
	return degenerate ? 0 : dot / (magnitudeA * magnitudeB);
};
// Highest cosine similarity between `queryEmbedding` and any embedding in the map.
// The running maximum starts at 0, so an empty map — or only negative
// similarities — gives 0.
const calculateMaxSimilarity = (queryEmbedding, tagEmbeddings: Map<string, number[]>) => {
	const candidates = Array.from(tagEmbeddings.values());
	return candidates.reduce(
		(best, tagVector) => Math.max(best, cosineSimilarity(queryEmbedding, tagVector)),
		0
	);
};
//////////////////////
//
// Embedding functions
//
//////////////////////
// Embeds `text` with the module-level tokenizer and model.
// The model's last hidden state is averaged along dimension 1 and the raw
// data of the resulting tensor is returned.
const getEmbeddings = async (text: string) => {
	const encoded = await tokenizer(text);
	const { last_hidden_state: hiddenState } = await model(encoded);
	const pooled = hiddenState.mean(1);
	return pooled.ort_tensor.data;
};
// Returns a tag -> embedding map for exactly the given tags.
// Tags missing from the module-level `tagEmbeddings` cache are embedded one
// at a time and stored there before being copied into the result.
const getTagEmbeddings = async (tags: string[]) => {
	const result = new Map();
	for (let i = 0; i < tags.length; i++) {
		const tag = tags[i];
		const cached = tagEmbeddings.has(tag);
		if (!cached) {
			const fresh = await getEmbeddings(tag);
			tagEmbeddings.set(tag, fresh);
		}
		result.set(tag, tagEmbeddings.get(tag));
	}
	return result;
};
// Reacts to a change of `query`.
//
// A blank query re-ranks immediately with every feedback at full weight.
// Otherwise, after 1.5 s without further changes, the query is embedded, each
// feedback is weighted by the best cosine similarity between the query and its
// tags, and the leaderboard is re-ranked with those weights.
const debouncedQueryHandler = async () => {
	// Cancel any scheduled run first, including when the query was just cleared,
	// so a stale timer cannot fire after the immediate re-rank below.
	clearTimeout(debounceTimer);

	if (query.trim() === '') {
		rankHandler();
		return;
	}

	debounceTimer = setTimeout(async () => {
		// The embedding model is loaded asynchronously in onMount; without it
		// there is nothing to compare against yet.
		if (!tokenizer || !model) {
			return;
		}

		// Remember which query this run belongs to so its result can be dropped
		// if the user types something else while embeddings are being computed.
		const submittedQuery = query;

		try {
			const queryEmbedding = await getEmbeddings(submittedQuery);
			const similarities = new Map<string, number>();

			for (const feedback of feedbacks) {
				const feedbackTags = feedback.data?.tags ?? [];
				const feedbackTagEmbeddings = await getTagEmbeddings(feedbackTags);
				similarities.set(feedback.id, calculateMaxSimilarity(queryEmbedding, feedbackTagEmbeddings));
			}

			// A newer query has its own run (or immediate re-rank); do not overwrite it.
			if (submittedQuery !== query) {
				return;
			}

			rankHandler(similarities);
		} catch (err) {
			// Runs inside a timer callback, so nothing upstream could catch this.
			console.error(err);
		}
	}, 1500); // Debounce for 1.5 seconds
};
$: query, debouncedQueryHandler();
//////////////////////
//
// CRUD operations
//
//////////////////////
2024-10-23 08:05:45 +00:00
// Deletes one feedback on the server and, on success, removes it from the
// local list, keeps the current page valid and refreshes the leaderboard.
const deleteFeedbackHandler = async (feedbackId: string) => {
	const response = await deleteFeedbackById(localStorage.token, feedbackId).catch((err) => {
		// NOTE(review): `toast` is not imported in the visible <script> block — confirm it is in scope.
		toast.error(err);
		return null;
	});

	if (response) {
		feedbacks = feedbacks.filter((f) => f.id !== feedbackId);

		// Removing the only row of the last page would otherwise leave an empty
		// table (and, at 10 rows or fewer, no pagination control to leave it).
		const lastPage = Math.max(1, Math.ceil(feedbacks.length / 10));
		if (page > lastPage) {
			page = lastPage;
		}

		// Ratings are derived from `feedbacks`, so they must be recomputed too;
		// going through the query handler keeps any active topic weighting.
		debouncedQueryHandler();
	}
};
2024-10-24 05:35:12 +00:00
// Initial load: fetch feedbacks, rank at once, then load the embedding model
// that is only needed for topic search.
onMount(async () => {
	feedbacks = (await getAllFeedbacks(localStorage.token)) ?? [];
	loaded = true;

	// Rank before the embedding model is fetched so the leaderboard does not
	// depend on that download finishing (or succeeding).
	rankHandler();

	try {
		tokenizer = await AutoTokenizer.from_pretrained(EMBEDDING_MODEL);
		model = await AutoModel.from_pretrained(EMBEDDING_MODEL);

		// Pre-compute embeddings for all unique tags
		const allTags = new Set(feedbacks.flatMap((feedback) => feedback.data?.tags ?? []));
		await getTagEmbeddings(Array.from(allTags));
	} catch (err) {
		// Topic search stays unavailable, but the plain leaderboard above is already shown.
		console.error(err);
		return;
	}

	// A query typed while the model was still loading could not be scored; score it now.
	if (query.trim() !== '') {
		debouncedQueryHandler();
	}
});
</script>
{#if loaded}
2024-10-23 05:55:34 +00:00
<div class="mt-0.5 mb-2 gap-1 flex flex-col md:flex-row justify-between">
2024-10-24 06:07:44 +00:00
<div class="flex md:self-center text-lg font-medium px-0.5 shrink-0 items-center">
<div class=" gap-1">
{$i18n.t('Leaderboard')}
</div>
2024-10-23 03:14:10 +00:00
<div class="flex self-center w-[1px] h-6 mx-2.5 bg-gray-50 dark:bg-gray-850" />
2024-10-24 06:07:44 +00:00
<span class="text-lg font-medium text-gray-500 dark:text-gray-300 mr-1.5"
>{rankedModels.length}</span
2024-10-23 03:14:10 +00:00
>
</div>
2024-10-24 05:35:12 +00:00
<div class=" flex space-x-2">
<Tooltip content={$i18n.t('Re-rank models by topic similarity')}>
<div class="flex flex-1">
<div class=" self-center ml-1 mr-3">
<MagnifyingGlass className="size-3" />
</div>
<input
class=" w-full text-sm pr-4 py-1 rounded-r-xl outline-none bg-transparent"
bind:value={query}
placeholder={$i18n.t('Search')}
/>
</div>
</Tooltip>
</div>
2024-10-23 03:14:10 +00:00
</div>
<div
class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded pt-0.5"
>
2024-10-23 05:55:34 +00:00
{#if (rankedModels ?? []).length === 0}
<div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
{$i18n.t('No models found')}
</div>
{:else}
<table
class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full rounded"
2024-10-23 03:14:10 +00:00
>
2024-10-23 05:55:34 +00:00
<thead
class="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-850 dark:text-gray-400 -translate-y-0.5"
>
<tr class="">
<th scope="col" class="px-3 py-1.5 cursor-pointer select-none w-3">
{$i18n.t('RK')}
</th>
<th scope="col" class="px-3 py-1.5 cursor-pointer select-none">
{$i18n.t('Model')}
</th>
<th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
{$i18n.t('Rating')}
</th>
2024-10-23 06:44:13 +00:00
<th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-5">
2024-10-23 05:55:34 +00:00
{$i18n.t('Won')}
</th>
2024-10-23 06:44:13 +00:00
<th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-5">
2024-10-23 05:55:34 +00:00
{$i18n.t('Lost')}
</th>
</tr>
</thead>
<tbody class="">
2024-10-23 06:24:49 +00:00
{#each rankedModels as model, modelIdx (model.id)}
2024-10-23 06:44:13 +00:00
<tr class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs group">
2024-10-23 05:55:34 +00:00
<td class="px-3 py-1.5 text-left font-medium text-gray-900 dark:text-white w-fit">
<div class=" line-clamp-1">
2024-10-23 06:24:49 +00:00
{model?.rating !== '-' ? modelIdx + 1 : '-'}
2024-10-23 03:14:10 +00:00
</div>
2024-10-23 05:55:34 +00:00
</td>
<td class="px-3 py-1.5 flex flex-col justify-center">
<div class="flex items-center gap-2">
<div class="flex-shrink-0">
<img
src={model?.info?.meta?.profile_image_url ?? '/favicon.png'}
alt={model.name}
class="size-5 rounded-full object-cover shrink-0"
/>
</div>
<div class="font-medium text-gray-800 dark:text-gray-200 pr-4">
{model.name}
</div>
2024-10-23 03:14:10 +00:00
</div>
2024-10-23 05:55:34 +00:00
</td>
<td class="px-3 py-1.5 text-right font-medium text-gray-900 dark:text-white w-max">
{model.rating}
</td>
<td class=" px-3 py-1.5 text-right font-semibold text-green-500">
2024-10-23 06:44:13 +00:00
<div class=" w-10">
{#if model.stats.won === '-'}
-
{:else}
<span class="hidden group-hover:inline"
>{((model.stats.won / model.stats.count) * 100).toFixed(1)}%</span
>
<span class=" group-hover:hidden">{model.stats.won}</span>
{/if}
</div>
2024-10-23 05:55:34 +00:00
</td>
<td class="px-3 py-1.5 text-right font-semibold text-red-500">
2024-10-23 06:44:13 +00:00
<div class=" w-10">
{#if model.stats.lost === '-'}
-
{:else}
<span class="hidden group-hover:inline"
>{((model.stats.lost / model.stats.count) * 100).toFixed(1)}%</span
>
<span class=" group-hover:hidden">{model.stats.lost}</span>
{/if}
</div>
2024-10-23 05:55:34 +00:00
</td>
</tr>
{/each}
</tbody>
</table>
{/if}
2024-10-23 03:14:10 +00:00
</div>
2024-10-24 06:07:44 +00:00
<div class=" text-gray-500 text-xs mt-1.5 w-full flex justify-end">
<div class=" text-right">
<div class="line-clamp-1">
{$i18n.t(
'The evaluation leaderboard is based on the Elo rating system and is updated in real-time.'
)}
</div>
{$i18n.t(
'The leaderboard is currently in beta, and we may adjust the rating calculations as we refine the algorithm.'
)}
</div>
</div>
2024-10-23 03:14:10 +00:00
<div class="pb-4"></div>
2024-10-23 05:55:34 +00:00
<div class="mt-0.5 mb-2 gap-1 flex flex-col md:flex-row justify-between">
2024-10-23 03:14:10 +00:00
<div class="flex md:self-center text-lg font-medium px-0.5">
2024-10-23 05:55:34 +00:00
{$i18n.t('Feedback History')}
2024-10-23 07:51:27 +00:00
<div class="flex self-center w-[1px] h-6 mx-2.5 bg-gray-50 dark:bg-gray-850" />
<span class="text-lg font-medium text-gray-500 dark:text-gray-300">{feedbacks.length}</span>
2024-10-22 10:16:48 +00:00
</div>
</div>
2024-10-23 03:14:10 +00:00
2024-10-23 05:55:34 +00:00
<div
class="scrollbar-hidden relative whitespace-nowrap overflow-x-auto max-w-full rounded pt-0.5"
>
{#if (feedbacks ?? []).length === 0}
<div class="text-center text-xs text-gray-500 dark:text-gray-400 py-1">
{$i18n.t('No feedbacks found')}
</div>
{:else}
<table
class="w-full text-sm text-left text-gray-500 dark:text-gray-400 table-auto max-w-full rounded"
>
<thead
class="text-xs text-gray-700 uppercase bg-gray-50 dark:bg-gray-850 dark:text-gray-400 -translate-y-0.5"
>
<tr class="">
2024-10-23 06:28:41 +00:00
<th scope="col" class="px-3 text-right cursor-pointer select-none w-0">
2024-10-23 06:24:49 +00:00
{$i18n.t('User')}
</th>
2024-10-23 06:28:41 +00:00
<th scope="col" class="px-3 pr-1.5 cursor-pointer select-none">
2024-10-23 05:55:34 +00:00
{$i18n.t('Models')}
</th>
<th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-fit">
{$i18n.t('Result')}
</th>
<th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-0">
2024-10-23 06:24:49 +00:00
{$i18n.t('Updated At')}
2024-10-23 05:55:34 +00:00
</th>
<th scope="col" class="px-3 py-1.5 text-right cursor-pointer select-none w-0"> </th>
</tr>
</thead>
<tbody class="">
2024-10-23 07:51:27 +00:00
{#each paginatedFeedbacks as feedback (feedback.id)}
2024-10-23 05:55:34 +00:00
<tr class="bg-white dark:bg-gray-900 dark:border-gray-850 text-xs">
2024-10-23 06:31:51 +00:00
<td class=" py-0.5 text-right font-semibold">
2024-10-23 06:24:49 +00:00
<div class="flex justify-center">
<Tooltip content={feedback?.user?.name}>
<div class="flex-shrink-0">
<img
src={feedback?.user?.profile_image_url ?? '/user.png'}
alt={feedback?.user?.name}
2024-10-23 06:27:07 +00:00
class="size-5 rounded-full object-cover shrink-0"
2024-10-23 06:24:49 +00:00
/>
</div>
</Tooltip>
</div>
</td>
2024-10-23 06:31:51 +00:00
<td class=" py-1 pl-3 flex flex-col">
2024-10-23 06:24:49 +00:00
<div class="flex flex-col items-start gap-0.5 h-full">
<div class="flex flex-col h-full">
{#if feedback.data?.sibling_model_ids}
<div class="font-semibold text-gray-600 dark:text-gray-400 flex-1">
{feedback.data?.model_id}
</div>
2024-10-23 06:54:37 +00:00
<Tooltip content={feedback.data.sibling_model_ids.join(', ')}>
<div class=" text-[0.65rem] text-gray-600 dark:text-gray-400 line-clamp-1">
{#if feedback.data.sibling_model_ids.length > 2}
2024-10-23 07:42:13 +00:00
<!-- {$i18n.t('and {{COUNT}} more')} -->
2024-10-23 06:55:36 +00:00
{feedback.data.sibling_model_ids.slice(0, 2).join(', ')}, {$i18n.t(
2024-10-23 07:42:13 +00:00
'and {{COUNT}} more',
{ COUNT: feedback.data.sibling_model_ids.length - 2 }
2024-10-23 06:55:36 +00:00
)}
2024-10-23 06:54:37 +00:00
{:else}
{feedback.data.sibling_model_ids.join(', ')}
{/if}
</div>
</Tooltip>
2024-10-23 06:24:49 +00:00
{:else}
<div
2024-10-23 06:31:51 +00:00
class=" text-sm font-medium text-gray-600 dark:text-gray-400 flex-1 py-1.5"
2024-10-23 06:24:49 +00:00
>
{feedback.data?.model_id}
</div>
{/if}
2024-10-23 05:55:34 +00:00
</div>
</div>
</td>
<td class="px-3 py-1 text-right font-medium text-gray-900 dark:text-white w-max">
2024-10-23 06:24:49 +00:00
<div class=" flex justify-end">
{#if feedback.data.rating.toString() === '1'}
<Badge type="info" content={$i18n.t('Won')} />
{:else if feedback.data.rating.toString() === '0'}
<Badge type="muted" content={$i18n.t('Draw')} />
{:else if feedback.data.rating.toString() === '-1'}
<Badge type="error" content={$i18n.t('Lost')} />
{/if}
</div>
2024-10-23 05:55:34 +00:00
</td>
2024-10-23 06:24:49 +00:00
<td class=" px-3 py-1 text-right font-medium">
{dayjs(feedback.updated_at * 1000).fromNow()}
2024-10-23 05:55:34 +00:00
</td>
<td class=" px-3 py-1 text-right font-semibold">
2024-10-23 08:05:45 +00:00
<FeedbackMenu
on:delete={(e) => {
deleteFeedbackHandler(feedback.id);
}}
>
2024-10-23 05:55:34 +00:00
<button
class="self-center w-fit text-sm p-1.5 dark:text-gray-300 dark:hover:text-white hover:bg-black/5 dark:hover:bg-white/5 rounded-xl"
>
<EllipsisHorizontal />
</button>
</FeedbackMenu>
</td>
</tr>
{/each}
</tbody>
</table>
{/if}
</div>
2024-10-23 08:05:45 +00:00
{#if feedbacks.length > 10}
<Pagination bind:page count={feedbacks.length} perPage={10} />
{/if}
2024-10-23 07:51:27 +00:00
2024-10-23 03:14:10 +00:00
<div class="pb-8"></div>
2024-10-22 10:16:48 +00:00
{/if}