<!--
  Bonsai-demo / dashboard.html
  PrismML Deploy
  Bonsai-demo: multi-GPU llama.cpp server with live dashboard and metrics
  0633a27
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Bonsai-demo — Dashboard</title>
<script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.min.js"></script>
<style>
:root {
--bg: #0d0f14;
--surface: #161920;
--border: #252830;
--text: #e2e4ea;
--muted: #6b7280;
--green: #22c55e;
--blue: #3b82f6;
--orange: #f97316;
--purple: #a855f7;
--red: #ef4444;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
background: var(--bg);
color: var(--text);
font-family: 'SF Mono', 'Fira Code', monospace;
font-size: 13px;
min-height: 100vh;
padding: 20px 24px;
}
/* ── header ── */
header {
display: flex;
align-items: center;
justify-content: space-between;
margin-bottom: 20px;
gap: 16px;
flex-wrap: wrap;
}
header h1 { font-size: 15px; font-weight: 600; letter-spacing: 0.05em; }
.header-right { display: flex; align-items: center; gap: 20px; }
#status { display: flex; align-items: center; gap: 8px; font-size: 12px; color: var(--muted); }
#dot { width: 8px; height: 8px; border-radius: 50%; background: var(--muted); transition: background 0.3s; }
#dot.live { background: var(--green); box-shadow: 0 0 6px var(--green); }
#dot.error { background: var(--red); }
.analytics-ts { font-size: 11px; color: var(--muted); }
.analytics-ts span { color: var(--text); }
/* ── server bar ── */
.url-bar { display: flex; gap: 8px; margin-bottom: 20px; align-items: center; }
.url-bar label { color: var(--muted); font-size: 11px; white-space: nowrap; }
.url-bar input {
flex: 1; background: var(--surface); border: 1px solid var(--border);
border-radius: 6px; color: var(--text); font-family: inherit;
font-size: 12px; padding: 6px 10px; outline: none;
}
.url-bar input:focus { border-color: var(--blue); }
.url-bar button {
background: var(--blue); border: none; border-radius: 6px; color: #fff;
cursor: pointer; font-family: inherit; font-size: 12px; padding: 6px 14px;
}
/* ── section labels ── */
.section-label {
  font-size: 10px; letter-spacing: 0.1em; text-transform: uppercase;
  color: var(--muted); margin-bottom: 10px; margin-top: 20px;
}
/* The first label directly follows the server bar. `:first-of-type` could never
   match here because the first <div> sibling in <body> is .url-bar, not a label. */
.url-bar + .section-label { margin-top: 0; }
/* ── stat grids ── */
.stats-live {
display: grid;
grid-template-columns: repeat(4, 1fr);
gap: 10px;
margin-bottom: 14px;
}
.stats-hist {
display: grid;
grid-template-columns: repeat(5, 1fr);
gap: 10px;
margin-bottom: 20px;
}
@media (max-width: 900px) {
.stats-live { grid-template-columns: repeat(2, 1fr); }
.stats-hist { grid-template-columns: repeat(3, 1fr); }
}
.stat {
background: var(--surface); border: 1px solid var(--border);
border-radius: 8px; padding: 14px 16px;
}
.stat-label { font-size: 10px; letter-spacing: 0.07em; color: var(--muted); text-transform: uppercase; margin-bottom: 6px; }
.stat-value { font-size: 26px; font-weight: 700; line-height: 1; }
.stat-unit { font-size: 10px; color: var(--muted); margin-top: 4px; }
/* ── charts ── */
.charts-2 { display: grid; grid-template-columns: 1fr 1fr; gap: 14px; margin-bottom: 20px; }
@media (max-width: 900px) { .charts-2 { grid-template-columns: 1fr; } }
.chart-card {
background: var(--surface); border: 1px solid var(--border);
border-radius: 8px; padding: 16px;
}
.chart-card h2 {
font-size: 10px; letter-spacing: 0.08em; text-transform: uppercase;
color: var(--muted); margin-bottom: 12px;
}
.chart-card canvas { max-height: 160px; }
/* ── GPU ── */
.gpu-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(300px, 1fr)); gap: 10px; }
.gpu-card { background: var(--surface); border: 1px solid var(--border); border-radius: 8px; padding: 14px; }
.gpu-name { font-size: 12px; font-weight: 600; margin-bottom: 10px; }
.gpu-index { color: var(--muted); font-weight: 400; }
.gpu-row { display: flex; align-items: center; gap: 10px; margin-bottom: 6px; }
.gpu-label { width: 70px; font-size: 10px; color: var(--muted); text-transform: uppercase; letter-spacing: .05em; flex-shrink: 0; }
.bar-track { flex: 1; height: 7px; background: var(--border); border-radius: 4px; overflow: hidden; }
.bar-fill { height: 100%; border-radius: 4px; transition: width .4s ease; }
.gpu-val { width: 60px; text-align: right; font-size: 11px; color: var(--text); flex-shrink: 0; }
.gpu-meta { display: flex; gap: 14px; margin-top: 8px; padding-top: 8px; border-top: 1px solid var(--border); flex-wrap: wrap; }
.gpu-meta-item { font-size: 11px; color: var(--muted); }
.gpu-meta-item span { color: var(--text); }
</style>
</head>
<body>
<header>
<h1>🌿 Bonsai-demo · Dashboard</h1>
<div class="header-right">
<div class="analytics-ts">requests updated <span id="u-updated">—</span></div>
<div id="status">
<div id="dot"></div>
<span id="status-text">connecting…</span>
</div>
</div>
</header>
<!-- Server selector: reconnect() reads #server-url; a blank value means same origin. -->
<div class="url-bar">
  <label for="server-url">SERVER</label>
  <input id="server-url" type="text" value="" placeholder="(same origin — leave blank for deployed, or http://host:port for local)">
  <button type="button" onclick="reconnect()">Connect</button>
</div>
<!-- ── Live ── -->
<div class="section-label">Live</div>
<!-- Live stat cards. tick() fills v-active, v-queued, v-gen and v-prompt;
     fetchAnalytics() fills v-latency-avg and v-latency-p90.
     auto-fit/minmax yields six columns on wide screens and reflows on narrow ones
     (a fixed inline repeat(6, 1fr) would override the stylesheet's media query). -->
<div class="stats-live" style="grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));">
  <div class="stat">
    <div class="stat-label">Active</div>
    <div class="stat-value" id="v-active" style="color:var(--orange)">—</div>
    <div class="stat-unit">requests processing</div>
  </div>
  <div class="stat">
    <div class="stat-label">Queued</div>
    <div class="stat-value" id="v-queued" style="color:var(--purple)">—</div>
    <div class="stat-unit">requests waiting</div>
  </div>
  <div class="stat">
    <div class="stat-label">Avg Latency</div>
    <div class="stat-value" id="v-latency-avg" style="color:var(--text)">—</div>
    <div class="stat-unit">ms · last 5 min</div>
  </div>
  <div class="stat">
    <div class="stat-label">p90 Latency</div>
    <div class="stat-value" id="v-latency-p90" style="color:var(--text)">—</div>
    <div class="stat-unit">ms · last 5 min</div>
  </div>
  <div class="stat">
    <div class="stat-label">Gen Speed</div>
    <div class="stat-value" id="v-gen" style="color:var(--green)">—</div>
    <div class="stat-unit">tok / s · avg</div>
  </div>
  <div class="stat">
    <div class="stat-label">Prompt Speed</div>
    <div class="stat-value" id="v-prompt" style="color:var(--blue)">—</div>
    <div class="stat-unit">tok / s · avg</div>
  </div>
</div>
<div class="charts-2">
<div class="chart-card">
<h2>Concurrency — active &amp; queued slots</h2>
<canvas id="chart-slots"></canvas>
</div>
<div class="chart-card">
<h2>Generation — tok / s (current)</h2>
<canvas id="chart-gen"></canvas>
</div>
<div class="chart-card">
<h2>Prompt processing — tok / s (current)</h2>
<canvas id="chart-prompt"></canvas>
</div>
</div>
<!-- ── Historical ── -->
<div class="section-label">Historical</div>
<!-- Historical stat cards. fetchAnalytics() fills u-req-1d, u-req-7d and u-req-total;
     tick() fills v-total-tok and v-decodes. -->
<div class="stats-hist">
  <div class="stat">
    <div class="stat-label">Requests — 24h</div>
    <div class="stat-value" id="u-req-1d" style="color:var(--blue)">—</div>
    <div class="stat-unit">chat completions</div>
  </div>
  <div class="stat">
    <div class="stat-label">Requests — 7d</div>
    <div class="stat-value" id="u-req-7d" style="color:var(--blue)">—</div>
    <div class="stat-unit">chat completions</div>
  </div>
  <div class="stat">
    <div class="stat-label">Requests — Total</div>
    <div class="stat-value" id="u-req-total" style="color:var(--text)">—</div>
    <div class="stat-unit">since last restart</div>
  </div>
  <div class="stat">
    <div class="stat-label">Tokens Generated</div>
    <div class="stat-value" id="v-total-tok" style="color:var(--text)">—</div>
    <div class="stat-unit">cumulative</div>
  </div>
  <div class="stat">
    <div class="stat-label">Decodes</div>
    <div class="stat-value" id="v-decodes" style="color:var(--text)">—</div>
    <div class="stat-unit">llama_decode() calls</div>
  </div>
</div>
<div class="charts-2">
<div class="chart-card">
<h2>Requests per hour — last 24h</h2>
<canvas id="chart-req-hour"></canvas>
</div>
<div class="chart-card">
<h2>Tokens generated (cumulative)</h2>
<canvas id="chart-tokens"></canvas>
</div>
</div>
<!-- ── GPU ── -->
<div class="section-label">GPU Health</div>
<div id="gpu-panel"><div style="color:var(--muted);font-size:12px;">loading…</div></div>
<script>
// Rolling-window size: push() drops the oldest sample once a chart holds more than this.
const MAX_POINTS = 120;
// Base URL prepended to every endpoint path ('' makes the requests relative to the page).
let serverUrl = '';
// Handle of the live-metrics interval started by reconnect(); null until then.
let timer = null;

// ── chart defaults ──────────────────────────────────────────────
// Option template for the line charts. mkLine() deep-copies it per chart.
const chartDefaults = {
  responsive: true,
  animation: false,
  plugins: {
    legend: {
      labels: { color: '#6b7280', font: { size: 11 } },
    },
  },
  scales: {
    x: {
      ticks: { color: '#6b7280', maxTicksLimit: 6, font: { size: 10 } },
      grid: { color: '#1e2028' },
    },
    y: {
      ticks: { color: '#6b7280', font: { size: 10 } },
      grid: { color: '#1e2028' },
      beginAtZero: true,
    },
  },
};

// Option template for the bar chart: legend hidden, x labels may rotate up to 45 degrees.
const BAR_DEFAULTS = {
  responsive: true,
  animation: false,
  plugins: {
    legend: { display: false },
  },
  scales: {
    x: {
      ticks: { color: '#6b7280', font: { size: 10 }, maxRotation: 45 },
      grid: { color: '#1e2028' },
    },
    y: {
      ticks: { color: '#6b7280', font: { size: 10 } },
      grid: { color: '#1e2028' },
      beginAtZero: true,
    },
  },
};
/**
 * Build a Chart.js line chart on the canvas with the given element id.
 *
 * @param {string} id        id of the target <canvas> element
 * @param {Array}  datasets  dataset objects, used as-is (not copied)
 * @returns {Chart} the new chart, starting with no labels
 */
function mkLine(id, datasets) {
  const canvas = document.getElementById(id);
  // JSON round-trip: every chart gets its own deep copy of the shared option template.
  const options = JSON.parse(JSON.stringify(chartDefaults));
  const config = {
    type: 'line',
    data: { labels: [], datasets: datasets },
    options: options,
  };
  return new Chart(canvas, config);
}
// Live line charts, keyed by the name tick() uses when pushing samples.
const charts = (() => {
  // All live series share one look; only label, stroke colour and translucent fill differ.
  const area = (label, stroke, fillColor) => ({
    label: label,
    data: [],
    borderColor: stroke,
    backgroundColor: fillColor,
    fill: true,
    tension: 0.3,
    pointRadius: 0,
    borderWidth: 2,
  });

  const slots = mkLine('chart-slots', [
    area('Active', '#f97316', '#f9731628'),
    area('Queued', '#a855f7', '#a855f720'),
  ]);
  const gen = mkLine('chart-gen', [area('Gen tok/s', '#22c55e', '#22c55e18')]);
  const prompt = mkLine('chart-prompt', [area('Prompt tok/s', '#3b82f6', '#3b82f618')]);
  const tokens = mkLine('chart-tokens', [area('Tokens', '#e2e4ea', '#e2e4ea10')]);

  return { slots, gen, prompt, tokens };
})();
// Analytics charts. reqHour is the bar chart that fetchAnalytics() refills on every poll.
const aC = (() => {
  const canvas = document.getElementById('chart-req-hour');
  const bars = {
    data: [],
    backgroundColor: '#3b82f660',
    borderColor: '#3b82f6',
    borderWidth: 1,
  };
  const reqHour = new Chart(canvas, {
    type: 'bar',
    data: { labels: [], datasets: [bars] },
    // Deep copy so this chart never shares option objects with the template.
    options: JSON.parse(JSON.stringify(BAR_DEFAULTS)),
  });
  return { reqHour };
})();
// ── helpers ─────────────────────────────────────────────────────
/**
 * Parse a plain-text metrics dump of `name[{labels}] value` lines into an object.
 *
 * Lines starting with '#' and blank lines are skipped, as is any line the
 * pattern does not match. The label set is ignored, so when one metric name
 * appears several times the last occurrence wins.
 *
 * @param {string} text  raw response body
 * @returns {Object<string, number>} metric name mapped to its parseFloat'd value
 */
function parseMetrics(text) {
  const SAMPLE = /^([^\s{]+)(?:\{[^}]*\})?\s+([\d.eE+\-]+)/;
  const metrics = {};
  text.split('\n').forEach((row) => {
    const ignorable = row.startsWith('#') || row.trim() === '';
    if (ignorable) return;
    const hit = SAMPLE.exec(row);
    if (hit !== null) {
      const [, name, rawValue] = hit;
      metrics[name] = parseFloat(rawValue);
    }
  });
  return metrics;
}
/**
 * Format a number for a stat card.
 *
 * @param {*} v              value to format
 * @param {number} [dec=1]   decimals used for values below 1000
 * @returns {string} an em dash when `v` is not a finite number (undefined, null,
 *   NaN, ±Infinity, strings, ...); otherwise `v / 1000` with one decimal and a
 *   'k' suffix for values >= 1000, or `v` with `dec` decimals
 */
function fmt(v, dec = 1) {
  // Written as an escape so the placeholder cannot be corrupted by a re-encoding of the file.
  if (typeof v !== 'number' || !Number.isFinite(v)) return '\u2014';
  return v >= 1000 ? (v / 1000).toFixed(1) + 'k' : v.toFixed(dec);
}
/**
 * Append one sample to a chart and redraw it.
 *
 * @param {Chart}  chart      chart to extend
 * @param {string} label      x-axis label for the new sample
 * @param {...number} values  one value per dataset, in dataset order
 *
 * Once the chart holds more than MAX_POINTS labels, the oldest label and the
 * oldest point of every dataset are dropped.
 */
function push(chart, label, ...values) {
  const { labels, datasets } = chart.data;
  labels.push(label);
  for (let i = 0; i < values.length; i++) {
    datasets[i].data.push(values[i]);
  }
  const overflowing = labels.length > MAX_POINTS;
  if (overflowing) {
    labels.shift();
    for (const series of datasets) series.data.shift();
  }
  chart.update('none');
}
// ── live metrics tick ────────────────────────────────────────────
/**
 * Poll the metrics endpoint once and refresh the live stat cards and charts.
 *
 * On success the status dot gets class "live" and one sample is appended to
 * each live chart. On a network error, a non-2xx status or any exception while
 * processing, the dot gets class "error" and the message is shown; cards and
 * charts keep their previous content.
 */
async function tick() {
  const dot = document.getElementById('dot');
  const statusText = document.getElementById('status-text');
  const ts = new Date().toLocaleTimeString();
  // Remember which server this request targets: a reply that arrives after
  // reconnect() switched servers must not be shown as data from the new one.
  const base = serverUrl;
  const setCard = (id, text) => { document.getElementById(id).textContent = text; };
  try {
    // '?t=' varies per request (presumably to defeat HTTP caching).
    const res = await fetch(base + '/metrics-2e215f981f3f?t=' + Date.now());
    if (!res.ok) throw new Error('HTTP ' + res.status);
    const m = parseMetrics(await res.text());
    if (base !== serverUrl) return;
    dot.className = 'live';
    statusText.textContent = 'live \u00b7 ' + ts;
    // stat cards: lifetime averages (always meaningful, never stuck at 0)
    const genAvg = m['llamacpp:predicted_tokens_seconds'] ?? 0;
    const promptAvg = m['llamacpp:prompt_tokens_seconds'] ?? 0;
    const active = m['llamacpp:requests_processing'] || 0;
    const queued = m['llamacpp:requests_deferred'] || 0;
    setCard('v-active', fmt(active, 0));
    setCard('v-queued', fmt(queued, 0));
    setCard('v-gen', fmt(genAvg));
    setCard('v-prompt', fmt(promptAvg));
    setCard('v-total-tok', fmt(m['llamacpp:tokens_predicted_total'], 0));
    setCard('v-decodes', fmt(m['llamacpp:n_decode_total'], 0));
    // charts: delta-based current rates (go to 0 when idle)
    const genRate = m['bonsai:gen_rate_current'] || 0;
    const promptRate = m['bonsai:prompt_rate_current'] || 0;
    push(charts.slots, ts, active, queued);
    push(charts.gen, ts, genRate);
    push(charts.prompt, ts, promptRate);
    push(charts.tokens, ts, m['llamacpp:tokens_predicted_total'] || 0);
  } catch (e) {
    // A failure of the previous server is irrelevant once the user has switched.
    if (base !== serverUrl) return;
    dot.className = 'error';
    statusText.textContent = 'error \u00b7 ' + e.message;
  }
}
/**
 * (Re)start live polling against the server typed into #server-url.
 *
 * The value is trimmed and stripped of trailing slashes so that appending
 * '/metrics-…' never yields a double slash or a URL with stray whitespace.
 * An empty value leaves serverUrl as '' (requests relative to the page).
 * Any running poll interval is replaced; one tick runs immediately, then
 * every 2 seconds.
 */
function reconnect() {
  serverUrl = document.getElementById('server-url').value.trim().replace(/\/+$/, '');
  if (timer) clearInterval(timer);
  tick();
  timer = setInterval(tick, 2000);
}
// Start polling as soon as the script loads.
reconnect();
// ── GPU ─────────────────────────────────────────────────────────
/**
 * Map a utilisation percentage to a bar colour:
 * red at 90 and above, orange at 60 and above, green otherwise (including NaN).
 */
function utilColor(pct) {
  if (pct >= 90) return '#ef4444';
  if (pct >= 60) return '#f97316';
  return '#22c55e';
}
/**
 * Render one card per GPU into #gpu-panel.
 *
 * @param {Array<Object>} gpus  entries with index, name, util_gpu, mem_used_mib,
 *   mem_total_mib, power_w, power_limit_w, temp_c, clock_sm_mhz and util_mem
 *
 * Anything that is not a non-empty array shows "No GPU data.". Text fields are
 * HTML-escaped because the payload comes from whatever server the user typed
 * in. A numeric field that is missing or not a number is shown as an em dash
 * with an empty bar, instead of throwing and leaving the panel stale.
 */
function renderGPU(gpus) {
  const panel = document.getElementById('gpu-panel');
  if (!Array.isArray(gpus) || gpus.length === 0) {
    panel.innerHTML = '<div style="color:var(--muted);font-size:12px;">No GPU data.</div>';
    return;
  }

  const ENTITIES = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  // Escape a value for safe interpolation into HTML text or attribute context.
  const esc = (s) => String(s ?? '').replace(/[&<>"']/g, (ch) => ENTITIES[ch]);
  // Accept real numbers only; everything else becomes NaN ("missing").
  const num = (x) => (typeof x === 'number' ? x : NaN);
  // Text for a reading: fixed decimals, or an em dash when missing.
  const show = (n, digits) => (Number.isFinite(n) ? n.toFixed(digits) : '\u2014');
  // Bar width in whole percent, clamped to 0..100; missing readings give an empty bar.
  const bar = (p) => (Number.isFinite(p) ? Math.min(100, Math.max(0, p)).toFixed(0) : '0');

  const cards = gpus.map((entry) => {
    const g = entry || {};
    const util = num(g.util_gpu);
    const memUsed = num(g.mem_used_mib);
    const memTotal = num(g.mem_total_mib);
    const power = num(g.power_w);
    const powerLimit = num(g.power_limit_w);
    // Guard the divisions: a zero or missing total/limit yields an empty bar.
    const memBar = bar(memTotal > 0 ? memUsed / memTotal * 100 : NaN);
    const powerBar = bar(powerLimit > 0 ? power / powerLimit * 100 : NaN);
    return `<div class="gpu-card">
<div class="gpu-name"><span class="gpu-index">GPU ${esc(g.index)} &nbsp;\u00b7&nbsp;</span>${esc(g.name)}</div>
<div class="gpu-row"><div class="gpu-label">Compute</div><div class="bar-track"><div class="bar-fill" style="width:${bar(util)}%;background:${utilColor(util)}"></div></div><div class="gpu-val">${show(util, 0)}%</div></div>
<div class="gpu-row"><div class="gpu-label">VRAM</div><div class="bar-track"><div class="bar-fill" style="width:${memBar}%;background:${utilColor(+memBar)}"></div></div><div class="gpu-val">${show(memUsed / 1024, 1)} / ${show(memTotal / 1024, 0)} GB</div></div>
<div class="gpu-row"><div class="gpu-label">Power</div><div class="bar-track"><div class="bar-fill" style="width:${powerBar}%;background:#3b82f6"></div></div><div class="gpu-val">${show(power, 0)} W</div></div>
<div class="gpu-meta">
<div class="gpu-meta-item">Temp <span>${show(num(g.temp_c), 0)}\u00b0C</span></div>
<div class="gpu-meta-item">SM clock <span>${show(num(g.clock_sm_mhz), 0)} MHz</span></div>
<div class="gpu-meta-item">Mem util <span>${show(num(g.util_mem), 0)}%</span></div>
</div></div>`;
  });

  panel.innerHTML = '<div class="gpu-grid">' + cards.join('') + '</div>';
}
/**
 * Fetch the GPU snapshot and hand its `gpus` field to renderGPU().
 * Best-effort: a non-OK response or any error leaves the panel as it was.
 */
async function fetchGPU() {
  try {
    const url = serverUrl + '/gpu-2e215f981f3f?t=' + Date.now();
    const r = await fetch(url);
    if (!r.ok) return;
    const payload = await r.json();
    renderGPU(payload.gpus);
  } catch (_) {
    // deliberately ignored; the next poll retries
  }
}
// Load once now, then refresh every 5 seconds.
fetchGPU();
setInterval(fetchGPU, 5000);
// ── analytics ───────────────────────────────────────────────────
/**
 * Fetch the analytics summary and refresh the request counters, the
 * "requests updated" timestamp, the 5-minute latency cards and the
 * requests-per-hour bar chart.
 *
 * Best-effort: a non-OK response or any error leaves the page as it was and
 * the next poll retries.
 */
async function fetchAnalytics() {
  // Placeholder for "no value"; an escape so a re-encoding of the file cannot corrupt it.
  const DASH = '\u2014';
  const setText = (id, value) => { document.getElementById(id).textContent = value; };
  try {
    const r = await fetch(serverUrl + '/analytics-2e215f981f3f?t=' + Date.now());
    if (!r.ok) return;
    const d = await r.json();
    setText('u-req-1d', d.summary_24h?.requests ?? DASH);
    setText('u-req-7d', d.summary_7d?.requests ?? DASH);
    setText('u-req-total', d.summary_total?.requests ?? DASH);
    setText('u-updated', d.updated_at ? new Date(d.updated_at).toLocaleTimeString() : DASH);
    const lat = d.latency_5m || {};
    // Latency figures are only shown when the window actually contains samples.
    const hasSamples = lat.sample_count > 0;
    setText('v-latency-avg', hasSamples ? fmt(lat.avg_ms, 0) : DASH);
    setText('v-latency-p90', hasSamples ? fmt(lat.p90_ms, 0) : DASH);
    const hours = d.requests_by_hour || [];
    // Characters 11..15 of `hour` become the label; assumes an ISO-like
    // 'YYYY-MM-DDTHH:MM…' string (TODO confirm against the server). A missing
    // `hour` gives an empty label instead of aborting the whole refresh.
    aC.reqHour.data.labels = hours.map(h => String(h.hour ?? '').slice(11, 16));
    aC.reqHour.data.datasets[0].data = hours.map(h => h.requests);
    aC.reqHour.update('none');
  } catch (_) {
    // deliberately ignored; the next poll retries
  }
}
// Load once now, then refresh every 10 seconds.
fetchAnalytics();
setInterval(fetchAnalytics, 10000);
</script>
</body>
</html>