veylant/test/k6/load_test.js
2026-02-23 13:35:04 +01:00

103 lines
4.1 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// k6 load test for Veylant IA Proxy (E10-10).
//
// Targets:
// - p99 latency < 300ms
// - error rate < 1%
// - 1000 VU sustained for 8 minutes
//
// Run (requires a running proxy + mock Ollama):
// k6 run test/k6/load_test.js
//
// Environment variables:
// BASE_URL — proxy base URL (default: http://localhost:8090)
// AUTH_TOKEN — Bearer token (default: dev-token)
// MODEL — LLM model name (default: llama3.2, routed to local Ollama)
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';
// ── Custom metrics ────────────────────────────────────────────────────────────
// Application-level failure rate: fraction of chat iterations whose checks
// (status + response shape) did not all pass. Complements http_req_failed,
// which only sees transport/HTTP-level errors.
const errorRate = new Rate('custom_error_rate');
// Per-endpoint latency trends in milliseconds; the `true` flag marks the
// metric as a time value so summaries format it as a duration.
const chatLatency = new Trend('chat_latency_ms', true);
const healthLatency = new Trend('health_latency_ms', true);
// ── Test configuration ────────────────────────────────────────────────────────
// Ten-minute profile: 1m ramp to 100 VUs, 8m climb/hold toward 1000 VUs,
// 1m ramp-down. Any threshold failure makes the k6 run exit non-zero,
// which is what gates CI on the SLA targets listed in the header.
export const options = {
stages: [
{ duration: '1m', target: 100 }, // ramp-up
{ duration: '8m', target: 1000 }, // sustained load
{ duration: '1m', target: 0 }, // ramp-down
],
thresholds: {
// SLA targets
http_req_duration: ['p(99)<300'], // p99 < 300ms
http_req_failed: ['rate<0.01'], // < 1% HTTP errors
custom_error_rate: ['rate<0.01'], // < 1% application errors
chat_latency_ms: ['p(99)<300'],
},
};
// ── Helpers ───────────────────────────────────────────────────────────────────
// All three are overridable via environment variables (see header comment).
const BASE_URL = __ENV.BASE_URL || 'http://localhost:8090';
const AUTH_TOKEN = __ENV.AUTH_TOKEN || 'dev-token';
const MODEL = __ENV.MODEL || 'llama3.2';
// Shared request params for the chat endpoint. The 5s timeout bounds a single
// request; it is deliberately far above the 300ms p99 target so slow requests
// are recorded as latency outliers rather than aborted early.
const chatParams = {
headers: {
'Content-Type': 'application/json',
'Authorization': `Bearer ${AUTH_TOKEN}`,
},
timeout: '5s',
};
// Fixed, non-streaming one-message prompt; serialized once at init time so
// every VU iteration reuses the same payload.
const chatBody = JSON.stringify({
model: MODEL,
messages: [{ role: 'user', content: 'Dis-moi bonjour en une phrase.' }],
stream: false,
});
// ── Default scenario ──────────────────────────────────────────────────────────
export default function () {
  // Traffic mix mirrors production: roughly 90% chat completions, 10% health checks.
  const doChat = Math.random() < 0.9;

  if (!doChat) {
    const healthRes = http.get(`${BASE_URL}/healthz`, { timeout: '2s' });
    check(healthRes, { 'health: status 200': (r) => r.status === 200 });
    healthLatency.add(healthRes.timings.duration);
  } else {
    const chatRes = http.post(`${BASE_URL}/v1/chat/completions`, chatBody, chatParams);

    // A chat response is valid only if it parses as JSON and carries at
    // least one entry in `choices` (OpenAI-compatible response shape).
    const hasChoices = (r) => {
      try {
        const parsed = JSON.parse(r.body);
        return Array.isArray(parsed.choices) && parsed.choices.length > 0;
      } catch (_) {
        return false;
      }
    };

    const passed = check(chatRes, {
      'chat: status 200': (r) => r.status === 200,
      'chat: has choices': hasChoices,
    });
    chatLatency.add(chatRes.timings.duration);
    // Feed the application-level error rate threshold (custom_error_rate).
    errorRate.add(!passed);
  }

  // Think time: 0–200 ms random (simulates realistic inter-request spacing).
  sleep(Math.random() * 0.2);
}
// ── Setup — verify proxy is reachable before starting ────────────────────────
export function setup() {
  // Probe the proxy once before any VUs spin up; a dead target should fail
  // the run immediately rather than produce 10 minutes of error noise.
  const probe = http.get(`${BASE_URL}/healthz`);
  if (probe.status === 200) {
    console.log(`Load test starting. Target: ${BASE_URL}, model: ${MODEL}`);
    return;
  }
  throw new Error(`Proxy not reachable at ${BASE_URL}/healthz — status ${probe.status}`);
}
// ── Teardown — summary ────────────────────────────────────────────────────────
// Runs once after all VUs finish. `data` is whatever setup() returned
// (undefined here); k6 requires the parameter in the signature.
export function teardown(data) {
console.log('Load test complete. Check thresholds in the summary above.');
}