// k6 load test for Veylant IA Proxy (E10-10).
//
// Targets:
//   - p99 latency < 300ms
//   - error rate < 1%
//   - 1 000 VU sustained for 8 minutes
//
// Run (requires a running proxy + mock Ollama):
//   k6 run test/k6/load_test.js
//
// Environment variables:
//   BASE_URL   — proxy base URL (default: http://localhost:8090)
//   AUTH_TOKEN — Bearer token (default: dev-token)
//   MODEL      — LLM model name (default: llama3.2, routed to local Ollama)
import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';

// ── Custom metrics ────────────────────────────────────────────────────────────
// Application-level error rate (failed checks), kept separate from k6's
// built-in http_req_failed, which only counts transport/HTTP-status failures.
const errorRate = new Rate('custom_error_rate');
// Per-endpoint latency trends; `true` marks the values as time durations (ms).
const chatLatency = new Trend('chat_latency_ms', true);
const healthLatency = new Trend('health_latency_ms', true);
// ── Test configuration ────────────────────────────────────────────────────────
export const options = {
  // Ramp up to 100 VUs, hold 1 000 VUs for 8 minutes, then ramp back down.
  stages: [
    { duration: '1m', target: 100 }, // ramp-up
    { duration: '8m', target: 1000 }, // sustained load
    { duration: '1m', target: 0 }, // ramp-down
  ],
  // SLA targets — k6 exits non-zero if any threshold is breached.
  thresholds: {
    http_req_duration: ['p(99)<300'], // p99 < 300ms
    http_req_failed: ['rate<0.01'], // < 1% HTTP errors
    custom_error_rate: ['rate<0.01'], // < 1% application errors
    chat_latency_ms: ['p(99)<300'],
  },
};
// ── Helpers ───────────────────────────────────────────────────────────────────
// `||` (not `??`) is deliberate: an env var set to '' also falls back.
const BASE_URL = __ENV.BASE_URL || 'http://localhost:8090';
const AUTH_TOKEN = __ENV.AUTH_TOKEN || 'dev-token';
const MODEL = __ENV.MODEL || 'llama3.2';

// Shared request parameters for chat-completion calls.
const chatParams = {
  headers: {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${AUTH_TOKEN}`,
  },
  timeout: '5s',
};

// Fixed, non-streaming prompt so every iteration exercises the same code path.
const chatBody = JSON.stringify({
  model: MODEL,
  messages: [{ role: 'user', content: 'Dis-moi bonjour en une phrase.' }],
  stream: false,
});
// ── Default scenario ──────────────────────────────────────────────────────────
export default function () {
  // 90% chat completions, 10% health checks (mirrors production traffic mix).
  if (Math.random() >= 0.9) {
    // Health-probe path (10% of iterations).
    const res = http.get(`${BASE_URL}/healthz`, { timeout: '2s' });
    check(res, { 'health: status 200': (r) => r.status === 200 });
    healthLatency.add(res.timings.duration);
  } else {
    // Chat-completion path (90% of iterations).
    const res = http.post(`${BASE_URL}/v1/chat/completions`, chatBody, chatParams);

    const passed = check(res, {
      'chat: status 200': (r) => r.status === 200,
      'chat: has choices': (r) => {
        let parsed;
        try {
          parsed = JSON.parse(r.body);
        } catch (_) {
          return false; // non-JSON body counts as a failed check
        }
        return Array.isArray(parsed.choices) && parsed.choices.length > 0;
      },
    });

    chatLatency.add(res.timings.duration);
    errorRate.add(!passed);
  }

  // Think time: 0–200ms random (simulates realistic inter-request spacing).
  sleep(Math.random() * 0.2);
}
// ── Setup — verify proxy is reachable before starting ────────────────────────
// Runs once before the VUs start; aborts the whole test if the probe fails.
export function setup() {
  const probe = http.get(`${BASE_URL}/healthz`);
  if (probe.status !== 200) {
    throw new Error(`Proxy not reachable at ${BASE_URL}/healthz — status ${probe.status}`);
  }
  console.log(`Load test starting. Target: ${BASE_URL}, model: ${MODEL}`);
}
// ── Teardown — summary ────────────────────────────────────────────────────────
// `data` is whatever setup() returned (currently nothing); the parameter is
// kept to match k6's teardown signature.
export function teardown(data) {
  console.log('Load test complete. Check thresholds in the summary above.');
}