// k6 multi-scenario load test for Veylant IA Proxy (E2-12, Sprint 12).
//
// Four scenarios in a single script:
//   smoke  — 1 VU, 1 min                  : sanity check (CI)
//   load   — ramp 0→50 VUs, 5 min plateau : steady-state validation
//   stress — ramp 0→200 VUs               : find the breaking point
//   soak   — 20 VUs, 30 min               : detect memory leaks and slow degradation
//
// Select scenario via SCENARIO env var (default: load):
//   k6 run --env SCENARIO=smoke test/k6/k6-load-test.js
//   k6 run --env SCENARIO=stress test/k6/k6-load-test.js
//
// Other env vars:
//   VEYLANT_URL   — proxy base URL (default: http://localhost:8090)
//   VEYLANT_TOKEN — Bearer token (default: dev-token)
//   MODEL         — model name (default: llama3.2)
//
// Run via Makefile:
//   make load-test                  (load scenario)
//   make load-test SCENARIO=stress  (stress scenario)
import http from 'k6/http';
import { check, sleep, group } from 'k6';
import { Counter, Rate, Trend } from 'k6/metrics';

// ── Config (all overridable via k6 --env) ──────────────────────────────────────
const VEYLANT_URL = __ENV.VEYLANT_URL || 'http://localhost:8090';
const TOKEN = __ENV.VEYLANT_TOKEN || 'dev-token';
const MODEL = __ENV.MODEL || 'llama3.2';
const SCENARIO = __ENV.SCENARIO || 'load';

// ── Custom metrics ─────────────────────────────────────────────────────────────
const chatErrors = new Counter('veylant_chat_errors');          // failed chat iterations
const healthErrors = new Counter('veylant_health_errors');      // failed health checks
const errorRate = new Rate('veylant_error_rate');               // chat pass/fail ratio
const chatLatency = new Trend('veylant_chat_latency_ms', true); // chat request latency
// ── Scenario definitions ───────────────────────────────────────────────────────
// Exactly one of these is activated per run, selected by the SCENARIO env var.
const scenarios = {
  // Sanity check for CI: a single VU for one minute.
  smoke: { executor: 'constant-vus', vus: 1, duration: '1m' },

  // Steady-state validation: ramp to 50 VUs, hold five minutes, ramp down.
  load: {
    executor: 'ramping-vus',
    startVUs: 0,
    stages: [
      { duration: '1m', target: 50 },
      { duration: '5m', target: 50 },
      { duration: '1m', target: 0 },
    ],
    gracefulRampDown: '30s',
  },

  // Breaking-point search: step up to 200 VUs, then release.
  stress: {
    executor: 'ramping-vus',
    startVUs: 0,
    stages: [
      { duration: '2m', target: 50 },
      { duration: '2m', target: 100 },
      { duration: '2m', target: 200 },
      { duration: '1m', target: 0 },
    ],
    gracefulRampDown: '30s',
  },

  // Long, moderate load to surface leaks and slow degradation.
  soak: { executor: 'constant-vus', vus: 20, duration: '30m' },
};
// ── Options ────────────────────────────────────────────────────────────────────
// Fail fast with a readable message when SCENARIO does not name a defined
// scenario; otherwise k6 receives `{ [SCENARIO]: undefined }` and reports a
// cryptic configuration error.
if (!(SCENARIO in scenarios)) {
  throw new Error(
    `Unknown SCENARIO "${SCENARIO}". Valid values: ${Object.keys(scenarios).join(', ')}`,
  );
}

export const options = {
  // Register only the selected scenario.
  scenarios: { [SCENARIO]: scenarios[SCENARIO] },
  thresholds: {
    // Built-in metrics: overall request latency and transport-level failures.
    http_req_duration: ['p(99)<500', 'p(95)<200'],
    http_req_failed: ['rate<0.01'],
    // Custom metrics: chat-level error rate and latency.
    veylant_error_rate: ['rate<0.01'],
    veylant_chat_latency_ms: ['p(99)<500'],
  },
};
// ── Request helpers ────────────────────────────────────────────────────────────
// Headers shared by every chat-completion POST: JSON payload + bearer auth.
const authHeaders = {
  'Content-Type': 'application/json',
  'Authorization': `Bearer ${TOKEN}`,
};

// Per-request params: shared headers and a 10s hard timeout.
const chatParams = { headers: authHeaders, timeout: '10s' };

// Fixed non-streaming prompt, serialized once and reused by every iteration.
const chatMessage = { role: 'user', content: 'Bonjour, résume en une phrase le principe de la RGPD.' };
const chatBody = JSON.stringify({ model: MODEL, messages: [chatMessage], stream: false });
// ── Default function ───────────────────────────────────────────────────────────
// One VU iteration: 90% chat completions, 10% health checks, then think time.
export default function () {
  if (Math.random() < 0.9) {
    group('chat_completions', () => {
      const res = http.post(`${VEYLANT_URL}/v1/chat/completions`, chatBody, chatParams);

      // res.body is null on timeouts / connection errors — guard with '' so the
      // body checks record a failure instead of throwing a TypeError and
      // aborting the iteration (which is exactly what happens under stress).
      const body = res.body || '';
      const ok = check(res, {
        'chat: status 200': (r) => r.status === 200,
        'chat: has choices': () => {
          try {
            const parsed = JSON.parse(body);
            return Array.isArray(parsed.choices) && parsed.choices.length > 0;
          } catch (_) {
            return false;
          }
        },
        'chat: no error key': () => !body.includes('"error"'),
      });

      chatLatency.add(res.timings.duration);
      errorRate.add(ok ? 0 : 1);
      if (!ok) chatErrors.add(1);
    });
  } else {
    group('health', () => {
      const res = http.get(`${VEYLANT_URL}/healthz`, { timeout: '2s' });
      const body = res.body || ''; // same null-body guard as the chat branch
      const ok = check(res, {
        'health: status 200': (r) => r.status === 200,
        'health: body ok': () => body.includes('"ok"') || body.includes('"status"'),
      });
      if (!ok) healthErrors.add(1);
    });
  }

  // Realistic inter-request think time: 100–500ms.
  sleep(0.1 + Math.random() * 0.4);
}
// ── Setup — abort early if proxy is unreachable ────────────────────────────────
// Runs once before any VU starts; throwing here aborts the whole test run.
export function setup() {
  const health = http.get(`${VEYLANT_URL}/healthz`, { timeout: '5s' });
  if (health.status !== 200) {
    throw new Error(`[setup] Proxy not reachable: ${VEYLANT_URL}/healthz → HTTP ${health.status}`);
  }
  console.log(`[setup] Scenario="${SCENARIO}" URL="${VEYLANT_URL}" model="${MODEL}"`);
  // Returned object is handed to the default function and teardown() as `data`.
  return { startTime: new Date().toISOString() };
}
// Runs once after all VUs finish; receives the object returned by setup().
export function teardown({ startTime }) {
  console.log(`[teardown] Test started at ${startTime}. Check threshold summary above.`);
}