// k6 load test for Veylant IA Proxy (E10-10).
//
// Targets:
//   - p99 latency < 300ms
//   - error rate < 1%
//   - 1 000 VU sustained for 8 minutes
//
// Run (requires a running proxy + mock Ollama):
//   k6 run test/k6/load_test.js
//
// Environment variables:
//   BASE_URL   — proxy base URL (default: http://localhost:8090)
//   AUTH_TOKEN — Bearer token (default: dev-token)
//   MODEL      — LLM model name (default: llama3.2, routed to local Ollama)

import http from 'k6/http';
import { check, sleep } from 'k6';
import { Rate, Trend } from 'k6/metrics';

// ── Custom metrics ────────────────────────────────────────────────────────────

// Application-level error rate across ALL request types (chat + health).
const errorRate = new Rate('custom_error_rate');
// Second argument `true` marks these Trends as time-valued (reported in ms).
const chatLatency = new Trend('chat_latency_ms', true);
const healthLatency = new Trend('health_latency_ms', true);

// ── Test configuration ────────────────────────────────────────────────────────

export const options = {
  stages: [
    { duration: '1m', target: 100 },  // ramp-up
    { duration: '8m', target: 1000 }, // sustained load
    { duration: '1m', target: 0 },    // ramp-down
  ],
  thresholds: {
    // SLA targets
    http_req_duration: ['p(99)<300'],  // p99 < 300ms
    http_req_failed: ['rate<0.01'],    // < 1% HTTP errors
    custom_error_rate: ['rate<0.01'],  // < 1% application errors
    chat_latency_ms: ['p(99)<300'],
  },
};

// ── Helpers ───────────────────────────────────────────────────────────────────

// `||` (not `??`) is intentional: an empty-string env var should also fall back.
const BASE_URL = __ENV.BASE_URL || 'http://localhost:8090';
const AUTH_TOKEN = __ENV.AUTH_TOKEN || 'dev-token';
const MODEL = __ENV.MODEL || 'llama3.2';

const chatParams = {
  headers: {
    'Content-Type': 'application/json',
    'Authorization': `Bearer ${AUTH_TOKEN}`,
  },
  timeout: '5s',
};

// Request body is constant across iterations, so it is serialized once here.
const chatBody = JSON.stringify({
  model: MODEL,
  messages: [{ role: 'user', content: 'Dis-moi bonjour en une phrase.' }],
  stream: false,
});

// ── Default scenario ──────────────────────────────────────────────────────────

/**
 * Per-VU iteration: issues one request (90% chat completion, 10% health check),
 * records its latency in the matching Trend, and folds the check outcome into
 * `custom_error_rate`.
 */
export default function () {
  // 90% chat completions, 10% health checks (mirrors production traffic mix).
  if (Math.random() < 0.9) {
    const res = http.post(`${BASE_URL}/v1/chat/completions`, chatBody, chatParams);
    const ok = check(res, {
      'chat: status 200': (r) => r.status === 200,
      'chat: has choices': (r) => {
        try {
          const body = JSON.parse(r.body);
          return Array.isArray(body.choices) && body.choices.length > 0;
        } catch (_) {
          // Non-JSON body (e.g. timeout / HTML error page) counts as a failure.
          return false;
        }
      },
    });
    chatLatency.add(res.timings.duration);
    errorRate.add(!ok);
  } else {
    const res = http.get(`${BASE_URL}/healthz`, { timeout: '2s' });
    const ok = check(res, { 'health: status 200': (r) => r.status === 200 });
    healthLatency.add(res.timings.duration);
    // Fix: previously only the chat branch fed custom_error_rate, so health
    // failures (~10% of traffic) were invisible to the "< 1% application
    // errors" threshold. Both branches now contribute a sample per iteration.
    errorRate.add(!ok);
  }

  // Think time: 0–200ms random (simulates realistic inter-request spacing).
  sleep(Math.random() * 0.2);
}

// ── Setup — verify proxy is reachable before starting ─────────────────────────

/**
 * Runs once before the load stages; aborts the test early with a clear error
 * if the proxy's health endpoint is not answering.
 *
 * @throws {Error} when `${BASE_URL}/healthz` does not return HTTP 200.
 */
export function setup() {
  const res = http.get(`${BASE_URL}/healthz`);
  if (res.status !== 200) {
    throw new Error(`Proxy not reachable at ${BASE_URL}/healthz — status ${res.status}`);
  }
  console.log(`Load test starting. Target: ${BASE_URL}, model: ${MODEL}`);
}

// ── Teardown — summary ────────────────────────────────────────────────────────

/** Runs once after all stages; the unused `data` parameter was dropped. */
export function teardown() {
  console.log('Load test complete. Check thresholds in the summary above.');
}