bddec84f4e
Introduce full AI orchestration ecosystem: - MCP Server with 16 tools, scoped auth, and audit logging - A2A v0.3 server with JSON-RPC 2.0, SSE streaming, and task manager - Auto-Combo engine with 6-factor scoring and self-healing - VS Code extension with smart dispatch and budget tracking - Harden CI pipeline: add static checks, remove continue-on-error - Add translator schema validation tests - Update .gitignore and CHANGELOG for release checklist
150 lines
4.1 KiB
JavaScript
150 lines
4.1 KiB
JavaScript
/**
|
|
* OmniRoute — k6 Load / Performance Test (T-5)
|
|
*
|
|
* Tests the proxy endpoint under sustained load to measure:
|
|
* - Request throughput (RPS)
|
|
* - Response latency (p50, p95, p99)
|
|
* - Error rate
|
|
* - Concurrent connection handling
|
|
*
|
|
* Usage:
|
|
* k6 run tests/load/proxy-load.js
|
|
* k6 run tests/load/proxy-load.js --env BASE_URL=https://llms.omniroute.online
|
|
* k6 run tests/load/proxy-load.js --env VUS=50 --env DURATION=120s
|
|
*
|
|
* Prerequisites:
|
|
* - k6 installed: https://grafana.com/docs/k6/latest/set-up/install-k6/
|
|
* - OMNIROUTE_API_KEY env var or --env API_KEY=... set
|
|
*/
|
|
|
|
import http from "k6/http";
|
|
import { check, sleep } from "k6";
|
|
import { Rate, Trend } from "k6/metrics";
|
|
|
|
// ── Custom metrics ──
|
|
const errorRate = new Rate("errors");
|
|
const chatLatency = new Trend("chat_latency", true); // in ms
|
|
const healthLatency = new Trend("health_latency", true);
|
|
|
|
// ── Configuration ──
|
|
const BASE_URL = __ENV.BASE_URL || "http://localhost:3000";
|
|
const API_KEY = __ENV.API_KEY || __ENV.OMNIROUTE_API_KEY || "test-key";
|
|
const VUS = parseInt(__ENV.VUS || "10", 10);
|
|
const DURATION = __ENV.DURATION || "60s";
|
|
|
|
export const options = {
|
|
scenarios: {
|
|
// Ramp-up scenario for stress testing
|
|
chat_stress: {
|
|
executor: "ramping-vus",
|
|
startVUs: 1,
|
|
stages: [
|
|
{ duration: "10s", target: VUS }, // Ramp up
|
|
{ duration: DURATION, target: VUS }, // Sustained load
|
|
{ duration: "10s", target: 0 }, // Ramp down
|
|
],
|
|
exec: "chatCompletions",
|
|
},
|
|
// Constant rate for health checks
|
|
health_check: {
|
|
executor: "constant-vus",
|
|
vus: 2,
|
|
duration: DURATION,
|
|
exec: "healthCheck",
|
|
},
|
|
},
|
|
thresholds: {
|
|
http_req_duration: ["p(95)<5000"], // 95% of requests < 5s
|
|
errors: ["rate<0.1"], // Error rate < 10%
|
|
chat_latency: ["p(50)<3000", "p(95)<8000"],
|
|
health_latency: ["p(95)<500"],
|
|
},
|
|
};
|
|
|
|
// ── Headers ──
|
|
const headers = {
|
|
"Content-Type": "application/json",
|
|
Authorization: `Bearer ${API_KEY}`,
|
|
};
|
|
|
|
// ── Scenarios ──
|
|
|
|
/**
|
|
* Chat Completions — main proxy endpoint
|
|
* Sends a simple non-streaming chat request.
|
|
*/
|
|
export function chatCompletions() {
|
|
const payload = JSON.stringify({
|
|
model: "gpt-4o-mini",
|
|
messages: [{ role: "user", content: "Say hello in one word." }],
|
|
temperature: 0,
|
|
max_tokens: 10,
|
|
stream: false,
|
|
});
|
|
|
|
const res = http.post(`${BASE_URL}/v1/chat/completions`, payload, {
|
|
headers,
|
|
timeout: "15s",
|
|
});
|
|
|
|
chatLatency.add(res.timings.duration);
|
|
|
|
const passed = check(res, {
|
|
"status is 200": (r) => r.status === 200,
|
|
"has choices": (r) => {
|
|
try {
|
|
const body = JSON.parse(r.body);
|
|
return body.choices && body.choices.length > 0;
|
|
} catch {
|
|
return false;
|
|
}
|
|
},
|
|
"response time < 10s": (r) => r.timings.duration < 10000,
|
|
});
|
|
|
|
errorRate.add(!passed);
|
|
sleep(0.5);
|
|
}
|
|
|
|
/**
|
|
* Health Check — lightweight endpoint to measure base latency.
|
|
*/
|
|
export function healthCheck() {
|
|
const res = http.get(`${BASE_URL}/api/monitoring/health`, {
|
|
headers: { Authorization: `Bearer ${API_KEY}` },
|
|
timeout: "5s",
|
|
});
|
|
|
|
healthLatency.add(res.timings.duration);
|
|
|
|
const passed = check(res, {
|
|
"health status 200": (r) => r.status === 200,
|
|
"response time < 1s": (r) => r.timings.duration < 1000,
|
|
});
|
|
|
|
errorRate.add(!passed);
|
|
sleep(2);
|
|
}
|
|
|
|
/**
|
|
* Summary handler — outputs a custom summary.
|
|
*/
|
|
export function handleSummary(data) {
|
|
const summary = {
|
|
timestamp: new Date().toISOString(),
|
|
scenarios: Object.keys(options.scenarios),
|
|
metrics: {
|
|
http_reqs: data.metrics.http_reqs?.values?.count || 0,
|
|
avg_duration_ms: Math.round(data.metrics.http_req_duration?.values?.avg || 0),
|
|
p95_duration_ms: Math.round(data.metrics.http_req_duration?.values?.["p(95)"] || 0),
|
|
p99_duration_ms: Math.round(data.metrics.http_req_duration?.values?.["p(99)"] || 0),
|
|
error_rate: (data.metrics.errors?.values?.rate || 0).toFixed(4),
|
|
},
|
|
};
|
|
|
|
return {
|
|
stdout: `\n📊 Load Test Summary\n${JSON.stringify(summary, null, 2)}\n`,
|
|
"tests/load/results.json": JSON.stringify(summary, null, 2),
|
|
};
|
|
}
|