Files
OmniRoute/tests/load/proxy-load.js
T
diegosouzapw bddec84f4e feat: add MCP server, A2A protocol, auto-combo engine & VS Code extension
Introduce full AI orchestration ecosystem:
- MCP Server with 16 tools, scoped auth, and audit logging
- A2A v0.3 server with JSON-RPC 2.0, SSE streaming, and task manager
- Auto-Combo engine with 6-factor scoring and self-healing
- VS Code extension with smart dispatch and budget tracking
- Harden CI pipeline: add static checks, remove continue-on-error
- Add translator schema validation tests
- Update .gitignore and CHANGELOG for release checklist
2026-03-04 18:45:02 -03:00

150 lines
4.1 KiB
JavaScript

/**
* OmniRoute — k6 Load / Performance Test (T-5)
*
* Tests the proxy endpoint under sustained load to measure:
* - Request throughput (RPS)
* - Response latency (p50, p95, p99)
* - Error rate
* - Concurrent connection handling
*
* Usage:
* k6 run tests/load/proxy-load.js
* k6 run tests/load/proxy-load.js --env BASE_URL=https://llms.omniroute.online
* k6 run tests/load/proxy-load.js --env VUS=50 --env DURATION=120s
*
* Prerequisites:
* - k6 installed: https://grafana.com/docs/k6/latest/set-up/install-k6/
* - OMNIROUTE_API_KEY env var or --env API_KEY=... set
*/
import http from "k6/http";
import { check, sleep } from "k6";
import { Rate, Trend } from "k6/metrics";
// ── Custom metrics ──
// Rate of iterations (chat and health scenarios combined) whose checks did not all pass.
const errorRate = new Rate("errors");
// The `true` flag marks each trend as a time metric; samples are request durations in ms.
const chatLatency = new Trend("chat_latency", true);
const healthLatency = new Trend("health_latency", true);

// ── Configuration ──
// Every setting can be overridden from the environment (e.g. `--env NAME=value`).
// Target server under test.
const BASE_URL = __ENV.BASE_URL || "http://localhost:3000";
// API_KEY takes precedence over OMNIROUTE_API_KEY; "test-key" is the last-resort placeholder.
const API_KEY = __ENV.API_KEY || __ENV.OMNIROUTE_API_KEY || "test-key";
// Peak number of virtual users for the chat scenario (base-10 integer).
const VUS = Number.parseInt(__ENV.VUS || "10", 10);
// Length of the sustained-load phase, as a k6 duration string.
const DURATION = __ENV.DURATION || "60s";
/**
 * k6 test options: scenarios, summary statistics, and pass/fail thresholds.
 *
 * - `chat_stress` ramps from 1 VU to VUS, holds VUS for DURATION, then ramps
 *   down to 0, executing the exported `chatCompletions` function.
 * - `health_check` keeps 2 VUs running for DURATION, executing the exported
 *   `healthCheck` function.
 */
export const options = {
  scenarios: {
    // Ramp-up scenario for stress testing
    chat_stress: {
      executor: "ramping-vus",
      startVUs: 1,
      stages: [
        { duration: "10s", target: VUS }, // Ramp up
        { duration: DURATION, target: VUS }, // Sustained load
        { duration: "10s", target: 0 }, // Ramp down
      ],
      exec: "chatCompletions",
    },
    // Constant number of VUs for health checks
    health_check: {
      executor: "constant-vus",
      vus: 2,
      duration: DURATION,
      exec: "healthCheck",
    },
  },
  // k6's default trend statistics stop at p(95). Request p(99) explicitly so the
  // p99 value that handleSummary reads from http_req_duration is populated
  // instead of silently falling back to 0.
  summaryTrendStats: ["avg", "min", "med", "max", "p(90)", "p(95)", "p(99)"],
  thresholds: {
    http_req_duration: ["p(95)<5000"], // 95% of requests < 5s
    errors: ["rate<0.1"], // Error rate < 10%
    chat_latency: ["p(50)<3000", "p(95)<8000"],
    health_latency: ["p(95)<500"],
  },
};
// ── Headers ──
// Shared request headers for the chat endpoint: JSON body plus bearer-token auth.
const bearerToken = `Bearer ${API_KEY}`;
const headers = {
  "Content-Type": "application/json",
  Authorization: bearerToken,
};
// ── Scenarios ──
/**
 * Chat Completions scenario — exercises the main proxy endpoint.
 *
 * Posts one small non-streaming chat request, records the request duration in
 * the chat latency trend, reports to the error rate whether any check failed,
 * and then pauses for half a second.
 */
export function chatCompletions() {
  const requestBody = JSON.stringify({
    model: "gpt-4o-mini",
    messages: [{ role: "user", content: "Say hello in one word." }],
    temperature: 0,
    max_tokens: 10,
    stream: false,
  });
  const url = `${BASE_URL}/v1/chat/completions`;
  const params = { headers, timeout: "15s" };

  const response = http.post(url, requestBody, params);
  chatLatency.add(response.timings.duration);

  // An unparseable body (or one that cannot be destructured) counts as a failed check.
  const hasChoices = ({ body }) => {
    try {
      const { choices } = JSON.parse(body);
      return choices && choices.length > 0;
    } catch {
      return false;
    }
  };

  const allChecksPassed = check(response, {
    "status is 200": ({ status }) => status === 200,
    "has choices": hasChoices,
    "response time < 10s": ({ timings }) => timings.duration < 10000,
  });

  errorRate.add(!allChecksPassed);
  sleep(0.5);
}
/**
 * Health Check scenario — hits a lightweight endpoint to measure base latency.
 *
 * Issues one authenticated GET, records the request duration in the health
 * latency trend, reports to the error rate whether any check failed, and then
 * pauses for two seconds.
 */
export function healthCheck() {
  const url = `${BASE_URL}/api/monitoring/health`;
  const params = {
    headers: { Authorization: `Bearer ${API_KEY}` },
    timeout: "5s",
  };

  const response = http.get(url, params);
  healthLatency.add(response.timings.duration);

  const allChecksPassed = check(response, {
    "health status 200": ({ status }) => status === 200,
    "response time < 1s": ({ timings }) => timings.duration < 1000,
  });

  errorRate.add(!allChecksPassed);
  sleep(2);
}
/**
 * Summary handler — condenses the end-of-test data into a small custom report.
 *
 * The report holds a timestamp, the configured scenario names, the total
 * request count, the average / p95 / p99 request duration rounded to whole
 * milliseconds, and the error rate formatted to four decimals. Any statistic
 * missing from `data.metrics` is reported as 0.
 *
 * @param {object} data - Summary data; only `data.metrics` is read.
 * @returns {object} The report text under `stdout` and the same JSON under
 *   `tests/load/results.json`.
 */
export function handleSummary(data) {
  const { metrics } = data;
  const durationStats = metrics.http_req_duration?.values;
  // Missing (or zero) statistics collapse to 0 before rounding.
  const toWholeMs = (value) => Math.round(value || 0);

  const report = {
    timestamp: new Date().toISOString(),
    scenarios: Object.keys(options.scenarios),
    metrics: {
      http_reqs: metrics.http_reqs?.values?.count || 0,
      avg_duration_ms: toWholeMs(durationStats?.avg),
      p95_duration_ms: toWholeMs(durationStats?.["p(95)"]),
      p99_duration_ms: toWholeMs(durationStats?.["p(99)"]),
      error_rate: (metrics.errors?.values?.rate || 0).toFixed(4),
    },
  };

  // Serialize once; the console output and the results file share the same JSON.
  const reportJson = JSON.stringify(report, null, 2);
  return {
    stdout: `\n📊 Load Test Summary\n${reportJson}\n`,
    "tests/load/results.json": reportJson,
  };
}