71d14209a4
OmniRoute is an intelligent API gateway that unifies 20+ AI providers behind a single OpenAI-compatible endpoint. Features include intelligent routing with 6 strategies, multi-format translation (OpenAI/Claude/Gemini/Responses API), circuit breakers, semantic caching, combo fallback chains, real-time health monitoring, and a full dashboard with provider management, analytics, and CLI tool integration. Key highlights: - 20+ providers (Claude Code, Codex, Gemini CLI, GitHub Copilot, iFlow, Qwen, Kiro, etc.) - 6 routing strategies (Fill First, Round Robin, P2C, Random, Least Used, Cost Optimized) - Export/Import database backup with full archive support - Translator Playground with 4 modes (Playground, Chat Tester, Test Bench, Live Monitor) - 100% TypeScript across src/ and open-sse/ - Docker support with multi-stage builds - Comprehensive documentation and 9 dashboard screenshots
120 lines
4.2 KiB
JavaScript
120 lines
4.2 KiB
JavaScript
import test from "node:test";
|
|
import assert from "node:assert/strict";
|
|
|
|
const {
|
|
compressContext,
|
|
estimateTokens,
|
|
getTokenLimit,
|
|
} = await import("../../open-sse/services/contextManager.ts");
|
|
|
|
// ─── estimateTokens ─────────────────────────────────────────────────────────
|
|
|
|
test("estimateTokens: estimates from string", () => {
|
|
assert.equal(estimateTokens("hello"), 2); // 5/4 = 2
|
|
assert.ok(estimateTokens("a".repeat(100)) === 25);
|
|
});
|
|
|
|
test("estimateTokens: handles null", () => {
|
|
assert.equal(estimateTokens(null), 0);
|
|
assert.equal(estimateTokens(""), 0);
|
|
});
|
|
|
|
// ─── getTokenLimit ──────────────────────────────────────────────────────────
|
|
|
|
test("getTokenLimit: detects claude", () => {
|
|
assert.equal(getTokenLimit("claude", "claude-sonnet-4"), 200000);
|
|
});
|
|
|
|
test("getTokenLimit: detects gemini", () => {
|
|
assert.equal(getTokenLimit("gemini", "gemini-2.5-pro"), 1000000);
|
|
});
|
|
|
|
test("getTokenLimit: default fallback", () => {
|
|
assert.equal(getTokenLimit("unknown"), 128000);
|
|
});
|
|
|
|
// ─── compressContext ────────────────────────────────────────────────────────
|
|
|
|
test("compressContext: returns unchanged if fits", () => {
|
|
const body = {
|
|
model: "claude-sonnet-4",
|
|
messages: [
|
|
{ role: "system", content: "You are helpful." },
|
|
{ role: "user", content: "Hello" },
|
|
],
|
|
};
|
|
const result = compressContext(body);
|
|
assert.equal(result.compressed, false);
|
|
});
|
|
|
|
test("compressContext: handles null/empty body", () => {
|
|
assert.equal(compressContext(null).compressed, false);
|
|
assert.equal(compressContext({}).compressed, false);
|
|
assert.equal(compressContext({ messages: null }).compressed, false);
|
|
});
|
|
|
|
test("compressContext: Layer 1 — trims long tool messages", () => {
|
|
const longContent = "x".repeat(10000);
|
|
const body = {
|
|
model: "test",
|
|
messages: [
|
|
{ role: "user", content: "run tool" },
|
|
{ role: "tool", content: longContent, tool_call_id: "t1" },
|
|
{ role: "user", content: "done?" },
|
|
],
|
|
};
|
|
// Use very tight limit to force compression
|
|
const result = compressContext(body, { maxTokens: 500, reserveTokens: 100 });
|
|
assert.ok(result.compressed);
|
|
const toolMsg = result.body.messages.find((m) => m.role === "tool");
|
|
assert.ok(toolMsg.content.length < longContent.length);
|
|
assert.ok(toolMsg.content.includes("[truncated]"));
|
|
});
|
|
|
|
test("compressContext: Layer 2 — compresses thinking in old messages", () => {
|
|
const body = {
|
|
model: "test",
|
|
messages: [
|
|
{ role: "user", content: "q1" },
|
|
{
|
|
role: "assistant",
|
|
content: [
|
|
{ type: "thinking", thinking: "lots of thinking here ".repeat(500) },
|
|
{ type: "text", text: "answer1" },
|
|
],
|
|
},
|
|
{ role: "user", content: "q2" },
|
|
{
|
|
role: "assistant",
|
|
content: [
|
|
{ type: "thinking", thinking: "more thinking" },
|
|
{ type: "text", text: "answer2" },
|
|
],
|
|
},
|
|
],
|
|
};
|
|
const result = compressContext(body, { maxTokens: 2000, reserveTokens: 500 });
|
|
// First assistant should have thinking removed
|
|
const firstAssistant = result.body.messages.find((m) => m.role === "assistant");
|
|
if (Array.isArray(firstAssistant.content)) {
|
|
const hasThinking = firstAssistant.content.some((b) => b.type === "thinking");
|
|
assert.equal(hasThinking, false);
|
|
}
|
|
});
|
|
|
|
test("compressContext: Layer 3 — drops old messages to fit", () => {
|
|
const messages = [
|
|
{ role: "system", content: "You are helpful" },
|
|
...Array.from({ length: 100 }, (_, i) => [
|
|
{ role: "user", content: `Message ${i}: ${"content ".repeat(50)}` },
|
|
{ role: "assistant", content: `Response ${i}: ${"answer ".repeat(50)}` },
|
|
]).flat(),
|
|
];
|
|
const body = { model: "test", messages };
|
|
const result = compressContext(body, { maxTokens: 3000, reserveTokens: 500 });
|
|
assert.ok(result.compressed);
|
|
assert.ok(result.body.messages.length < messages.length);
|
|
// System message preserved
|
|
assert.equal(result.body.messages[0].role, "system");
|
|
});
|