Compare commits
33 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 370070f489 | |||
| 7168f4014d | |||
| f0912feefb | |||
| af338d447b | |||
| 6fad06f659 | |||
| 1d51d8ff27 | |||
| 8af9bd1ac3 | |||
| 9fc3845d92 | |||
| 93bbe8e7a8 | |||
| 46acd16999 | |||
| 5ad2c6abf6 | |||
| d5781d60bd | |||
| e464a95c5a | |||
| a50ea4bb9e | |||
| aa11bb6d93 | |||
| 319018f055 | |||
| 394b986ccb | |||
| 26f7b36ce4 | |||
| f0daad10ce | |||
| 0bc557fb8b | |||
| 3571421a0e | |||
| aed80f3e4f | |||
| fdaeccf1e5 | |||
| 7723e46c26 | |||
| dce355cce6 | |||
| 213e7b7093 | |||
| fe7d8f93a1 | |||
| 9e2f4216f9 | |||
| a48f7b2222 | |||
| 0b85d8a9bc | |||
| 58d6938065 | |||
| a536a2b822 | |||
| 769be46bf9 |
@@ -0,0 +1,43 @@
|
||||
# Periodically merges the upstream repository's main branch into this fork's
# main branch and pushes the result. A merge conflict fails the job so that a
# maintainer can resolve it by hand.
name: Sync Upstream

on:
  schedule:
    # Run every 6 hours
    - cron: '0 */6 * * *'
  # Allow manual runs from the Actions tab
  workflow_dispatch:

permissions:
  # Required so the job can push the merge result back to this repository
  contents: write

jobs:
  sync:
    name: Sync with upstream
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history is needed for the merge with upstream
          fetch-depth: 0
          # The token is persisted by checkout for later git commands on origin
          token: ${{ secrets.GITHUB_TOKEN }}

      - name: Configure Git
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

      - name: Fetch upstream
        run: |
          git remote add upstream https://github.com/diegosouzapw/OmniRoute.git || true
          git fetch upstream
          git fetch origin

      - name: Sync main branch
        run: |
          git checkout main
          git merge upstream/main --no-edit || {
            echo "Merge conflict detected. Manual intervention required."
            exit 1
          }

      - name: Push changes
        # Push through the origin remote configured by actions/checkout instead
        # of embedding the token in a URL that hard-codes one particular fork.
        run: git push origin main
|
||||
@@ -2,6 +2,51 @@
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
---
|
||||
|
||||
## [3.3.0] - 2026-03-29
|
||||
|
||||
### ✨ Enhancements & Refactoring
|
||||
|
||||
- **Release Stabilization** — Finalized v3.2.9 release (combo diagnostics, quality gates, Gemini tool fix) and created missing git tag. Consolidated all staged changes into a single atomic release commit.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Auto-Update Test** — Fixed `buildDockerComposeUpdateScript` test assertion to match unexpanded shell variable references (`$TARGET_TAG`, `${TARGET_TAG#v}`) in the generated deploy script, aligning with the refactored template from v3.2.8.
|
||||
- **Circuit Breaker Test** — Hardened `combo-circuit-breaker.test.mjs` by injecting `maxRetries: 0` to prevent retry inflation from skewing failure count assertions during breaker state transitions.
|
||||
|
||||
---
|
||||
|
||||
## [3.2.9] - 2026-03-29
|
||||
|
||||
### ✨ Enhancements & Refactoring
|
||||
|
||||
- **Combo Diagnostics** — Introduced a live test bypass flag (`forceLiveComboTest`) allowing administrators to execute real upstream health checks that bypass all local circuit-breaker and cooldown state mechanisms, enabling precise diagnostics during rolling outages (PR #759)
|
||||
- **Quality Gates** — Added automated response quality validation for combos and officially integrated `claude-4.6` model support into the core routing schemas (PR #762)
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Tool Definition Validation** — Repaired Gemini API integration by normalizing enum types inside tool definitions, preventing upstream HTTP 400 parameter errors (PR #760)
|
||||
|
||||
---
|
||||
|
||||
## [3.2.8] - 2026-03-29
|
||||
|
||||
### ✨ Enhancements & Refactoring
|
||||
|
||||
- **Docker Auto-Update UI** — Integrated a detached background update process for Docker Compose deployments. The Dashboard UI now seamlessly tracks update lifecycle events by combining JSON REST responses with SSE streaming progress overlays, for robust cross-environment reliability.
|
||||
- **Cache Analytics** — Repaired zero-metrics visualization mapping by migrating Semantic Cache telemetry logs directly into the centralized tracking SQLite module.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Authentication Logic** — Fixed a bug where saving dashboard settings or adding models failed with a 401 Unauthorized error when `requireLogin` was disabled. API endpoints now correctly evaluate the global authentication toggle. Resolved global redirection by reactivating `src/middleware.ts`.
|
||||
- **CLI Tool Detection (Windows)** — Prevented fatal initialization exceptions during CLI environment detection by catching `cross-spawn` ENOENT errors correctly. Added explicit detection paths for `\AppData\Local\droid\droid.exe`.
|
||||
- **Codex Native Passthrough** — Normalized model translation parameters preventing context poisoning in proxy pass-through mode, enforcing generic `store: false` constraints explicitly for all Codex-originated requests.
|
||||
- **SSE Token Reporting** — Normalized provider tool-call chunk `finish_reason` detection, fixing 0% Usage analytics for stream-only responses missing strict `<DONE>` indicators.
|
||||
- **DeepSeek <think> Tags** — Implemented an explicit `<think>` extraction mapping inside `responsesHandler.ts`, ensuring DeepSeek reasoning streams map equivalently to native Anthropic `<thinking>` structures.
|
||||
|
||||
---
|
||||
|
||||
## [3.2.7] - 2026-03-29
|
||||
|
||||
### Fixed
|
||||
|
||||
+1
-1
@@ -60,7 +60,7 @@ FROM runner-base AS runner-cli
|
||||
|
||||
# Install system dependencies required by openclaw (git+ssh references).
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends git ca-certificates \
|
||||
&& apt-get install -y --no-install-recommends git ca-certificates docker.io docker-compose \
|
||||
&& rm -rf /var/lib/apt/lists/* \
|
||||
&& git config --system url."https://github.com/".insteadOf "ssh://git@github.com/"
|
||||
|
||||
|
||||
@@ -59,6 +59,11 @@ services:
|
||||
ports:
|
||||
- "${DASHBOARD_PORT:-${PORT:-20128}}:${DASHBOARD_PORT:-${PORT:-20128}}"
|
||||
- "${API_PORT:-20129}:${API_PORT:-20129}"
|
||||
volumes:
|
||||
- omniroute-data:/app/data
|
||||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /usr/libexec/docker/cli-plugins:/usr/libexec/docker/cli-plugins:ro
|
||||
- ${AUTO_UPDATE_HOST_REPO_DIR:-.}:/workspace/omniroute:rw
|
||||
profiles:
|
||||
- cli
|
||||
|
||||
|
||||
+1
-1
@@ -43,7 +43,7 @@ See [IDE Configs](integrations/ide-configs.md) for Antigravity, Cursor, Copilot,
|
||||
| `omniroute_simulate_route` | Dry-run routing simulation with fallback tree |
|
||||
| `omniroute_set_budget_guard` | Session budget with degrade/block/alert actions |
|
||||
| `omniroute_set_resilience_profile` | Apply conservative/balanced/aggressive preset |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo via a real upstream request |
|
||||
| `omniroute_get_provider_metrics` | Detailed metrics for one provider |
|
||||
| `omniroute_best_combo_for_task` | Task-fitness recommendation with alternatives |
|
||||
| `omniroute_explain_route` | Explain a past routing decision |
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
openapi: 3.1.0
|
||||
info:
|
||||
title: OmniRoute API
|
||||
version: 3.2.7
|
||||
version: 3.3.0
|
||||
description: |
|
||||
OmniRoute is a local-first AI API proxy router. It provides an OpenAI-compatible
|
||||
endpoint that routes requests to multiple AI providers with load balancing,
|
||||
|
||||
@@ -500,6 +500,12 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
clientVersion: "1.1.3",
|
||||
models: [
|
||||
{ id: "default", name: "Auto (Server Picks)" },
|
||||
{ id: "claude-4.6-opus-high-thinking", name: "Claude 4.6 Opus High Thinking" },
|
||||
{ id: "claude-4.6-opus-high", name: "Claude 4.6 Opus High" },
|
||||
{ id: "claude-4.6-sonnet-high-thinking", name: "Claude 4.6 Sonnet High Thinking" },
|
||||
{ id: "claude-4.6-sonnet-high", name: "Claude 4.6 Sonnet High" },
|
||||
{ id: "claude-4.6-haiku", name: "Claude 4.6 Haiku" },
|
||||
{ id: "claude-4.6-opus", name: "Claude 4.6 Opus" },
|
||||
{ id: "claude-4.5-opus-high-thinking", name: "Claude 4.5 Opus High Thinking" },
|
||||
{ id: "claude-4.5-opus-high", name: "Claude 4.5 Opus High" },
|
||||
{ id: "claude-4.5-sonnet-thinking", name: "Claude 4.5 Sonnet Thinking" },
|
||||
|
||||
@@ -260,11 +260,9 @@ export class CodexExecutor extends BaseExecutor {
|
||||
body.service_tier = CODEX_FAST_WIRE_VALUE;
|
||||
}
|
||||
|
||||
if (nativeCodexPassthrough) {
|
||||
return body;
|
||||
}
|
||||
|
||||
// If no instructions provided, inject default Codex instructions
|
||||
// NOTE: must run before the passthrough return — Codex upstream rejects
|
||||
// requests without instructions even when the body is forwarded as-is.
|
||||
if (!body.instructions || body.instructions.trim() === "") {
|
||||
body.instructions = CODEX_DEFAULT_INSTRUCTIONS;
|
||||
}
|
||||
@@ -272,6 +270,10 @@ export class CodexExecutor extends BaseExecutor {
|
||||
// Ensure store is false (Codex requirement)
|
||||
body.store = false;
|
||||
|
||||
if (nativeCodexPassthrough) {
|
||||
return body;
|
||||
}
|
||||
|
||||
// Extract thinking level from model name suffix
|
||||
// e.g., gpt-5.3-codex-high → high, gpt-5.3-codex → medium (default)
|
||||
const effortLevels = ["none", "low", "medium", "high", "xhigh"];
|
||||
|
||||
@@ -42,6 +42,9 @@ import {
|
||||
getModelUpstreamExtraHeaders,
|
||||
} from "@/lib/localDb";
|
||||
import { getExecutor } from "../executors/index.ts";
|
||||
import { getCacheControlSettings } from "@/lib/cacheControlSettings";
|
||||
import { shouldPreserveCacheControl } from "../utils/cacheControlPolicy.ts";
|
||||
import { getCacheMetrics } from "@/lib/db/settings.ts";
|
||||
|
||||
import {
|
||||
parseCodexQuotaHeaders,
|
||||
@@ -306,6 +309,11 @@ function attachLogMeta(
|
||||
* @param {function} options.onDisconnect - Callback when client disconnects
|
||||
* @param {string} options.connectionId - Connection ID for usage tracking
|
||||
* @param {object} options.apiKeyInfo - API key metadata for usage attribution
|
||||
* @param {string} options.userAgent - Client user agent for caching decisions
|
||||
* @param {string} options.comboName - Combo name if this is a combo request
|
||||
* @param {string} options.comboStrategy - Combo routing strategy (e.g., 'priority', 'cost-optimized')
|
||||
* @param {boolean} options.isCombo - Whether this request is from a combo
|
||||
* @param {string} options.connectionId - Connection ID for settings lookup
|
||||
*/
|
||||
export async function handleChatCore({
|
||||
body,
|
||||
@@ -320,6 +328,8 @@ export async function handleChatCore({
|
||||
apiKeyInfo = null,
|
||||
userAgent,
|
||||
comboName,
|
||||
comboStrategy = null,
|
||||
isCombo = false,
|
||||
}) {
|
||||
let { provider, model, extendedContext } = modelInfo;
|
||||
const requestedModel =
|
||||
@@ -674,6 +684,25 @@ export async function handleChatCore({
|
||||
// Translate request (pass reqLogger for intermediate logging)
|
||||
let translatedBody = body;
|
||||
const isClaudePassthrough = sourceFormat === FORMATS.CLAUDE && targetFormat === FORMATS.CLAUDE;
|
||||
|
||||
// Determine if we should preserve client-side cache_control headers
|
||||
// Fetch settings from DB to get user preference
|
||||
const cacheControlMode = await getCacheControlSettings().catch(() => "auto" as const);
|
||||
const preserveCacheControl = shouldPreserveCacheControl({
|
||||
userAgent,
|
||||
isCombo,
|
||||
comboStrategy,
|
||||
targetProvider: provider,
|
||||
settings: { alwaysPreserveClientCache: cacheControlMode },
|
||||
});
|
||||
|
||||
if (preserveCacheControl) {
|
||||
log?.debug?.(
|
||||
"CACHE",
|
||||
`Preserving client cache_control (client=${userAgent?.substring(0, 20)}, combo=${isCombo}, strategy=${comboStrategy}, provider=${provider})`
|
||||
);
|
||||
}
|
||||
|
||||
try {
|
||||
if (nativeCodexPassthrough) {
|
||||
translatedBody = { ...body, _nativeCodexPassthrough: true };
|
||||
@@ -701,7 +730,7 @@ export async function handleChatCore({
|
||||
credentials,
|
||||
provider,
|
||||
reqLogger,
|
||||
{ normalizeToolCallId, preserveDeveloperRole }
|
||||
{ normalizeToolCallId, preserveDeveloperRole, preserveCacheControl }
|
||||
);
|
||||
translatedBody = translateRequest(
|
||||
FORMATS.OPENAI,
|
||||
@@ -712,7 +741,7 @@ export async function handleChatCore({
|
||||
credentials,
|
||||
provider,
|
||||
reqLogger,
|
||||
{ normalizeToolCallId, preserveDeveloperRole }
|
||||
{ normalizeToolCallId, preserveDeveloperRole, preserveCacheControl }
|
||||
);
|
||||
log?.debug?.("FORMAT", "claude->openai->claude normalized passthrough");
|
||||
} else {
|
||||
@@ -816,7 +845,7 @@ export async function handleChatCore({
|
||||
credentials,
|
||||
provider,
|
||||
reqLogger,
|
||||
{ normalizeToolCallId, preserveDeveloperRole }
|
||||
{ normalizeToolCallId, preserveDeveloperRole, preserveCacheControl }
|
||||
);
|
||||
}
|
||||
} catch (error) {
|
||||
@@ -1406,6 +1435,18 @@ export async function handleChatCore({
|
||||
const msg = `[${new Date().toLocaleTimeString("en-US", { hour12: false, hour: "2-digit", minute: "2-digit" })}] 📊 [USAGE] ${provider.toUpperCase()} | in=${getLoggedInputTokens(usage)} | out=${getLoggedOutputTokens(usage)}${connectionId ? ` | account=${connectionId.slice(0, 8)}...` : ""}`;
|
||||
console.log(`${COLORS.green}${msg}${COLORS.reset}`);
|
||||
|
||||
// Track cache token metrics
|
||||
const inputTokens = usage.prompt_tokens || 0;
|
||||
const cachedTokens = toPositiveNumber(
|
||||
usage.cache_read_input_tokens ??
|
||||
usage.cached_tokens ??
|
||||
(usage as any).prompt_tokens_details?.cached_tokens
|
||||
);
|
||||
const cacheCreationTokens = toPositiveNumber(
|
||||
usage.cache_creation_input_tokens ??
|
||||
(usage as any).prompt_tokens_details?.cache_creation_tokens
|
||||
);
|
||||
|
||||
saveRequestUsage({
|
||||
provider: provider || "unknown",
|
||||
model: model || "unknown",
|
||||
@@ -1549,8 +1590,41 @@ export async function handleChatCore({
|
||||
responseBody: streamResponseBody,
|
||||
providerPayload,
|
||||
clientPayload,
|
||||
ttft,
|
||||
}) => {
|
||||
const cacheUsageLogMeta = buildCacheUsageLogMeta(streamUsage);
|
||||
|
||||
// Track cache token metrics for streaming responses
|
||||
if (streamUsage && typeof streamUsage === "object") {
|
||||
const inputTokens = streamUsage.prompt_tokens || 0;
|
||||
const cachedTokens = toPositiveNumber(
|
||||
streamUsage.cache_read_input_tokens ??
|
||||
streamUsage.cached_tokens ??
|
||||
(streamUsage as any).prompt_tokens_details?.cached_tokens
|
||||
);
|
||||
const cacheCreationTokens = toPositiveNumber(
|
||||
streamUsage.cache_creation_input_tokens ??
|
||||
(streamUsage as any).prompt_tokens_details?.cache_creation_tokens
|
||||
);
|
||||
|
||||
saveRequestUsage({
|
||||
provider: provider || "unknown",
|
||||
model: model || "unknown",
|
||||
tokens: streamUsage,
|
||||
status: String(streamStatus || 200),
|
||||
success: streamStatus === 200,
|
||||
latencyMs: Date.now() - startTime,
|
||||
timeToFirstTokenMs: ttft,
|
||||
errorCode: null,
|
||||
timestamp: new Date().toISOString(),
|
||||
connectionId: connectionId || undefined,
|
||||
apiKeyId: apiKeyInfo?.id || undefined,
|
||||
apiKeyName: apiKeyInfo?.name || undefined,
|
||||
}).catch((err) => {
|
||||
console.error("Failed to save usage stats:", err.message);
|
||||
});
|
||||
}
|
||||
|
||||
persistAttemptLogs({
|
||||
status: streamStatus || 200,
|
||||
tokens: streamUsage || {},
|
||||
|
||||
@@ -80,16 +80,24 @@ export async function handleEmbedding({
|
||||
};
|
||||
}
|
||||
|
||||
// Build upstream request
|
||||
// Build upstream request — start with standard fields, then forward any extras
|
||||
// the client sent (e.g. input_type, user, truncate for NVIDIA NIM asymmetric models).
|
||||
const KNOWN_FIELDS = new Set(["model", "input", "dimensions", "encoding_format"]);
|
||||
|
||||
const upstreamBody: Record<string, unknown> = {
|
||||
model: model,
|
||||
input: body.input,
|
||||
};
|
||||
|
||||
// Pass optional parameters
|
||||
if (body.dimensions !== undefined) upstreamBody.dimensions = body.dimensions;
|
||||
if (body.encoding_format !== undefined) upstreamBody.encoding_format = body.encoding_format;
|
||||
|
||||
for (const [key, value] of Object.entries(body)) {
|
||||
if (!KNOWN_FIELDS.has(key) && value !== undefined) {
|
||||
upstreamBody[key] = value;
|
||||
}
|
||||
}
|
||||
|
||||
// Build headers
|
||||
const headers = {
|
||||
"Content-Type": "application/json",
|
||||
@@ -104,6 +112,12 @@ export async function handleEmbedding({
|
||||
} else if (providerConfig.authHeader === "x-api-key") {
|
||||
headers["x-api-key"] = token;
|
||||
}
|
||||
} else if (providerConfig.authType !== "none") {
|
||||
return {
|
||||
success: false,
|
||||
status: 401,
|
||||
error: `No valid authentication token for provider ${provider}. Check provider credentials.`,
|
||||
};
|
||||
}
|
||||
|
||||
if (log) {
|
||||
|
||||
@@ -52,6 +52,10 @@ export function parseSSEToOpenAIResponse(rawSSE, fallbackModel) {
|
||||
if (typeof delta.reasoning_content === "string" && delta.reasoning_content.length > 0) {
|
||||
reasoningParts.push(delta.reasoning_content);
|
||||
}
|
||||
// Normalize `reasoning` alias (NVIDIA kimi-k2.5 etc.)
|
||||
if (typeof delta.reasoning === "string" && delta.reasoning.length > 0 && !delta.reasoning_content) {
|
||||
reasoningParts.push(delta.reasoning);
|
||||
}
|
||||
|
||||
// T18: Accumulate tool calls correctly across streamed chunks
|
||||
if (delta.tool_calls) {
|
||||
@@ -94,12 +98,14 @@ export function parseSSEToOpenAIResponse(rawSSE, fallbackModel) {
|
||||
}
|
||||
}
|
||||
|
||||
const joinedContent = contentParts.length > 0 ? contentParts.join("").trim() : null;
|
||||
const joinedReasoning = reasoningParts.length > 0 ? reasoningParts.join("").trim() : null;
|
||||
const message: Record<string, unknown> = {
|
||||
role: "assistant",
|
||||
content: contentParts.length > 0 ? contentParts.join("") : null,
|
||||
content: joinedContent || null,
|
||||
};
|
||||
if (reasoningParts.length > 0) {
|
||||
message.reasoning_content = reasoningParts.join("");
|
||||
if (joinedReasoning) {
|
||||
message.reasoning_content = joinedReasoning;
|
||||
}
|
||||
|
||||
const finalToolCalls = [...accumulatedToolCalls.values()].filter(Boolean).sort((a, b) => {
|
||||
|
||||
@@ -137,7 +137,7 @@ omniroute --mcp
|
||||
| 9 | `omniroute_simulate_route` | `read:health`, `read:combos` | Dry-run routing simulation showing fallback tree and estimated costs |
|
||||
| 10 | `omniroute_set_budget_guard` | `write:budget` | Set session budget with action on exceed: `degrade`, `block`, or `alert` |
|
||||
| 11 | `omniroute_set_resilience_profile` | `write:resilience` | Apply resilience profile: `aggressive`, `balanced`, or `conservative` |
|
||||
| 12 | `omniroute_test_combo` | `execute:completions`, `read:combos` | Test each provider in a combo with a real prompt, report latency/cost |
|
||||
| 12 | `omniroute_test_combo` | `execute:completions`, `read:combos` | Test each provider in a combo with a real prompt and a real upstream call, report latency/cost |
|
||||
| 13 | `omniroute_get_provider_metrics` | `read:health` | Per-provider metrics with latency percentiles (p50/p95/p99), circuit breaker |
|
||||
| 14 | `omniroute_best_combo_for_task` | `read:combos`, `read:health` | AI-powered combo recommendation by task type with budget/latency constraints |
|
||||
| 15 | `omniroute_explain_route` | `read:health`, `read:usage` | Explain why a request was routed to a provider (scoring factors, fallbacks) |
|
||||
|
||||
@@ -17,6 +17,10 @@ export const ACCOUNT_DEACTIVATED_SIGNALS = [
|
||||
"account has been disabled",
|
||||
"your account has been suspended",
|
||||
"this account is deactivated",
|
||||
// AG (Antigravity/Google Cloud Code) permanent ban signals
|
||||
"verify your account to continue",
|
||||
"this service has been disabled in this account for violation",
|
||||
"this service has been disabled in this account",
|
||||
];
|
||||
|
||||
// T10 (sub2api PR #1169): Signals that indicate billing credits are exhausted.
|
||||
|
||||
+110
-2
@@ -45,6 +45,80 @@ const DEFAULT_MODEL_P95_MS = {
|
||||
};
|
||||
const MIN_HISTORY_SAMPLES = 10;
|
||||
|
||||
/**
 * Matches the start of a Server-Sent Events body: one of the standard field
 * names (`data`, `event`, `id`, `retry`) followed by a colon, or a bare colon
 * that opens a comment line.
 */
const SSE_LEADING_FIELD = /^(?:data|event|id|retry)?:/;

/**
 * Validate that a successful (HTTP 200) non-streaming response actually contains
 * meaningful content.
 *
 * Only non-streaming JSON/text responses are inspected — streaming responses are
 * passed through because buffering the full stream would defeat the purpose of
 * streaming. The body is read from a clone, so `response` itself stays readable.
 *
 * Checks, in order:
 *  1. The body is not empty or whitespace-only.
 *  2. The body is valid JSON. A body that is not JSON but is a Server-Sent Events
 *     stream (content type `text/event-stream`, or a first line that is an SSE
 *     field or comment, ignoring leading whitespace) is accepted.
 *  3. Without a non-empty `choices` array, the body is accepted if it has a truthy
 *     `output`, `result`, `data` or `response` field; otherwise it is rejected when
 *     it carries a truthy `error`, and accepted in every other case.
 *  4. The first choice has a `message` (or `delta`) with non-empty `content` or at
 *     least one entry in `tool_calls`.
 *
 * Anything that prevents inspection (unsupported content type, clone or read
 * failure) is treated as valid so this check never blocks a response it cannot judge.
 *
 * @param response - Upstream response to inspect; it is cloned, never consumed.
 * @param isStreaming - When true the response is accepted without inspection.
 * @param log - Logger handle; accepted for call-site compatibility, not used here.
 * @returns `{ valid: true }`, or `{ valid: false, reason }` describing the failed
 *   check. `clonedResponse` is part of the declared shape but is never populated.
 */
async function validateResponseQuality(
  response: Response,
  isStreaming: boolean,
  log: { warn?: (...args: any[]) => void }
): Promise<{ valid: boolean; reason?: string; clonedResponse?: Response }> {
  if (isStreaming) return { valid: true };

  const contentType = response.headers.get("content-type") || "";
  if (!contentType.includes("application/json") && !contentType.includes("text/")) {
    return { valid: true };
  }

  // Read from a clone so the caller can still consume the original body.
  let text: string;
  try {
    text = await response.clone().text();
  } catch {
    return { valid: true };
  }

  if (!text || text.trim().length === 0) {
    return { valid: false, reason: "empty response body" };
  }

  let json: unknown;
  try {
    json = JSON.parse(text);
  } catch {
    // An SSE payload is not JSON but is still a legitimate body. Streams may start
    // with `event:`, `id:`, `retry:` or a `:` comment rather than `data:`, and may
    // carry leading blank lines, so look at the declared type and the first field.
    const isEventStream =
      contentType.includes("text/event-stream") || SSE_LEADING_FIELD.test(text.trimStart());
    if (isEventStream) return { valid: true };
    return { valid: false, reason: "response is not valid JSON" };
  }

  // JSON primitives and `null` have no fields to inspect; nothing to reject.
  if (typeof json !== "object" || json === null) return { valid: true };
  const payload = json as Record<string, unknown>;

  const choices = payload.choices;
  if (!Array.isArray(choices) || choices.length === 0) {
    // Non chat-completions shapes: accept when any known result field is present.
    if (payload.output || payload.result || payload.data || payload.response) {
      return { valid: true };
    }
    const upstreamError = payload.error;
    if (upstreamError) {
      const errorMessage =
        typeof upstreamError === "object"
          ? (upstreamError as Record<string, unknown>).message
          : undefined;
      return {
        valid: false,
        reason: `upstream error in 200 body: ${errorMessage || JSON.stringify(upstreamError).substring(0, 200)}`,
      };
    }
    return { valid: true };
  }

  const firstChoice: unknown = choices[0];
  const choiceFields =
    typeof firstChoice === "object" && firstChoice !== null
      ? (firstChoice as Record<string, unknown>)
      : undefined;
  const message = choiceFields?.message || choiceFields?.delta;
  if (!message) {
    return { valid: false, reason: "choice has no message object" };
  }

  // A truthy non-object message has no fields, so it counts as empty below.
  const messageFields: Record<string, unknown> =
    typeof message === "object" ? (message as Record<string, unknown>) : {};
  const content = messageFields.content;
  const toolCalls = messageFields.tool_calls;
  const hasContent = content !== null && content !== undefined && content !== "";
  const hasToolCalls = Array.isArray(toolCalls) && toolCalls.length > 0;

  if (!hasContent && !hasToolCalls) {
    return { valid: false, reason: "empty content and no tool_calls in response" };
  }

  return { valid: true };
}
|
||||
|
||||
// In-memory atomic counter per combo for round-robin distribution
|
||||
// Resets on server restart (by design — no stale state)
|
||||
const rrCounters = new Map();
|
||||
@@ -872,14 +946,31 @@ export async function handleComboChat({
|
||||
|
||||
const result = await handleSingleModelWrapped(body, modelStr);
|
||||
|
||||
// Success — return response
|
||||
// Success — validate response quality before returning
|
||||
if (result.ok) {
|
||||
const quality = await validateResponseQuality(result, !!body.stream, log);
|
||||
if (!quality.valid) {
|
||||
log.warn(
|
||||
"COMBO",
|
||||
`Model ${modelStr} returned 200 but failed quality check: ${quality.reason}`
|
||||
);
|
||||
breaker._onFailure();
|
||||
recordComboRequest(combo.name, modelStr, {
|
||||
success: false,
|
||||
latencyMs: Date.now() - startTime,
|
||||
fallbackCount,
|
||||
strategy,
|
||||
});
|
||||
if (i > 0) fallbackCount++;
|
||||
break; // move to next model
|
||||
}
|
||||
resolvedByModel = modelStr;
|
||||
const latencyMs = Date.now() - startTime;
|
||||
log.info(
|
||||
"COMBO",
|
||||
`Model ${modelStr} succeeded (${latencyMs}ms, ${fallbackCount} fallbacks)`
|
||||
);
|
||||
breaker._onSuccess();
|
||||
recordComboRequest(combo.name, modelStr, {
|
||||
success: true,
|
||||
latencyMs,
|
||||
@@ -1139,13 +1230,30 @@ async function handleRoundRobinCombo({
|
||||
|
||||
const result = await handleSingleModel(body, modelStr);
|
||||
|
||||
// Success
|
||||
// Success — validate response quality before returning
|
||||
if (result.ok) {
|
||||
const quality = await validateResponseQuality(result, !!body.stream, log);
|
||||
if (!quality.valid) {
|
||||
log.warn(
|
||||
"COMBO-RR",
|
||||
`${modelStr} returned 200 but failed quality check: ${quality.reason}`
|
||||
);
|
||||
breaker._onFailure();
|
||||
recordComboRequest(combo.name, modelStr, {
|
||||
success: false,
|
||||
latencyMs: Date.now() - startTime,
|
||||
fallbackCount,
|
||||
strategy: "round-robin",
|
||||
});
|
||||
if (offset > 0) fallbackCount++;
|
||||
break; // move to next model
|
||||
}
|
||||
const latencyMs = Date.now() - startTime;
|
||||
log.info(
|
||||
"COMBO-RR",
|
||||
`${modelStr} succeeded (${latencyMs}ms, ${fallbackCount} fallbacks)`
|
||||
);
|
||||
breaker._onSuccess();
|
||||
recordComboRequest(combo.name, modelStr, {
|
||||
success: true,
|
||||
latencyMs,
|
||||
|
||||
@@ -48,3 +48,54 @@ export function supportsToolCalling(modelStr: string): boolean {
|
||||
|
||||
return !blocked;
|
||||
}
|
||||
|
||||
// Models that do NOT support reasoning/thinking parameters.
|
||||
// AG (Antigravity) claude-sonnet-4-6 routes through a Google internal API
|
||||
// that returns 400 if thinking params are included.
// Entries are lowercase "provider/model" strings. supportsReasoning() lowercases
// the requested model string and treats any entry found as a substring as a match,
// so the shorter ".../claude-sonnet-4" entries also match the "-4-5" and "-4-6" ids.
|
||||
const REASONING_UNSUPPORTED_PATTERNS = [
|
||||
"antigravity/claude-sonnet-4-6",
|
||||
"antigravity/claude-sonnet-4-5",
|
||||
"antigravity/claude-sonnet-4",
|
||||
"ag/claude-sonnet-4-6",
|
||||
"ag/claude-sonnet-4-5",
|
||||
"ag/claude-sonnet-4",
|
||||
];
|
||||
|
||||
/**
 * Reads the `supportsReasoning` flag for a model from the provider registry.
 *
 * The provider key is first mapped through PROVIDER_ID_TO_ALIAS (falling back to
 * the key itself), then the model is looked up by exact `id` in PROVIDER_MODELS.
 *
 * @param providerIdOrAlias - Provider id or alias used to locate the model list.
 * @param modelId - Model id to find within that provider's list.
 * @returns The registry flag when it is an explicit boolean; `null` when the
 *   provider has no model list, the model is not listed, or the flag is absent.
 */
function getRegistryReasoningFlag(providerIdOrAlias: string, modelId: string): boolean | null {
  const alias = PROVIDER_ID_TO_ALIAS[providerIdOrAlias] || providerIdOrAlias;
  const registered = PROVIDER_MODELS[alias];
  if (!Array.isArray(registered)) {
    return null;
  }

  const entry = registered.find((candidate) => candidate?.id === modelId);
  if (!entry) {
    return null;
  }

  const flag = entry.supportsReasoning;
  return typeof flag === "boolean" ? flag : null;
}
|
||||
|
||||
/**
 * Tells whether reasoning/thinking parameters may be sent to a model.
 *
 * Resolution order:
 *  1) An explicit `supportsReasoning` flag in the provider registry wins.
 *  2) Otherwise the lowercased model string is checked against the denylist of
 *     known unsupported models (e.g. AG Claude Sonnet).
 *  3) Otherwise the answer is true (pass through — safe, provider will ignore
 *     if unsupported).
 *
 * @param modelStr - Model string as requested, e.g. "provider/model".
 * @returns false only when the registry or the denylist marks the model as unsupported.
 */
export function supportsReasoning(modelStr: string): boolean {
  const parsed = parseModel(modelStr);
  const providerKey = parsed.provider || parsed.providerAlias || "";

  if (providerKey) {
    const registryFlag = getRegistryReasoningFlag(providerKey, parsed.model || modelStr);
    if (registryFlag !== null) {
      return registryFlag;
    }
  }

  const lowered = String(modelStr || "").toLowerCase();
  if (lowered === "") {
    return true;
  }

  // Exact and "/pattern" suffix matches are special cases of a substring match,
  // so a single includes() test is all that is needed.
  const isDenylisted = REASONING_UNSUPPORTED_PATTERNS.some((pattern) => lowered.includes(pattern));
  return !isDenylisted;
}
|
||||
|
||||
@@ -14,6 +14,7 @@ export const ThinkingMode = {
|
||||
};
|
||||
|
||||
import { capThinkingBudget, getDefaultThinkingBudget } from "@/shared/constants/modelSpecs";
|
||||
import { supportsReasoning } from "./modelCapabilities.ts";
|
||||
|
||||
// Effort → budget token mapping
|
||||
export const EFFORT_BUDGETS = {
|
||||
@@ -151,6 +152,13 @@ export function applyThinkingBudget(body, config = null) {
|
||||
const cfg = config || _config;
|
||||
if (!body || typeof body !== "object") return body;
|
||||
|
||||
// Early exit: strip ALL reasoning/thinking params for models that don't support them.
|
||||
// Sending thinking params to unsupported models (e.g. AG claude-sonnet-4-6) causes 400 errors.
|
||||
const modelStr = typeof body.model === "string" ? body.model : "";
|
||||
if (modelStr && !supportsReasoning(modelStr)) {
|
||||
return stripThinkingConfig(body);
|
||||
}
|
||||
|
||||
// Pre-processing: convert string thinkingLevel to numeric budget
|
||||
let processed = normalizeThinkingLevel(body);
|
||||
|
||||
|
||||
@@ -98,6 +98,7 @@ export function createResponsesApiTransformStream(logger = null) {
|
||||
funcItemDone: {},
|
||||
buffer: "",
|
||||
completedSent: false,
|
||||
usage: null,
|
||||
};
|
||||
|
||||
const encoder = new TextEncoder();
|
||||
@@ -249,16 +250,52 @@ export function createResponsesApiTransformStream(logger = null) {
|
||||
const sendCompleted = (controller) => {
|
||||
if (!state.completedSent) {
|
||||
state.completedSent = true;
|
||||
|
||||
// Build output from accumulated state
|
||||
const output = [];
|
||||
if (state.reasoningId) {
|
||||
output.push({
|
||||
id: state.reasoningId,
|
||||
type: "reasoning",
|
||||
summary: [{ type: "summary_text", text: state.reasoningBuf }],
|
||||
});
|
||||
}
|
||||
for (const idx in state.msgItemAdded) {
|
||||
output.push({
|
||||
id: `msg_${state.responseId}_${idx}`,
|
||||
type: "message",
|
||||
role: "assistant",
|
||||
content: [{ type: "output_text", annotations: [], text: state.msgTextBuf[idx] || "" }],
|
||||
});
|
||||
}
|
||||
for (const idx in state.funcCallIds) {
|
||||
const callId = state.funcCallIds[idx];
|
||||
output.push({
|
||||
id: `fc_${callId}`,
|
||||
type: "function_call",
|
||||
call_id: callId,
|
||||
name: state.funcNames[idx] || "",
|
||||
arguments: state.funcArgsBuf[idx] || "{}",
|
||||
});
|
||||
}
|
||||
|
||||
const response: Record<string, unknown> = {
|
||||
id: state.responseId,
|
||||
object: "response",
|
||||
created_at: state.created,
|
||||
status: "completed",
|
||||
background: false,
|
||||
error: null,
|
||||
output,
|
||||
};
|
||||
|
||||
if (state.usage) {
|
||||
response.usage = state.usage;
|
||||
}
|
||||
|
||||
emit(controller, "response.completed", {
|
||||
type: "response.completed",
|
||||
response: {
|
||||
id: state.responseId,
|
||||
object: "response",
|
||||
created_at: state.created,
|
||||
status: "completed",
|
||||
background: false,
|
||||
error: null,
|
||||
},
|
||||
response,
|
||||
});
|
||||
}
|
||||
};
|
||||
@@ -288,7 +325,12 @@ export function createResponsesApiTransformStream(logger = null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!parsed.choices?.length) continue;
|
||||
if (!parsed.choices?.length) {
|
||||
if (parsed.usage) {
|
||||
state.usage = parsed.usage;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
const choice = parsed.choices[0];
|
||||
const idx = choice.index || 0;
|
||||
@@ -335,7 +377,7 @@ export function createResponsesApiTransformStream(logger = null) {
|
||||
|
||||
if (content.includes("<think>")) {
|
||||
state.inThinking = true;
|
||||
content = content.replace("<think>", "");
|
||||
content = content.replaceAll("<think>", "");
|
||||
startReasoning(controller, idx);
|
||||
}
|
||||
|
||||
|
||||
@@ -167,13 +167,19 @@ function convertConstToEnum(obj) {
|
||||
}
|
||||
|
||||
// Convert enum values to strings (Gemini requires string enum values)
|
||||
// For integer types, remove enum entirely as Gemini doesn't support it
|
||||
function convertEnumValuesToStrings(obj) {
|
||||
if (!obj || typeof obj !== "object") return;
|
||||
|
||||
if (obj.enum && Array.isArray(obj.enum)) {
|
||||
obj.enum = obj.enum.map((v) => String(v));
|
||||
if (!obj.type) {
|
||||
obj.type = "string";
|
||||
// Gemini only supports enum for string types, not integer
|
||||
if (obj.type === "integer" || obj.type === "number") {
|
||||
delete obj.enum;
|
||||
} else {
|
||||
obj.enum = obj.enum.map((v) => String(v));
|
||||
if (!obj.type) {
|
||||
obj.type = "string";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,105 +1,9 @@
|
||||
/**
|
||||
* Convert OpenAI Responses API format to standard chat completions format
|
||||
* Responses API uses: { input: [...], instructions: "..." }
|
||||
* Chat API uses: { messages: [...] }
|
||||
* Convert OpenAI Responses API format to standard chat completions format.
|
||||
* Delegates to the canonical translator to avoid logic duplication.
|
||||
*/
|
||||
import { openaiResponsesToOpenAIRequest } from "../request/openai-responses.ts";
|
||||
|
||||
export function convertResponsesApiFormat(body) {
|
||||
if (!body.input) return body;
|
||||
|
||||
const result = { ...body };
|
||||
result.messages = [];
|
||||
|
||||
// Convert instructions to system message
|
||||
if (body.instructions) {
|
||||
result.messages.push({ role: "system", content: body.instructions });
|
||||
}
|
||||
|
||||
// Group items by conversation turn
|
||||
let currentAssistantMsg = null;
|
||||
let pendingToolCalls = [];
|
||||
let pendingToolResults = [];
|
||||
|
||||
for (const item of body.input) {
|
||||
// Determine item type - Droid CLI sends role-based items without 'type' field
|
||||
// Fallback: if no type but has role property, treat as message
|
||||
const itemType = item.type || (item.role ? "message" : null);
|
||||
|
||||
if (itemType === "message") {
|
||||
// Flush each pending assistant message with tool calls
|
||||
if (currentAssistantMsg) {
|
||||
result.messages.push(currentAssistantMsg);
|
||||
currentAssistantMsg = null;
|
||||
}
|
||||
// Flush pending tool results
|
||||
if (pendingToolResults.length > 0) {
|
||||
for (const tr of pendingToolResults) {
|
||||
result.messages.push(tr);
|
||||
}
|
||||
pendingToolResults = [];
|
||||
}
|
||||
|
||||
// Convert content: input_text → text, output_text → text
|
||||
const content = Array.isArray(item.content)
|
||||
? item.content.map((c) => {
|
||||
if (c.type === "input_text") return { type: "text", text: c.text };
|
||||
if (c.type === "output_text") return { type: "text", text: c.text };
|
||||
return c;
|
||||
})
|
||||
: item.content;
|
||||
result.messages.push({ role: item.role, content });
|
||||
} else if (itemType === "function_call") {
|
||||
// Start or append to assistant message with tool_calls
|
||||
if (!currentAssistantMsg) {
|
||||
currentAssistantMsg = {
|
||||
role: "assistant",
|
||||
content: null,
|
||||
tool_calls: [],
|
||||
};
|
||||
}
|
||||
currentAssistantMsg.tool_calls.push({
|
||||
id: item.call_id,
|
||||
type: "function",
|
||||
function: {
|
||||
name: item.name,
|
||||
arguments: item.arguments,
|
||||
},
|
||||
});
|
||||
} else if (itemType === "function_call_output") {
|
||||
// Flush assistant message first if exists
|
||||
if (currentAssistantMsg) {
|
||||
result.messages.push(currentAssistantMsg);
|
||||
currentAssistantMsg = null;
|
||||
}
|
||||
// Add tool result
|
||||
pendingToolResults.push({
|
||||
role: "tool",
|
||||
tool_call_id: item.call_id,
|
||||
content: typeof item.output === "string" ? item.output : JSON.stringify(item.output),
|
||||
});
|
||||
} else if (itemType === "reasoning") {
|
||||
// Skip reasoning items - they are for display only
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Flush remaining
|
||||
if (currentAssistantMsg) {
|
||||
result.messages.push(currentAssistantMsg);
|
||||
}
|
||||
if (pendingToolResults.length > 0) {
|
||||
for (const tr of pendingToolResults) {
|
||||
result.messages.push(tr);
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup Responses API specific fields
|
||||
// Note: prompt_cache_key is intentionally preserved — it is used by Codex and other
|
||||
// providers as a cache-affinity signal. Stripping it breaks prompt caching (#517).
|
||||
delete result.input;
|
||||
delete result.instructions;
|
||||
delete result.include;
|
||||
delete result.store;
|
||||
delete result.reasoning;
|
||||
|
||||
return result;
|
||||
return openaiResponsesToOpenAIRequest(null, body, null, null);
|
||||
}
|
||||
|
||||
@@ -73,6 +73,7 @@ function normalizeOpenAIResponsesRequest(body) {
|
||||
|
||||
/** @param options.normalizeToolCallId - When true, use 9-char tool call ids (e.g. Mistral); when false, leave ids as-is */
|
||||
/** @param options.preserveDeveloperRole - undefined/true: keep developer for OpenAI format (default); false: map to system */
|
||||
/** @param options.preserveCacheControl - When true, preserve client-side cache_control markers (for Claude Code, etc.) */
|
||||
// Translate request: source -> openai -> target
|
||||
export function translateRequest(
|
||||
sourceFormat,
|
||||
@@ -83,7 +84,7 @@ export function translateRequest(
|
||||
credentials = null,
|
||||
provider = null,
|
||||
reqLogger = null,
|
||||
options?: { normalizeToolCallId?: boolean; preserveDeveloperRole?: boolean }
|
||||
options?: { normalizeToolCallId?: boolean; preserveDeveloperRole?: boolean; preserveCacheControl?: boolean }
|
||||
) {
|
||||
let result = body;
|
||||
const use9CharId = options?.normalizeToolCallId === true;
|
||||
@@ -149,10 +150,13 @@ export function translateRequest(
|
||||
}
|
||||
|
||||
// Final step: prepare request for Claude format endpoints
|
||||
// In Claude passthrough mode (Claude → Claude), preserve cache_control markers
|
||||
// Preserve cache_control when:
|
||||
// 1. Claude passthrough mode (Claude → Claude), OR
|
||||
// 2. Explicitly requested via options (for caching-aware clients like Claude Code)
|
||||
if (targetFormat === FORMATS.CLAUDE) {
|
||||
const isClaudePassthrough = sourceFormat === FORMATS.CLAUDE;
|
||||
result = prepareClaudeRequest(result, provider, isClaudePassthrough);
|
||||
const preserveCache = isClaudePassthrough || options?.preserveCacheControl === true;
|
||||
result = prepareClaudeRequest(result, provider, preserveCache);
|
||||
}
|
||||
|
||||
// Normalize openai-responses input shape for providers that require list input.
|
||||
|
||||
@@ -10,8 +10,6 @@ import { generateToolCallId } from "../helpers/toolCallHelper.ts";
|
||||
|
||||
type JsonRecord = Record<string, unknown>;
|
||||
|
||||
const UNSUPPORTED_TOOLS = ["file_search", "code_interpreter", "web_search_preview"];
|
||||
|
||||
function toRecord(value: unknown): JsonRecord {
|
||||
return value && typeof value === "object" && !Array.isArray(value) ? (value as JsonRecord) : {};
|
||||
}
|
||||
@@ -47,14 +45,16 @@ export function openaiResponsesToOpenAIRequest(
|
||||
const root = toRecord(body);
|
||||
if (root.input === undefined) return body;
|
||||
|
||||
// Validate unsupported features - return clear errors instead of silent failure
|
||||
// Validate tool types — only function tools can be translated to Chat Completions
|
||||
const tools = toArray(root.tools);
|
||||
if (tools.length > 0) {
|
||||
for (const toolValue of tools) {
|
||||
const tool = toRecord(toolValue);
|
||||
if (UNSUPPORTED_TOOLS.includes(toString(tool.type))) {
|
||||
const toolType = toString(tool.type);
|
||||
// Allow: function tools, and tools already in Chat format (have .function property)
|
||||
if (toolType && toolType !== "function" && !tool.function) {
|
||||
throw unsupportedFeature(
|
||||
`Unsupported Responses API feature: ${toString(tool.type)} tool type is not supported by omniroute`
|
||||
`Unsupported Responses API feature: ${toolType} tool type is not supported by omniroute`
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -112,6 +112,24 @@ export function openaiResponsesToOpenAIRequest(
|
||||
if (contentItem.type === "output_text") {
|
||||
return { type: "text", text: toString(contentItem.text) };
|
||||
}
|
||||
if (contentItem.type === "input_image") {
|
||||
const imgResult: JsonRecord = {
|
||||
type: "image_url",
|
||||
image_url: { url: toString(contentItem.image_url) },
|
||||
};
|
||||
if (contentItem.detail !== undefined) {
|
||||
(imgResult.image_url as JsonRecord).detail = contentItem.detail;
|
||||
}
|
||||
return imgResult;
|
||||
}
|
||||
if (contentItem.type === "input_file") {
|
||||
const fileObj: JsonRecord = {};
|
||||
if (contentItem.file_data !== undefined) fileObj.file_data = contentItem.file_data;
|
||||
if (contentItem.file_id !== undefined) fileObj.file_id = contentItem.file_id;
|
||||
if (contentItem.file_url !== undefined) fileObj.file_url = contentItem.file_url;
|
||||
if (contentItem.filename !== undefined) fileObj.filename = contentItem.filename;
|
||||
return { type: "file", file: fileObj };
|
||||
}
|
||||
return contentValue;
|
||||
})
|
||||
: item.content;
|
||||
@@ -144,7 +162,9 @@ export function openaiResponsesToOpenAIRequest(
|
||||
type: "function",
|
||||
function: {
|
||||
name: fnName,
|
||||
arguments: item.arguments,
|
||||
arguments: typeof item.arguments === "string"
|
||||
? item.arguments
|
||||
: JSON.stringify(item.arguments ?? {}),
|
||||
},
|
||||
});
|
||||
currentAssistantMsg.tool_calls = toolCalls;
|
||||
@@ -226,6 +246,20 @@ export function openaiResponsesToOpenAIRequest(
|
||||
return true;
|
||||
});
|
||||
|
||||
// Translate tool_choice object format: Responses {type,name} → Chat {type,function:{name}}
|
||||
if (result.tool_choice && typeof result.tool_choice === "object" && !Array.isArray(result.tool_choice)) {
|
||||
const tc = toRecord(result.tool_choice);
|
||||
const tcType = toString(tc.type);
|
||||
if (tcType === "function" && tc.name !== undefined && !tc.function) {
|
||||
result.tool_choice = { type: "function", function: { name: tc.name } };
|
||||
} else if (tcType && tcType !== "function" && tcType !== "allowed_tools") {
|
||||
// Built-in tool types (web_search_preview, file_search, etc.) have no Chat equivalent
|
||||
throw unsupportedFeature(
|
||||
`Unsupported Responses API feature: tool_choice type '${tcType}' is not supported by omniroute`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Cleanup Responses API specific fields
|
||||
// Note: prompt_cache_key is intentionally preserved — it is used by Codex and other
|
||||
// providers as a cache-affinity signal. Stripping it breaks prompt caching (#517).
|
||||
@@ -288,11 +322,24 @@ export function openaiToOpenAIResponsesRequest(
|
||||
return { type: "input_text", text: toString(contentItem.text) };
|
||||
}
|
||||
if (contentItem.type === "image_url") {
|
||||
const imgUrl = contentItem.image_url as string | { url?: string };
|
||||
return {
|
||||
const imgUrl = contentItem.image_url as string | { url?: string; detail?: string };
|
||||
const imgResult: JsonRecord = {
|
||||
type: "input_image",
|
||||
image_url: typeof imgUrl === "string" ? imgUrl : imgUrl?.url || "",
|
||||
};
|
||||
if (typeof imgUrl === "object" && imgUrl?.detail !== undefined) {
|
||||
imgResult.detail = imgUrl.detail;
|
||||
}
|
||||
return imgResult;
|
||||
}
|
||||
if (contentItem.type === "file") {
|
||||
const file = toRecord(contentItem.file);
|
||||
const fileResult: JsonRecord = { type: "input_file" };
|
||||
if (file.file_data !== undefined) fileResult.file_data = file.file_data;
|
||||
if (file.file_id !== undefined) fileResult.file_id = file.file_id;
|
||||
if (file.file_url !== undefined) fileResult.file_url = file.file_url;
|
||||
if (file.filename !== undefined) fileResult.filename = file.filename;
|
||||
return fileResult;
|
||||
}
|
||||
return contentValue;
|
||||
})
|
||||
@@ -358,6 +405,20 @@ export function openaiToOpenAIResponsesRequest(
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Handle deprecated function_call field (pre-tool_calls API)
|
||||
if (msg.function_call && !msg.tool_calls) {
|
||||
const fc = toRecord(msg.function_call);
|
||||
const fnName = toString(fc.name).trim();
|
||||
if (fnName) {
|
||||
input.push({
|
||||
type: "function_call",
|
||||
call_id: `call_${fnName}`,
|
||||
name: fnName,
|
||||
arguments: toString(fc.arguments, "{}"),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convert tool results
|
||||
@@ -365,7 +426,24 @@ export function openaiToOpenAIResponsesRequest(
|
||||
input.push({
|
||||
type: "function_call_output",
|
||||
call_id: toString(msg.tool_call_id),
|
||||
output: msg.content,
|
||||
output: typeof msg.content === "string"
|
||||
? msg.content
|
||||
: Array.isArray(msg.content)
|
||||
? msg.content.map((c) => {
|
||||
const part = toRecord(c);
|
||||
if (part.type === "text") return { type: "input_text", text: toString(part.text) };
|
||||
return c;
|
||||
})
|
||||
: String(msg.content ?? ""),
|
||||
});
|
||||
}
|
||||
|
||||
// Handle deprecated function role messages
|
||||
if (role === "function") {
|
||||
input.push({
|
||||
type: "function_call_output",
|
||||
call_id: `call_${toString(msg.name)}`,
|
||||
output: typeof msg.content === "string" ? msg.content : String(msg.content ?? ""),
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -409,6 +487,23 @@ export function openaiToOpenAIResponsesRequest(
|
||||
});
|
||||
}
|
||||
|
||||
// Translate tool_choice: Chat {type,function:{name}} → Responses {type,name}
|
||||
if (root.tool_choice !== undefined) {
|
||||
if (typeof root.tool_choice === "string") {
|
||||
result.tool_choice = root.tool_choice;
|
||||
} else if (typeof root.tool_choice === "object" && !Array.isArray(root.tool_choice)) {
|
||||
const tc = toRecord(root.tool_choice);
|
||||
if (tc.type === "function" && tc.function) {
|
||||
const fn = toRecord(tc.function);
|
||||
result.tool_choice = { type: "function", name: fn.name };
|
||||
} else {
|
||||
result.tool_choice = root.tool_choice;
|
||||
}
|
||||
} else {
|
||||
result.tool_choice = root.tool_choice;
|
||||
}
|
||||
}
|
||||
|
||||
// Pass through relevant fields
|
||||
if (root.service_tier !== undefined) result.service_tier = root.service_tier;
|
||||
if (root.temperature !== undefined) result.temperature = root.temperature;
|
||||
|
||||
@@ -14,7 +14,13 @@ export function openaiToOpenAIResponsesResponse(chunk, state) {
|
||||
return flushEvents(state);
|
||||
}
|
||||
|
||||
if (!chunk.choices?.length) return [];
|
||||
if (!chunk.choices?.length) {
|
||||
// Capture usage from usage-only chunks (stream_options.include_usage)
|
||||
if (chunk.usage) {
|
||||
state.usage = chunk.usage;
|
||||
}
|
||||
return [];
|
||||
}
|
||||
|
||||
const events = [];
|
||||
const nextSeq = () => ++state.seq;
|
||||
@@ -69,7 +75,7 @@ export function openaiToOpenAIResponsesResponse(chunk, state) {
|
||||
|
||||
if (content.includes("<think>")) {
|
||||
state.inThinking = true;
|
||||
content = content.replace("<think>", "");
|
||||
content = content.replaceAll("<think>", "");
|
||||
startReasoning(state, emit, idx);
|
||||
}
|
||||
|
||||
@@ -334,16 +340,52 @@ function closeToolCall(state, emit, idx) {
|
||||
function sendCompleted(state, emit) {
|
||||
if (!state.completedSent) {
|
||||
state.completedSent = true;
|
||||
|
||||
// Build output from accumulated state
|
||||
const output = [];
|
||||
if (state.reasoningId) {
|
||||
output.push({
|
||||
id: state.reasoningId,
|
||||
type: "reasoning",
|
||||
summary: [{ type: "summary_text", text: state.reasoningBuf }],
|
||||
});
|
||||
}
|
||||
for (const idx in state.msgItemAdded) {
|
||||
output.push({
|
||||
id: `msg_${state.responseId}_${idx}`,
|
||||
type: "message",
|
||||
role: "assistant",
|
||||
content: [{ type: "output_text", annotations: [], text: state.msgTextBuf[idx] || "" }],
|
||||
});
|
||||
}
|
||||
for (const idx in state.funcCallIds) {
|
||||
const callId = state.funcCallIds[idx];
|
||||
output.push({
|
||||
id: `fc_${callId}`,
|
||||
type: "function_call",
|
||||
call_id: callId,
|
||||
name: state.funcNames[idx] || "",
|
||||
arguments: state.funcArgsBuf[idx] || "{}",
|
||||
});
|
||||
}
|
||||
|
||||
const response: Record<string, unknown> = {
|
||||
id: state.responseId,
|
||||
object: "response",
|
||||
created_at: state.created,
|
||||
status: "completed",
|
||||
background: false,
|
||||
error: null,
|
||||
output,
|
||||
};
|
||||
|
||||
if (state.usage) {
|
||||
response.usage = state.usage;
|
||||
}
|
||||
|
||||
emit("response.completed", {
|
||||
type: "response.completed",
|
||||
response: {
|
||||
id: state.responseId,
|
||||
object: "response",
|
||||
created_at: state.created,
|
||||
status: "completed",
|
||||
background: false,
|
||||
error: null,
|
||||
},
|
||||
response,
|
||||
});
|
||||
}
|
||||
}
|
||||
@@ -560,10 +602,21 @@ export function openaiResponsesToOpenAIResponse(chunk, state) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Reasoning events (convert to content or skip)
|
||||
// Reasoning events — emit as reasoning_content in Chat format
|
||||
if (eventType === "response.reasoning_summary_text.delta") {
|
||||
// Optionally include reasoning as content, or skip
|
||||
return null;
|
||||
const reasoningDelta = data.delta || "";
|
||||
if (!reasoningDelta) return null;
|
||||
return {
|
||||
id: state.chatId,
|
||||
object: "chat.completion.chunk",
|
||||
created: state.created,
|
||||
model: state.model || "gpt-4",
|
||||
choices: [{
|
||||
index: 0,
|
||||
delta: { reasoning_content: reasoningDelta },
|
||||
finish_reason: null,
|
||||
}],
|
||||
};
|
||||
}
|
||||
|
||||
// Ignore other events
|
||||
|
||||
@@ -0,0 +1,305 @@
|
||||
/**
|
||||
* Cache Control Policy
|
||||
*
|
||||
* Determines when to preserve client-side prompt caching headers (cache_control)
|
||||
* vs. applying OmniRoute's own caching strategy.
|
||||
*
|
||||
* Client-side caching (e.g., Claude Code) should be preserved when:
|
||||
* 1. Client is Claude Code or similar caching-aware client
|
||||
* 2. Request will hit a deterministic target (single model or deterministic combo strategy)
|
||||
* 3. Provider supports prompt caching (Anthropic, Alibaba Qwen, etc.)
|
||||
*/
|
||||
|
||||
import type { RoutingStrategyValue } from "../../src/shared/constants/routingStrategies";
|
||||
|
||||
/**
 * Preservation mode for client-supplied `cache_control` markers.
 *
 * - `"always"` — preserve the markers unconditionally.
 * - `"never"`  — never preserve them.
 * - `"auto"`   — decide per request via automatic detection.
 */
export type CacheControlMode =
  | "auto"
  | "always"
  | "never";
|
||||
|
||||
/**
 * Persisted cache-control settings (loaded from the database).
 */
export interface CacheControlSettings {
  /**
   * Preservation mode override. Despite the "always" in its name, this field
   * carries a full {@link CacheControlMode}, so it can also be "never" or "auto".
   * When omitted, callers fall through to automatic detection.
   */
  alwaysPreserveClientCache?: CacheControlMode;
}
|
||||
|
||||
/**
 * Aggregated counters describing how effective prompt caching has been.
 */
export interface CacheControlMetrics {
  /** Number of requests recorded, whether or not cache_control was preserved. */
  totalRequests: number;
  /** Number of recorded requests for which cache_control was preserved. */
  requestsWithCacheControl: number;

  /** Sum of input tokens over all recorded requests. */
  totalInputTokens: number;
  /** Sum of tokens served from cache. */
  totalCachedTokens: number;
  /** Sum of tokens written to the cache. */
  totalCacheCreationTokens: number;

  /** Running total of cached (reused) tokens counted as savings. */
  tokensSaved: number;
  /** Running total of the estimated cost saved. */
  estimatedCostSaved: number;

  /** Per-provider breakdown, keyed by provider name. */
  byProvider: Record<
    string,
    {
      requests: number;
      inputTokens: number;
      cachedTokens: number;
      cacheCreationTokens: number;
    }
  >;
  /** Per-routing-strategy breakdown, keyed by strategy name. */
  byStrategy: Record<
    string,
    {
      requests: number;
      inputTokens: number;
      cachedTokens: number;
      cacheCreationTokens: number;
    }
  >;

  /** ISO-8601 timestamp of the most recent update. */
  lastUpdated: string;
}
|
||||
|
||||
/**
 * Combo routing strategies treated as deterministic, i.e. the same request is
 * routed to the same provider.
 */
const DETERMINISTIC_STRATEGIES: Set<RoutingStrategyValue> = new Set<RoutingStrategyValue>([
  "priority",
  "cost-optimized",
]);
|
||||
|
||||
/**
 * Lower-case identifiers of providers that support prompt caching.
 */
const CACHING_PROVIDERS = new Set(
  [
    "claude",
    "anthropic",
    "zai",
    "qwen", // Alibaba Qwen Coding Plan International
  ]
);
|
||||
|
||||
/**
 * Heuristically detect Claude Code (or a similar caching-aware client) from
 * the request's User-Agent header.
 *
 * Matching is case-insensitive and substring-based: the header matches when it
 * contains "claude-code" or "claude_code", or when it contains both
 * "anthropic" and "cli". Because "cli" is matched as a substring, a header
 * containing e.g. "client" alongside "anthropic" also matches.
 *
 * @param userAgent - Raw User-Agent header value, if any.
 * @returns true when the header matches one of the patterns above; false for
 *          a missing or empty header.
 */
export function isClaudeCodeClient(userAgent: string | null | undefined): boolean {
  const normalized = (userAgent ?? "").toLowerCase();
  if (normalized === "") return false;

  const mentions = (needle: string): boolean => normalized.includes(needle);

  return (
    mentions("claude-code") ||
    mentions("claude_code") ||
    (mentions("anthropic") && mentions("cli"))
  );
}
|
||||
|
||||
/**
 * Report whether the given provider is listed as supporting prompt caching.
 *
 * The lookup is case-insensitive: the name is lower-cased before it is checked
 * against CACHING_PROVIDERS.
 *
 * @param provider - Provider identifier, if known.
 * @returns true when the provider is in the caching list; false for a missing
 *          or empty provider.
 */
export function providerSupportsCaching(provider: string | null | undefined): boolean {
  return provider ? CACHING_PROVIDERS.has(provider.toLowerCase()) : false;
}
|
||||
|
||||
/**
 * Report whether a routing strategy is one of the deterministic strategies
 * listed in DETERMINISTIC_STRATEGIES.
 *
 * @param strategy - Routing strategy, if any.
 * @returns true when the strategy is in the deterministic set; false when it
 *          is missing or not in the set.
 */
export function isDeterministicStrategy(
  strategy: RoutingStrategyValue | null | undefined
): boolean {
  return strategy ? DETERMINISTIC_STRATEGIES.has(strategy) : false;
}
|
||||
|
||||
/**
 * Decide whether client-supplied cache_control markers should be preserved.
 *
 * Decision order:
 *  1. An explicit "always" / "never" setting wins outright.
 *  2. Otherwise (auto): the client must be detected as caching-aware and the
 *     target provider must support caching.
 *  3. A single-model request is then always preserved; a combo request is
 *     preserved only when its routing strategy is deterministic.
 *
 * @param userAgent - User-Agent header from the request
 * @param isCombo - Whether this is a combo model
 * @param comboStrategy - The combo's routing strategy (if applicable)
 * @param targetProvider - The target provider for the request
 * @param settings - Cache control settings from database (optional)
 * @returns true if cache_control should be preserved, false if OmniRoute should manage it
 */
export function shouldPreserveCacheControl({
  userAgent,
  isCombo,
  comboStrategy,
  targetProvider,
  settings,
}: {
  userAgent: string | null | undefined;
  isCombo: boolean;
  comboStrategy?: RoutingStrategyValue | null;
  targetProvider: string | null | undefined;
  settings?: CacheControlSettings;
}): boolean {
  // Explicit user override short-circuits all detection.
  const override = settings?.alwaysPreserveClientCache;
  if (override === "always") return true;
  if (override === "never") return false;

  // Auto mode: both the client and the target provider must be caching-capable.
  const cachingCapable =
    isClaudeCodeClient(userAgent) && providerSupportsCaching(targetProvider);
  if (!cachingCapable) return false;

  // Single model is inherently deterministic; a combo depends on its strategy.
  return isCombo ? isDeterministicStrategy(comboStrategy) : true;
}
|
||||
|
||||
/**
 * Record one request in the cache-control metrics.
 *
 * Global totals (request count, token sums, tokens saved) are updated for
 * every call. The per-provider and per-strategy breakdowns, and
 * `requestsWithCacheControl`, are updated only when `preserved` is true.
 * Missing breakdown buckets are created on demand.
 *
 * The supplied `metrics` object is mutated in place and returned; when it is
 * falsy a fresh zeroed object is created and returned instead.
 *
 * @param preserved - Whether cache_control was preserved for this request.
 * @param provider - Provider name used as the `byProvider` key.
 * @param strategy - Strategy name used as the `byStrategy` key; skipped when falsy.
 * @param metrics - Metrics object to update.
 * @param inputTokens - Input tokens for the request (treated as 0 when falsy).
 * @param cachedTokens - Cached tokens for the request (treated as 0 when falsy).
 * @param cacheCreationTokens - Cache-creation tokens (treated as 0 when falsy).
 * @returns The updated metrics object.
 */
export function trackCacheMetrics({
  preserved,
  provider,
  strategy,
  metrics,
  inputTokens,
  cachedTokens,
  cacheCreationTokens,
}: {
  preserved: boolean;
  provider: string;
  strategy: string | null | undefined;
  metrics: CacheControlMetrics;
  inputTokens?: number;
  cachedTokens?: number;
  cacheCreationTokens?: number;
}): CacheControlMetrics {
  const timestamp = new Date().toISOString();

  // Fall back to a zeroed metrics object when none was supplied.
  const totals: CacheControlMetrics = metrics || {
    totalRequests: 0,
    requestsWithCacheControl: 0,
    totalInputTokens: 0,
    totalCachedTokens: 0,
    totalCacheCreationTokens: 0,
    tokensSaved: 0,
    estimatedCostSaved: 0,
    byProvider: {},
    byStrategy: {},
    lastUpdated: timestamp,
  };

  const input = inputTokens || 0;
  const cached = cachedTokens || 0;
  const creation = cacheCreationTokens || 0;

  totals.totalRequests++;
  totals.totalInputTokens += input;
  totals.totalCachedTokens += cached;
  totals.totalCacheCreationTokens += creation;

  // Cached tokens are reused rather than charged, so they count as savings.
  if (cached > 0) {
    totals.tokensSaved += cached;
  }

  if (preserved) {
    totals.requestsWithCacheControl++;

    // Create the bucket for `key` if needed, then add this request to it.
    const record = (table: CacheControlMetrics["byProvider"], key: string): void => {
      let bucket = table[key];
      if (!bucket) {
        bucket = { requests: 0, inputTokens: 0, cachedTokens: 0, cacheCreationTokens: 0 };
        table[key] = bucket;
      }
      bucket.requests++;
      bucket.inputTokens += input;
      bucket.cachedTokens += cached;
      bucket.cacheCreationTokens += creation;
    };

    record(totals.byProvider, provider);
    if (strategy) {
      record(totals.byStrategy, strategy);
    }
  }

  totals.lastUpdated = timestamp;
  return totals;
}
|
||||
|
||||
/**
 * Add cache token usage to an existing metrics object.
 *
 * Global token totals and `tokensSaved` are always updated; `estimatedCostSaved`
 * is updated only when `costSaved` is provided. Per-provider and per-strategy
 * buckets are updated only if they already exist — this function never creates
 * a bucket and never changes any request counter.
 *
 * The `metrics` object is mutated in place and returned.
 *
 * @param metrics - Metrics object to update.
 * @param provider - Provider name used as the `byProvider` key.
 * @param strategy - Strategy name used as the `byStrategy` key; skipped when falsy.
 * @param inputTokens - Input tokens to add.
 * @param cachedTokens - Cached (reused) tokens to add; also added to `tokensSaved`.
 * @param cacheCreationTokens - Cache-write tokens to add.
 * @param costSaved - Optional estimated cost saved to add.
 * @returns The updated metrics object.
 */
export function updateCacheTokenMetrics({
  metrics,
  provider,
  strategy,
  inputTokens,
  cachedTokens,
  cacheCreationTokens,
  costSaved,
}: {
  metrics: CacheControlMetrics;
  provider: string;
  strategy: string | null | undefined;
  inputTokens: number;
  cachedTokens: number;
  cacheCreationTokens: number;
  costSaved?: number;
}): CacheControlMetrics {
  // Add this call's token counts to a breakdown bucket, if the bucket exists.
  const addTokens = (
    bucket:
      | { inputTokens: number; cachedTokens: number; cacheCreationTokens: number }
      | undefined
  ): void => {
    if (!bucket) return;
    bucket.cachedTokens += cachedTokens;
    bucket.cacheCreationTokens += cacheCreationTokens;
    bucket.inputTokens += inputTokens;
  };

  metrics.totalCachedTokens += cachedTokens;
  metrics.totalCacheCreationTokens += cacheCreationTokens;
  metrics.totalInputTokens += inputTokens;

  // Cached tokens are reused (saved); creation tokens are new cache writes.
  metrics.tokensSaved += cachedTokens;
  if (costSaved !== undefined) {
    metrics.estimatedCostSaved += costSaved;
  }

  addTokens(metrics.byProvider[provider]);
  if (strategy) {
    addTokens(metrics.byStrategy[strategy]);
  }

  metrics.lastUpdated = new Date().toISOString();
  return metrics;
}
|
||||
@@ -159,8 +159,9 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
|
||||
// Track content length for usage estimation (both modes)
|
||||
let totalContentLength = 0;
|
||||
// Passthrough: accumulate content for call log response body
|
||||
// Passthrough: accumulate content and reasoning separately for call log response body
|
||||
let passthroughAccumulatedContent = "";
|
||||
let passthroughAccumulatedReasoning = "";
|
||||
|
||||
// Guard against duplicate [DONE] events — ensures exactly one per stream
|
||||
let doneSent = false;
|
||||
@@ -304,6 +305,14 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
}
|
||||
} else {
|
||||
// Chat Completions: full sanitization pipeline
|
||||
|
||||
// Detect reasoning alias before sanitization strips it
|
||||
const hadReasoningAlias = !!(
|
||||
parsed.choices?.[0]?.delta?.reasoning &&
|
||||
typeof parsed.choices[0].delta.reasoning === "string" &&
|
||||
!parsed.choices[0].delta.reasoning_content
|
||||
);
|
||||
|
||||
parsed = sanitizeStreamingChunk(parsed);
|
||||
|
||||
const idFixed = fixInvalidId(parsed);
|
||||
@@ -323,6 +332,31 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
}
|
||||
}
|
||||
|
||||
// Split combined reasoning+content deltas into separate SSE events.
|
||||
// Standard OpenAI streaming never mixes both fields in one delta;
|
||||
// clients (e.g. LobeChat) may skip content when reasoning_content
|
||||
// is present, causing the first content token to be lost.
|
||||
if (delta?.reasoning_content && delta?.content) {
|
||||
const reasoningChunk = JSON.parse(JSON.stringify(parsed));
|
||||
const rDelta = reasoningChunk.choices[0].delta;
|
||||
delete rDelta.content;
|
||||
reasoningChunk.choices[0].finish_reason = null;
|
||||
delete reasoningChunk.usage;
|
||||
const rOutput = `data: ${JSON.stringify(reasoningChunk)}\n`;
|
||||
passthroughAccumulatedReasoning += delta.reasoning_content;
|
||||
totalContentLength += delta.reasoning_content.length;
|
||||
clientPayloadCollector.push(reasoningChunk);
|
||||
reqLogger?.appendConvertedChunk?.(rOutput);
|
||||
controller.enqueue(encoder.encode(rOutput));
|
||||
controller.enqueue(encoder.encode("\n"));
|
||||
delete delta.reasoning_content;
|
||||
}
|
||||
|
||||
// Track whether we need to re-serialize (separate from injectedUsage
|
||||
// to avoid blocking subsequent finish_reason / usage mutations)
|
||||
const needsReserialization =
|
||||
hadReasoningAlias || (delta?.content === "" && delta?.reasoning_content);
|
||||
|
||||
// T18: Track if we saw tool calls & accumulate for call log
|
||||
if (delta?.tool_calls && delta.tool_calls.length > 0) {
|
||||
passthroughHasToolCalls = true;
|
||||
@@ -365,7 +399,7 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
if (typeof delta?.content === "string")
|
||||
passthroughAccumulatedContent += delta.content;
|
||||
if (typeof delta?.reasoning_content === "string")
|
||||
passthroughAccumulatedContent += delta.reasoning_content;
|
||||
passthroughAccumulatedReasoning += delta.reasoning_content;
|
||||
|
||||
const extracted = extractUsage(parsed);
|
||||
if (extracted) {
|
||||
@@ -398,7 +432,7 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
parsed.usage = filterUsageForFormat(buffered, FORMATS.OPENAI);
|
||||
output = `data: ${JSON.stringify(parsed)}\n`;
|
||||
injectedUsage = true;
|
||||
} else if (idFixed) {
|
||||
} else if (idFixed || needsReserialization) {
|
||||
output = `data: ${JSON.stringify(parsed)}\n`;
|
||||
injectedUsage = true;
|
||||
}
|
||||
@@ -483,6 +517,19 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
if (state?.accumulatedContent !== undefined) state.accumulatedContent += r;
|
||||
}
|
||||
}
|
||||
// Normalize `reasoning` alias → `reasoning_content` (NVIDIA kimi-k2.5 etc.)
|
||||
if (
|
||||
parsed.choices?.[0]?.delta?.reasoning &&
|
||||
!parsed.choices?.[0]?.delta?.reasoning_content
|
||||
) {
|
||||
const r = parsed.choices[0].delta.reasoning;
|
||||
if (typeof r === "string") {
|
||||
parsed.choices[0].delta.reasoning_content = r;
|
||||
delete parsed.choices[0].delta.reasoning;
|
||||
totalContentLength += r.length;
|
||||
if (state?.accumulatedContent !== undefined) state.accumulatedContent += r;
|
||||
}
|
||||
}
|
||||
|
||||
// Gemini format - may have multiple parts
|
||||
if (parsed.candidates?.[0]?.content?.parts) {
|
||||
@@ -635,6 +682,10 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
role: "assistant",
|
||||
content: content || null,
|
||||
};
|
||||
const reasoning = passthroughAccumulatedReasoning.trim();
|
||||
if (reasoning) {
|
||||
message.reasoning_content = reasoning;
|
||||
}
|
||||
if (passthroughToolCalls.size > 0) {
|
||||
message.tool_calls = [...passthroughToolCalls.values()].sort(
|
||||
(a, b) => a.index - b.index
|
||||
|
||||
@@ -157,6 +157,10 @@ function buildOpenAISummary(events: StructuredSSEEvent[], fallbackModel?: string
|
||||
if (typeof delta.reasoning_content === "string" && delta.reasoning_content.length > 0) {
|
||||
reasoningParts.push(delta.reasoning_content);
|
||||
}
|
||||
// Normalize `reasoning` alias (NVIDIA kimi-k2.5 etc.)
|
||||
if (typeof delta.reasoning === "string" && delta.reasoning.length > 0 && !delta.reasoning_content) {
|
||||
reasoningParts.push(delta.reasoning);
|
||||
}
|
||||
|
||||
if (Array.isArray(delta.tool_calls)) {
|
||||
for (const item of delta.tool_calls) {
|
||||
@@ -203,12 +207,14 @@ function buildOpenAISummary(events: StructuredSSEEvent[], fallbackModel?: string
|
||||
}
|
||||
}
|
||||
|
||||
const joinedContent = contentParts.length > 0 ? contentParts.join("").trim() : null;
|
||||
const joinedReasoning = reasoningParts.length > 0 ? reasoningParts.join("").trim() : null;
|
||||
const message: JsonRecord = {
|
||||
role: "assistant",
|
||||
content: contentParts.length > 0 ? contentParts.join("") : null,
|
||||
content: joinedContent || null,
|
||||
};
|
||||
if (reasoningParts.length > 0) {
|
||||
message.reasoning_content = reasoningParts.join("");
|
||||
if (joinedReasoning) {
|
||||
message.reasoning_content = joinedReasoning;
|
||||
}
|
||||
|
||||
const finalToolCalls = [...toolCalls.values()].sort((a, b) => a.index - b.index);
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "omniroute",
|
||||
"version": "3.2.6",
|
||||
"version": "3.3.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "omniroute",
|
||||
"version": "3.2.6",
|
||||
"version": "3.3.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"workspaces": [
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "omniroute",
|
||||
"version": "3.2.7",
|
||||
"version": "3.3.0",
|
||||
"description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
## [3.2.8] - 2026-03-29
|
||||
|
||||
### ✨ Enhancements & Refactoring
|
||||
|
||||
- **Docker Auto-Update UI** — Integrated a detached background update process for Docker Compose deployments. The Dashboard UI now seamlessly tracks update lifecycle events combining JSON REST responses with SSE streaming progress overlays for robust cross-environment reliability.
|
||||
- **Cache Analytics** — Repaired zero-metrics visualization mapping by migrating Semantic Cache telemetry logs directly into the centralized tracking SQLite module.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Authentication Logic** — Fixed a bug where saving dashboard settings or adding models failed with a 401 Unauthorized error when `requireLogin` was disabled. API endpoints now correctly evaluate the global authentication toggle. Resolved global redirection by reactivating `src/middleware.ts`.
|
||||
- **CLI Tool Detection (Windows)** — Prevented fatal initialization exceptions during CLI environment detection by catching `cross-spawn` ENOENT errors correctly. Adds explicit detection paths for `\AppData\Local\droid\droid.exe`.
|
||||
- **Codex Native Passthrough** — Normalized model translation parameters preventing context poisoning in proxy pass-through mode, enforcing generic `store: false` constraints explicitly for all Codex-originated requests.
|
||||
- **SSE Token Reporting** — Normalized provider tool-call chunk `finish_reason` detection, fixing 0% Usage analytics for stream-only responses missing the strict `[DONE]` terminator.
|
||||
- **DeepSeek `<think>` Tags** — Implemented an explicit `<think>` extraction mapping inside `responsesHandler.ts`, ensuring DeepSeek reasoning streams map equivalently to native Anthropic `<thinking>` structures.
|
||||
|
||||
---
|
||||
|
||||
Executable
+151
@@ -0,0 +1,151 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
OmniRoute i18n Auto-Translator
|
||||
This script scans all docs/i18n directory markdown files and uses an LLM
|
||||
API (like OmniRoute itself) to translate any English paragraphs into the
|
||||
target language.
|
||||
|
||||
Usage:
|
||||
python3 scripts/i18n_autotranslate.py --api-url http://localhost:20128/v1 --api-key <YOUR_API_KEY> --model cx/gpt-5.4
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import glob
|
||||
import json
|
||||
import urllib.request
|
||||
import urllib.error
|
||||
import argparse
|
||||
from pathlib import Path
|
||||
|
||||
# The base path of the project
|
||||
SCRIPT_DIR = Path(__file__).parent.resolve()
|
||||
PROJECT_ROOT = SCRIPT_DIR.parent
|
||||
I18N_DIR = PROJECT_ROOT / "docs" / "i18n"
|
||||
|
||||
def get_language_name(lang_code):
    """Return the English display name for an i18n language code.

    The codes are the ones this script knows how to label (e.g. ``pt-BR``,
    ``zh-CN``). A code with no entry in the table is returned unchanged so the
    caller can still pass it along as the target language.
    """
    known_names = dict([
        ("pt-BR", "Portuguese (Brazil)"),
        ("es", "Spanish"),
        ("fr", "French"),
        ("it", "Italian"),
        ("ru", "Russian"),
        ("zh-CN", "Simplified Chinese"),
        ("de", "German"),
        ("in", "Hindi"),
        ("th", "Thai"),
        ("uk-UA", "Ukrainian"),
        ("ar", "Arabic"),
        ("ja", "Japanese"),
        ("vi", "Vietnamese"),
        ("bg", "Bulgarian"),
        ("da", "Danish"),
        ("fi", "Finnish"),
        ("he", "Hebrew"),
        ("hu", "Hungarian"),
        ("id", "Indonesian"),
        ("ko", "Korean"),
        ("ms", "Malay"),
        ("nl", "Dutch"),
        ("no", "Norwegian"),
        ("pt", "Portuguese (Portugal)"),
        ("ro", "Romanian"),
        ("pl", "Polish"),
        ("sk", "Slovak"),
        ("sv", "Swedish"),
        ("phi", "Filipino"),
        ("cs", "Czech"),
    ])

    if lang_code in known_names:
        return known_names[lang_code]
    # Unknown code: fall back to the raw code itself.
    return lang_code
|
||||
|
||||
def translate_block(text, target_language, api_url, api_key, model, timeout=120):
    """Translate one Markdown block from English through a chat-completions API.

    This is best-effort: any failure is reported on stdout and the original
    text is returned, so one bad request never aborts a whole run.

    Args:
        text: Markdown source block. Blank/whitespace-only text is returned as is.
        target_language: Human-readable language name inserted into the prompt.
        api_url: Base URL of the API; ``/chat/completions`` is appended
            (a trailing slash on the base URL is tolerated).
        api_key: Sent as an ``Authorization: Bearer`` token.
        model: Model identifier placed in the request payload.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The translation with surrounding whitespace stripped, or ``text``
        unchanged when the block is blank, the request fails, or the response
        carries no usable translated content.
    """
    if not text.strip():
        return text

    prompt = (
        "You are a professional technical translator working on the OmniRoute proxy project documentation.\n"
        f"Translate the following Markdown text from English to {target_language}.\n"
        "CRITICAL RULES:\n"
        "- Do NOT translate code blocks (```...```).\n"
        "- Do NOT translate markdown formatting elements, links syntax, or image syntax.\n"
        "- Retain formatting perfectly.\n"
        "- Only return the translated text without introductory phrases.\n\n"
        f"{text}"
    )

    data = {
        "model": model,
        "messages": [
            {"role": "system", "content": "You are a direct translator. Output only the requested translation."},
            {"role": "user", "content": prompt}
        ],
        "temperature": 0.3,
        "stream": False
    }

    try:
        # Building the request inside the try keeps a malformed --api-url on the
        # same "report and fall back" path as a network failure.
        req = urllib.request.Request(
            f"{api_url.rstrip('/')}/chat/completions",
            data=json.dumps(data).encode('utf-8'),
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {api_key}"
            }
        )

        # Without a timeout a stalled server would block the script forever.
        with urllib.request.urlopen(req, timeout=timeout) as response:
            result = json.loads(response.read().decode('utf-8'))

        choices = result.get("choices") if isinstance(result, dict) else None
        if choices:
            translated = choices[0]["message"]["content"]
            # Never replace a paragraph with an empty or null completion.
            if isinstance(translated, str) and translated.strip():
                return translated.strip()
        print(" ❌ API Error: response contained no translated text")
    except Exception as e:  # deliberate catch-all: translation is best-effort
        print(f" ❌ API Error: {e}")

    return text
|
||||
|
||||
def process_file(file_path, target_language, api_url, api_key, model):
    """Translate the English-looking paragraphs of one Markdown file in place.

    The file is split on blank lines. Each chunk that looks English (contains
    one of a few common English words and is longer than 10 characters) is sent
    to ``translate_block``; the file is rewritten only if a chunk changed.

    Chunks are left untouched when they start with a code fence, ``<div``,
    the language-switcher emoji or a table pipe, or when they begin inside a
    fenced code block opened by an earlier chunk. The last rule matters because
    a fenced block containing blank lines is split into several chunks, and
    only the first of them starts with the fence marker.

    Args:
        file_path: ``pathlib.Path`` of the Markdown file (``.name`` is used in messages).
        target_language: Human-readable language name passed to the translator.
        api_url: Base URL forwarded to ``translate_block``.
        api_key: API key forwarded to ``translate_block``.
        model: Model name forwarded to ``translate_block``.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Simple heuristic: common English words mark a chunk as untranslated.
    # A truly robust implementation would diff against the English source.
    english_words = [" the ", " is ", " are ", " this ", " that ", " a ", " to "]
    skip_prefixes = ('```', '<div', '🌐', '|')

    translated_blocks = []
    needs_update = False
    inside_fence = False  # True while an opened ``` fence has not been closed

    for block in content.split('\n\n'):
        began_inside_fence = inside_fence

        # An odd number of fence lines in this chunk flips the open/closed state.
        fence_lines = sum(
            1 for line in block.split('\n') if line.lstrip().startswith('```')
        )
        if fence_lines % 2 == 1:
            inside_fence = not inside_fence

        # Skip code (including the continuation of an open fence) and markup chunks.
        if began_inside_fence or block.startswith(skip_prefixes):
            translated_blocks.append(block)
            continue

        lowered = block.lower()
        is_english = any(w in lowered for w in english_words)

        if is_english and len(block.strip()) > 10:
            print(f" 🔄 Translating paragraph (length {len(block)})...")
            new_block = translate_block(block, target_language, api_url, api_key, model)
            if new_block != block:
                needs_update = True
            translated_blocks.append(new_block)
        else:
            translated_blocks.append(block)

    if needs_update:
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write('\n\n'.join(translated_blocks))
        print(f" ✅ Updated translations in {file_path.name}")
    else:
        print(f" ⏩ {file_path.name} already fully translated or no English blocks found.")
|
||||
|
||||
def main():
    """Command-line entry point: translate every Markdown file under docs/i18n.

    Parses ``--api-url``, ``--api-key``, ``--model`` and the optional ``--lang``
    filter, then runs ``process_file`` on each ``*.md`` file of every selected
    language directory. Directories and files are visited in sorted order so
    repeated runs produce the same log.

    Exits with status 1 when the i18n directory is missing or when ``--lang``
    names a language that has no directory, instead of crashing or silently
    doing nothing.
    """
    parser = argparse.ArgumentParser(description="OmniRoute Auto-Translator for i18n Markdown")
    parser.add_argument("--api-url", default="http://localhost:20128/v1", help="Base URL of OmniRoute or target provider")
    parser.add_argument("--api-key", default="sk-test", help="API Key for the provider")
    parser.add_argument("--model", default="gc/gemini-3-flash", help="Model name to use")
    parser.add_argument("--lang", default=None, help="Process only a specific language code (e.g. pt-BR)")

    args = parser.parse_args()

    print("🚀 Starting Auto-Translator")
    print(f"🔗 Target API: {args.api_url} | Model: {args.model}\n")

    if not I18N_DIR.is_dir():
        print(f"❌ i18n directory not found: {I18N_DIR}", file=sys.stderr)
        sys.exit(1)

    # An empty/absent --lang selects every language directory.
    lang_dirs = sorted(
        (
            d for d in I18N_DIR.iterdir()
            if d.is_dir() and (not args.lang or d.name == args.lang)
        ),
        key=lambda d: d.name,
    )

    if args.lang and not lang_dirs:
        print(f"❌ No i18n directory found for language '{args.lang}' in {I18N_DIR}", file=sys.stderr)
        sys.exit(1)

    for lang_dir in lang_dirs:
        lang_code = lang_dir.name
        lang_name = get_language_name(lang_code)

        print(f"\n🌍 Processing {lang_name} ({lang_code})")

        for md_file in sorted(lang_dir.glob("*.md")):
            process_file(md_file, lang_name, args.api_url, args.api_key, args.model)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -13,6 +13,25 @@ import { AI_PROVIDERS, FREE_PROVIDERS, OAUTH_PROVIDERS } from "@/shared/constant
|
||||
import { useNotificationStore } from "@/store/notificationStore";
|
||||
import { copyToClipboard } from "@/shared/utils/clipboard";
|
||||
|
||||
type UpdateStep = {
|
||||
step: string;
|
||||
status: string;
|
||||
message: string;
|
||||
};
|
||||
|
||||
// Promise-based delay: resolves (with no value) after `ms` milliseconds.
const wait = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
|
||||
|
||||
/**
 * Returns a new step list with `nextStep` merged in.
 *
 * If a step with the same `step` id already exists, the first such entry is
 * replaced in place (order preserved); otherwise `nextStep` is appended.
 * The input array is never mutated.
 */
function mergeUpdateStep(steps: UpdateStep[], nextStep: UpdateStep) {
  const merged = Array.from(steps);
  const position = merged.findIndex((candidate) => candidate.step === nextStep.step);

  if (position >= 0) {
    merged.splice(position, 1, nextStep);
  } else {
    merged.push(nextStep);
  }

  return merged;
}
|
||||
|
||||
export default function HomePageClient({ machineId }) {
|
||||
const t = useTranslations("home");
|
||||
const tc = useTranslations("common");
|
||||
@@ -26,9 +45,7 @@ export default function HomePageClient({ machineId }) {
|
||||
|
||||
const [versionInfo, setVersionInfo] = useState<any>(null);
|
||||
const [updating, setUpdating] = useState(false);
|
||||
const [updateSteps, setUpdateSteps] = useState<
|
||||
Array<{ step: string; status: string; message: string }>
|
||||
>([]);
|
||||
const [updateSteps, setUpdateSteps] = useState<UpdateStep[]>([]);
|
||||
const [updatePhase, setUpdatePhase] = useState<"idle" | "running" | "done" | "failed">("idle");
|
||||
|
||||
useEffect(() => {
|
||||
@@ -134,6 +151,155 @@ export default function HomePageClient({ machineId }) {
|
||||
},
|
||||
];
|
||||
|
||||
const pollBackgroundUpdate = useCallback(
|
||||
async ({
|
||||
channel,
|
||||
message,
|
||||
targetVersion,
|
||||
}: {
|
||||
channel: string;
|
||||
message: string;
|
||||
targetVersion: string;
|
||||
}) => {
|
||||
const notify = useNotificationStore.getState();
|
||||
const initialSteps =
|
||||
channel === "docker-compose"
|
||||
? [
|
||||
{
|
||||
step: "install",
|
||||
status: "done",
|
||||
message: message || `Queued update to v${targetVersion}.`,
|
||||
},
|
||||
{
|
||||
step: "rebuild",
|
||||
status: "running",
|
||||
message: "Docker image is rebuilding in the background.",
|
||||
},
|
||||
{
|
||||
step: "restart",
|
||||
status: "pending",
|
||||
message: "Waiting for OmniRoute to restart with the new version.",
|
||||
},
|
||||
]
|
||||
: [
|
||||
{
|
||||
step: "install",
|
||||
status: "running",
|
||||
message: message || `Installing v${targetVersion}.`,
|
||||
},
|
||||
{
|
||||
step: "restart",
|
||||
status: "pending",
|
||||
message: "Waiting for OmniRoute to restart with the new version.",
|
||||
},
|
||||
];
|
||||
|
||||
setUpdateSteps(initialSteps);
|
||||
|
||||
const maxAttempts = channel === "docker-compose" ? 72 : 36;
|
||||
|
||||
for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
|
||||
await wait(5000);
|
||||
|
||||
try {
|
||||
const versionRes = await fetch("/api/system/version", { cache: "no-store" });
|
||||
if (!versionRes.ok) {
|
||||
throw new Error(`Version check returned ${versionRes.status}`);
|
||||
}
|
||||
|
||||
const latestInfo = await versionRes.json();
|
||||
setVersionInfo(latestInfo);
|
||||
|
||||
if (latestInfo.current === targetVersion) {
|
||||
setUpdateSteps((prev) => {
|
||||
let next = prev.map((step) => {
|
||||
if (step.step === "install" || step.step === "rebuild" || step.step === "restart") {
|
||||
return { ...step, status: "done" };
|
||||
}
|
||||
return step;
|
||||
});
|
||||
|
||||
next = mergeUpdateStep(next, {
|
||||
step: "complete",
|
||||
status: "done",
|
||||
message: `OmniRoute is now running v${targetVersion}.`,
|
||||
});
|
||||
|
||||
return next;
|
||||
});
|
||||
setUpdating(false);
|
||||
setUpdatePhase("done");
|
||||
notify.success(`OmniRoute updated to v${targetVersion}.`);
|
||||
await fetchData();
|
||||
return;
|
||||
}
|
||||
|
||||
setUpdateSteps((prev) => {
|
||||
let next = prev;
|
||||
if (channel === "docker-compose") {
|
||||
next = mergeUpdateStep(next, {
|
||||
step: "rebuild",
|
||||
status: "running",
|
||||
message: `Docker image is still rebuilding for v${targetVersion}.`,
|
||||
});
|
||||
} else {
|
||||
next = mergeUpdateStep(next, {
|
||||
step: "install",
|
||||
status: "running",
|
||||
message: `Installing v${targetVersion} in the background.`,
|
||||
});
|
||||
}
|
||||
|
||||
next = mergeUpdateStep(next, {
|
||||
step: "restart",
|
||||
status: "pending",
|
||||
message: `Waiting for OmniRoute to come back on v${targetVersion}.`,
|
||||
});
|
||||
|
||||
return next;
|
||||
});
|
||||
} catch {
|
||||
setUpdateSteps((prev) => {
|
||||
let next = prev;
|
||||
if (channel === "docker-compose") {
|
||||
next = mergeUpdateStep(next, {
|
||||
step: "rebuild",
|
||||
status: "running",
|
||||
message: "Docker rebuild is still in progress.",
|
||||
});
|
||||
} else {
|
||||
next = mergeUpdateStep(next, {
|
||||
step: "install",
|
||||
status: "running",
|
||||
message: `Installing v${targetVersion} in the background.`,
|
||||
});
|
||||
}
|
||||
|
||||
next = mergeUpdateStep(next, {
|
||||
step: "restart",
|
||||
status: "running",
|
||||
message: "Service restart in progress. Waiting for OmniRoute to come back online...",
|
||||
});
|
||||
|
||||
return next;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
setUpdateSteps((prev) =>
|
||||
mergeUpdateStep(prev, {
|
||||
step: "error",
|
||||
status: "failed",
|
||||
message: `Update started, but v${targetVersion} did not become available before timeout. Refresh the page or check server logs.`,
|
||||
})
|
||||
);
|
||||
setUpdating(false);
|
||||
setUpdatePhase("failed");
|
||||
notify.error(`Update to v${targetVersion} timed out.`);
|
||||
},
|
||||
[fetchData]
|
||||
);
|
||||
|
||||
const handleUpdate = async () => {
|
||||
const notify = useNotificationStore.getState();
|
||||
setUpdating(true);
|
||||
@@ -153,6 +319,13 @@ export default function HomePageClient({ machineId }) {
|
||||
setUpdatePhase("idle");
|
||||
return;
|
||||
}
|
||||
notify.success(data.message || "Update started.");
|
||||
await pollBackgroundUpdate({
|
||||
channel: data.channel || "docker-compose",
|
||||
message: data.message || "",
|
||||
targetVersion: data.to || data.latest,
|
||||
});
|
||||
return;
|
||||
}
|
||||
|
||||
// SSE stream — read progress events
|
||||
@@ -181,18 +354,12 @@ export default function HomePageClient({ machineId }) {
|
||||
const event = JSON.parse(line.slice(6));
|
||||
|
||||
setUpdateSteps((prev) => {
|
||||
// Replace existing step entry or add new one
|
||||
const idx = prev.findIndex((s) => s.step === event.step);
|
||||
if (idx >= 0) {
|
||||
const next = [...prev];
|
||||
next[idx] = event;
|
||||
return next;
|
||||
}
|
||||
return [...prev, event];
|
||||
return mergeUpdateStep(prev, event);
|
||||
});
|
||||
|
||||
if (event.step === "complete") {
|
||||
setUpdatePhase("done");
|
||||
setUpdating(false);
|
||||
notify.success(event.message || "Update complete!");
|
||||
} else if (event.step === "error") {
|
||||
setUpdatePhase("failed");
|
||||
@@ -242,6 +409,7 @@ export default function HomePageClient({ machineId }) {
|
||||
complete: "Complete",
|
||||
error: "Error",
|
||||
};
|
||||
const showUpdateOverlay = updatePhase !== "idle";
|
||||
|
||||
if (loading) {
|
||||
return (
|
||||
@@ -257,7 +425,7 @@ export default function HomePageClient({ machineId }) {
|
||||
return (
|
||||
<div className="flex flex-col gap-8">
|
||||
{/* Update Progress Overlay */}
|
||||
{updating && (
|
||||
{showUpdateOverlay && (
|
||||
<div className="fixed inset-0 z-[999] bg-black/60 backdrop-blur-sm flex items-center justify-center p-4">
|
||||
<div className="bg-bg-main border border-border rounded-2xl shadow-2xl max-w-md w-full p-6">
|
||||
<div className="flex items-center gap-3 mb-5">
|
||||
@@ -371,7 +539,7 @@ export default function HomePageClient({ machineId }) {
|
||||
)}
|
||||
|
||||
{/* Update Notification Banner */}
|
||||
{versionInfo?.updateAvailable && !updating && (
|
||||
{versionInfo?.updateAvailable && !showUpdateOverlay && (
|
||||
<div className="bg-primary/10 border border-primary/20 text-primary px-5 py-4 rounded-xl flex items-center justify-between min-h-[64px]">
|
||||
<div className="flex items-center gap-4">
|
||||
<span className="material-symbols-outlined text-[24px]">system_update_alt</span>
|
||||
|
||||
@@ -186,6 +186,9 @@ const COMBO_TEMPLATE_FALLBACK = {
|
||||
freeStackTitle: "Free Stack ($0)",
|
||||
freeStackDesc:
|
||||
"Round-robin across all free providers: Kiro, iFlow, Qwen, Gemini CLI. Zero cost, never stops.",
|
||||
paidPremiumTitle: "Paid Premium",
|
||||
paidPremiumDesc:
|
||||
"Round-robin across paid subscriptions: Cursor, Antigravity. Top-tier models, distributed load.",
|
||||
};
|
||||
|
||||
const COMBO_TEMPLATES = [
|
||||
@@ -250,6 +253,21 @@ const COMBO_TEMPLATES = [
|
||||
healthCheckEnabled: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "paid-premium",
|
||||
icon: "workspace_premium",
|
||||
titleKey: "templatePaidPremium",
|
||||
descKey: "templatePaidPremiumDesc",
|
||||
fallbackTitle: COMBO_TEMPLATE_FALLBACK.paidPremiumTitle,
|
||||
fallbackDesc: COMBO_TEMPLATE_FALLBACK.paidPremiumDesc,
|
||||
strategy: "round-robin",
|
||||
suggestedName: "paid-premium",
|
||||
config: {
|
||||
maxRetries: 2,
|
||||
retryDelayMs: 1000,
|
||||
healthCheckEnabled: true,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
function getStrategyMeta(strategy) {
|
||||
@@ -1425,18 +1443,27 @@ function ComboFormModal({ isOpen, combo, onClose, onSave, activeProviders }) {
|
||||
{ model: "kr/claude-sonnet-4.5", weight: 0 },
|
||||
{ model: "if/kimi-k2-thinking", weight: 0 },
|
||||
{ model: "if/qwen3-coder-plus", weight: 0 },
|
||||
{ model: "qw/qwen3-coder-plus", weight: 0 },
|
||||
{ model: "if/deepseek-v3.2", weight: 0 },
|
||||
{ model: "nvidia/llama-3.3-70b-instruct", weight: 0 },
|
||||
{ model: "groq/llama-3.3-70b-versatile", weight: 0 },
|
||||
];
|
||||
|
||||
const PAID_PREMIUM_PRESET_MODELS = [
|
||||
{ model: "cu/claude-4.6-opus-high", weight: 0 },
|
||||
{ model: "ag/claude-sonnet-4-6", weight: 0 },
|
||||
{ model: "cu/claude-4.6-sonnet-high", weight: 0 },
|
||||
{ model: "ag/gpt-5", weight: 0 },
|
||||
{ model: "ag/gemini-3.1-pro-preview", weight: 0 },
|
||||
];
|
||||
|
||||
const applyTemplate = (template) => {
|
||||
setStrategy(template.strategy);
|
||||
setConfig((prev) => ({ ...prev, ...template.config }));
|
||||
if (!name.trim()) setName(template.suggestedName);
|
||||
// Pre-fill Free Stack with 7 real free provider models
|
||||
if (template.id === "free-stack") {
|
||||
setModels(FREE_STACK_PRESET_MODELS);
|
||||
} else if (template.id === "paid-premium") {
|
||||
setModels(PAID_PREMIUM_PRESET_MODELS);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@@ -4,69 +4,190 @@ import { useState, useEffect } from "react";
|
||||
import { Card } from "@/shared/components";
|
||||
import { useTranslations } from "next-intl";
|
||||
|
||||
interface CacheMetrics {
|
||||
totalRequests: number;
|
||||
requestsWithCacheControl: number;
|
||||
totalInputTokens: number;
|
||||
totalCachedTokens: number;
|
||||
totalCacheCreationTokens: number;
|
||||
tokensSaved: number;
|
||||
estimatedCostSaved: number;
|
||||
byProvider: Record<
|
||||
string,
|
||||
{
|
||||
requests: number;
|
||||
inputTokens: number;
|
||||
cachedTokens: number;
|
||||
cacheCreationTokens: number;
|
||||
}
|
||||
>;
|
||||
byStrategy: Record<
|
||||
string,
|
||||
{
|
||||
requests: number;
|
||||
inputTokens: number;
|
||||
cachedTokens: number;
|
||||
cacheCreationTokens: number;
|
||||
}
|
||||
>;
|
||||
lastUpdated: string;
|
||||
}
|
||||
|
||||
export default function CacheStatsCard() {
|
||||
const [cache, setCache] = useState(null);
|
||||
const [flushing, setFlushing] = useState(false);
|
||||
const [metrics, setMetrics] = useState<CacheMetrics | null>(null);
|
||||
const [resetting, setResetting] = useState(false);
|
||||
const t = useTranslations("settings");
|
||||
|
||||
const fetchStats = () => {
|
||||
fetch("/api/cache/stats")
|
||||
const fetchMetrics = () => {
|
||||
fetch("/api/settings/cache-metrics")
|
||||
.then((r) => r.json())
|
||||
.then(setCache)
|
||||
.then(setMetrics)
|
||||
.catch(() => {});
|
||||
};
|
||||
|
||||
useEffect(fetchStats, []);
|
||||
useEffect(fetchMetrics, []);
|
||||
|
||||
const handleFlush = async () => {
|
||||
setFlushing(true);
|
||||
const handleReset = async () => {
|
||||
setResetting(true);
|
||||
try {
|
||||
await fetch("/api/cache/stats", { method: "DELETE" });
|
||||
fetchStats();
|
||||
await fetch("/api/settings/cache-metrics", { method: "DELETE" });
|
||||
fetchMetrics();
|
||||
} finally {
|
||||
setFlushing(false);
|
||||
setResetting(false);
|
||||
}
|
||||
};
|
||||
|
||||
const cacheHitRate =
|
||||
metrics && metrics.totalInputTokens > 0
|
||||
? (metrics.totalCachedTokens / metrics.totalInputTokens) * 100
|
||||
: 0;
|
||||
|
||||
return (
|
||||
<Card className="p-6">
|
||||
<div className="flex items-center justify-between mb-4">
|
||||
<h3 className="text-lg font-semibold text-text-main flex items-center gap-2">
|
||||
<span className="material-symbols-outlined text-[20px]">cached</span>
|
||||
{t("promptCache")}
|
||||
<span className="material-symbols-outlined text-[20px]">insights</span>
|
||||
Prompt Cache Metrics
|
||||
</h3>
|
||||
<button
|
||||
onClick={handleFlush}
|
||||
disabled={flushing}
|
||||
onClick={handleReset}
|
||||
disabled={resetting}
|
||||
className="px-3 py-1.5 text-xs rounded-lg bg-red-500/10 text-red-400 hover:bg-red-500/20 transition-colors disabled:opacity-50"
|
||||
>
|
||||
{flushing ? t("flushing") : t("flushCache")}
|
||||
{resetting ? "Resetting..." : "Reset Metrics"}
|
||||
</button>
|
||||
</div>
|
||||
|
||||
{cache ? (
|
||||
<div className="grid grid-cols-2 gap-4 text-sm">
|
||||
<div>
|
||||
<p className="text-text-muted">{t("size")}</p>
|
||||
<p className="font-mono text-lg text-text-main">
|
||||
{cache.size}/{cache.maxSize}
|
||||
</p>
|
||||
{metrics ? (
|
||||
<div className="space-y-4">
|
||||
{/* Overview Stats */}
|
||||
<div className="grid grid-cols-2 gap-4 text-sm">
|
||||
<div>
|
||||
<p className="text-text-muted">Total Requests</p>
|
||||
<p className="font-mono text-lg text-text-main">{metrics.totalRequests}</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-text-muted">With Cache Control</p>
|
||||
<p className="font-mono text-lg text-text-main">{metrics.requestsWithCacheControl}</p>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-text-muted">{t("hitRate")}</p>
|
||||
<p className="font-mono text-lg text-text-main">{cache.hitRate?.toFixed(1) ?? 0}%</p>
|
||||
|
||||
{/* Token Stats */}
|
||||
<div className="grid grid-cols-3 gap-4 text-sm">
|
||||
<div>
|
||||
<p className="text-text-muted">Input Tokens</p>
|
||||
<p className="font-mono text-lg text-text-main">
|
||||
{metrics.totalInputTokens.toLocaleString()}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-text-muted">Cached Tokens (Read)</p>
|
||||
<p className="font-mono text-lg text-green-400">
|
||||
{metrics.totalCachedTokens.toLocaleString()}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-text-muted">Cache Creation (Write)</p>
|
||||
<p className="font-mono text-lg text-blue-400">
|
||||
{metrics.totalCacheCreationTokens.toLocaleString()}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-text-muted">{t("hits")}</p>
|
||||
<p className="font-mono text-text-main">{cache.hits ?? 0}</p>
|
||||
|
||||
{/* Cache Ratio */}
|
||||
<div className="rounded-lg bg-surface/50 border border-border/30 p-3">
|
||||
<div className="flex items-center justify-between">
|
||||
<div>
|
||||
<p className="text-sm font-medium text-text-main">Cache Reuse Ratio</p>
|
||||
<p className="text-xs text-text-muted">Cached tokens / Total input tokens</p>
|
||||
</div>
|
||||
<p className="font-mono text-xl text-green-400">{cacheHitRate.toFixed(1)}%</p>
|
||||
</div>
|
||||
{/* Progress bar */}
|
||||
<div className="mt-2 h-2 rounded-full bg-border/30 overflow-hidden">
|
||||
<div
|
||||
className="h-full bg-green-500 transition-all duration-300"
|
||||
style={{ width: `${Math.min(cacheHitRate, 100)}%` }}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-text-muted">{t("evictions")}</p>
|
||||
<p className="font-mono text-text-main">{cache.evictions ?? 0}</p>
|
||||
|
||||
{/* Savings */}
|
||||
<div className="grid grid-cols-2 gap-4 text-sm">
|
||||
<div>
|
||||
<p className="text-text-muted">Tokens Saved</p>
|
||||
<p className="font-mono text-lg text-green-400">
|
||||
{metrics.tokensSaved.toLocaleString()}
|
||||
</p>
|
||||
</div>
|
||||
<div>
|
||||
<p className="text-text-muted">Est. Cost Saved</p>
|
||||
<p className="font-mono text-lg text-green-400">
|
||||
${metrics.estimatedCostSaved.toFixed(4)}
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* By Provider */}
|
||||
{Object.keys(metrics.byProvider).length > 0 && (
|
||||
<div className="pt-3 border-t border-border/30">
|
||||
<p className="text-xs font-medium text-text-muted mb-2">By Provider</p>
|
||||
<div className="space-y-2">
|
||||
{Object.entries(metrics.byProvider).map(([provider, stats]) => {
|
||||
const providerCacheRate =
|
||||
stats.inputTokens > 0 ? (stats.cachedTokens / stats.inputTokens) * 100 : 0;
|
||||
return (
|
||||
<div
|
||||
key={provider}
|
||||
className="flex items-center justify-between px-3 py-2 rounded bg-surface/30 text-xs"
|
||||
>
|
||||
<div className="flex items-center gap-3">
|
||||
<span className="text-text-main capitalize w-24">{provider}</span>
|
||||
<span className="text-text-muted">{stats.requests} reqs</span>
|
||||
</div>
|
||||
<div className="flex items-center gap-4 font-mono">
|
||||
<span className="text-text-muted" title="Input tokens">
|
||||
In: {stats.inputTokens.toLocaleString()}
|
||||
</span>
|
||||
<span className="text-green-400" title="Cached tokens (reads)">
|
||||
Cached: {stats.cachedTokens.toLocaleString()}
|
||||
</span>
|
||||
<span className="text-blue-400" title="Cache creation tokens (writes)">
|
||||
Write: {stats.cacheCreationTokens.toLocaleString()}
|
||||
</span>
|
||||
<span className="text-green-400 w-12 text-right">
|
||||
{providerCacheRate.toFixed(0)}%
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
) : (
|
||||
<p className="text-sm text-text-muted">{t("loadingCacheStats")}</p>
|
||||
<p className="text-sm text-text-muted">Loading cache metrics...</p>
|
||||
)}
|
||||
</Card>
|
||||
);
|
||||
|
||||
@@ -19,7 +19,10 @@ const STRATEGIES = ROUTING_STRATEGIES.filter((strategy) =>
|
||||
}));
|
||||
|
||||
export default function RoutingTab() {
|
||||
const [settings, setSettings] = useState<any>({ fallbackStrategy: "fill-first" });
|
||||
const [settings, setSettings] = useState<any>({
|
||||
fallbackStrategy: "fill-first",
|
||||
alwaysPreserveClientCache: "auto",
|
||||
});
|
||||
const [loading, setLoading] = useState(true);
|
||||
const [aliases, setAliases] = useState([]);
|
||||
const [newPattern, setNewPattern] = useState("");
|
||||
@@ -218,6 +221,74 @@ export default function RoutingTab() {
|
||||
|
||||
{/* Fallback Chains */}
|
||||
<FallbackChainsEditor />
|
||||
|
||||
{/* Client Cache Control */}
|
||||
<Card>
|
||||
<div className="flex items-center gap-3 mb-4">
|
||||
<div className="p-2 rounded-lg bg-green-500/10 text-green-500">
|
||||
<span className="material-symbols-outlined text-[20px]" aria-hidden="true">
|
||||
cached
|
||||
</span>
|
||||
</div>
|
||||
<div>
|
||||
<h3 className="text-lg font-semibold">Client Cache Control</h3>
|
||||
<p className="text-sm text-text-muted">
|
||||
Configure how client-side cache_control headers are handled
|
||||
</p>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div className="space-y-3">
|
||||
{[
|
||||
{
|
||||
value: "auto",
|
||||
label: "Auto (Recommended)",
|
||||
desc: "Preserve cache_control only for caching-aware clients (Claude Code) with deterministic routing",
|
||||
},
|
||||
{
|
||||
value: "always",
|
||||
label: "Always Preserve",
|
||||
desc: "Always forward client cache_control headers to upstream providers",
|
||||
},
|
||||
{
|
||||
value: "never",
|
||||
label: "Never Preserve",
|
||||
desc: "Always remove client cache_control headers, let OmniRoute manage caching",
|
||||
},
|
||||
].map((option) => (
|
||||
<button
|
||||
key={option.value}
|
||||
onClick={() => updateSetting({ alwaysPreserveClientCache: option.value })}
|
||||
disabled={loading}
|
||||
className={`w-full flex flex-col items-start gap-1 p-3 rounded-lg border text-left transition-all ${
|
||||
settings.alwaysPreserveClientCache === option.value
|
||||
? "border-green-500/50 bg-green-500/5 ring-1 ring-green-500/20"
|
||||
: "border-border/50 hover:border-border hover:bg-surface/30"
|
||||
}`}
|
||||
>
|
||||
<div className="flex items-center gap-2">
|
||||
<span
|
||||
className={`material-symbols-outlined text-[16px] ${
|
||||
settings.alwaysPreserveClientCache === option.value
|
||||
? "text-green-400"
|
||||
: "text-text-muted"
|
||||
}`}
|
||||
>
|
||||
{settings.alwaysPreserveClientCache === option.value
|
||||
? "check_circle"
|
||||
: "radio_button_unchecked"}
|
||||
</span>
|
||||
<span
|
||||
className={`text-sm font-medium ${settings.alwaysPreserveClientCache === option.value ? "text-green-400" : ""}`}
|
||||
>
|
||||
{option.label}
|
||||
</span>
|
||||
</div>
|
||||
<p className="text-xs text-text-muted ml-7">{option.desc}</p>
|
||||
</button>
|
||||
))}
|
||||
</div>
|
||||
</Card>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -98,7 +98,10 @@ export async function GET() {
|
||||
|
||||
await Promise.all(
|
||||
settingsTools.map(async (toolId) => {
|
||||
if (!statuses[toolId]?.installed || !statuses[toolId]?.runnable) {
|
||||
if (!statuses[toolId]) {
|
||||
return;
|
||||
}
|
||||
if (!statuses[toolId].installed || !statuses[toolId].runnable) {
|
||||
statuses[toolId].configStatus = "not_installed";
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import { getSettings, updateSettings } from "@/lib/localDb";
|
||||
import { updateAutoDisableAccountsSchema } from "@/shared/validation/schemas";
|
||||
import { isValidationFailure, validateBody } from "@/shared/validation/helpers";
|
||||
|
||||
/**
 * GET handler for the auto-disable accounts configuration.
 *
 * Reads the persisted settings and responds with `{ enabled, threshold }`,
 * substituting `false` / `3` when the stored values are null or undefined.
 * Any failure is logged and reported as a 500 JSON error.
 */
export async function GET() {
  try {
    const { autoDisableBannedAccounts, autoDisableBannedThreshold } = await getSettings();
    const payload = {
      enabled: autoDisableBannedAccounts ?? false,
      threshold: autoDisableBannedThreshold ?? 3,
    };
    return NextResponse.json(payload);
  } catch (error) {
    console.error("Error reading auto-disable accounts config:", error);
    const failure = { error: "Failed to read auto-disable accounts config" };
    return NextResponse.json(failure, { status: 500 });
  }
}
|
||||
|
||||
/**
 * PUT handler for the auto-disable accounts configuration.
 *
 * Flow:
 *  1. Parse the JSON body; an unparsable body yields a 400 with a structured error.
 *  2. Validate against `updateAutoDisableAccountsSchema`; failures yield a 400
 *     carrying `validation.error`.
 *  3. Persist `enabled` (always) and `threshold` (only when supplied).
 *  4. Re-read the settings and respond with the effective `{ enabled, threshold }`.
 *
 * Unexpected errors in steps 2-4 are logged and reported as a 500 JSON error.
 */
export async function PUT(request: Request) {
  let rawBody: unknown;
  try {
    rawBody = await request.json();
  } catch {
    const invalidJson = {
      error: { message: "Invalid request", details: [{ field: "body", message: "Invalid JSON body" }] },
    };
    return NextResponse.json(invalidJson, { status: 400 });
  }

  try {
    const validation = validateBody(updateAutoDisableAccountsSchema, rawBody);
    if (isValidationFailure(validation)) {
      return NextResponse.json({ error: validation.error }, { status: 400 });
    }

    const { enabled, threshold } = validation.data;
    // The threshold key is only written when the client actually sent one.
    const patch =
      threshold === undefined
        ? { autoDisableBannedAccounts: enabled }
        : { autoDisableBannedAccounts: enabled, autoDisableBannedThreshold: threshold };
    await updateSettings(patch);

    const { autoDisableBannedAccounts, autoDisableBannedThreshold } = await getSettings();
    return NextResponse.json({
      enabled: autoDisableBannedAccounts ?? false,
      threshold: autoDisableBannedThreshold ?? 3,
    });
  } catch (error) {
    console.error("Error updating auto-disable accounts config:", error);
    const failure = { error: "Failed to update auto-disable accounts config" };
    return NextResponse.json(failure, { status: 500 });
  }
}
|
||||
@@ -0,0 +1,22 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import { getCacheMetrics, resetCacheMetrics } from "@/lib/db/settings";
|
||||
|
||||
/**
 * GET handler: responds with the result of `getCacheMetrics()` as JSON.
 * If that call throws, the error is logged and a 500 JSON error is returned.
 */
export async function GET() {
  try {
    const snapshot = await getCacheMetrics();
    return NextResponse.json(snapshot);
  } catch (error) {
    console.error("Error getting cache metrics:", error);
    const failure = { error: "Failed to load cache metrics" };
    return NextResponse.json(failure, { status: 500 });
  }
}
|
||||
|
||||
/**
 * DELETE handler: invokes `resetCacheMetrics()` and responds with whatever
 * it returns as JSON. If that call throws, the error is logged and a 500
 * JSON error is returned.
 */
export async function DELETE() {
  try {
    const afterReset = await resetCacheMetrics();
    return NextResponse.json(afterReset);
  } catch (error) {
    console.error("Error resetting cache metrics:", error);
    const failure = { error: "Failed to reset cache metrics" };
    return NextResponse.json(failure, { status: 500 });
  }
}
|
||||
@@ -119,6 +119,12 @@ export async function PATCH(request) {
|
||||
invalidateCallLogsMaxCache();
|
||||
}
|
||||
|
||||
// Sync cache control settings to runtime cache
|
||||
if ("alwaysPreserveClientCache" in body) {
|
||||
const { invalidateCacheControlSettingsCache } = await import("@/lib/cacheControlSettings");
|
||||
invalidateCacheControlSettingsCache();
|
||||
}
|
||||
|
||||
const { password, ...safeSettings } = settings;
|
||||
return NextResponse.json(safeSettings);
|
||||
} catch (error) {
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/**
|
||||
* GET /api/system/version — Returns current version and latest available on npm
|
||||
* POST /api/system/update — Triggers npm install -g omniroute@latest + pm2 restart
|
||||
* POST /api/system/version — Triggers a deployment-aware background update
|
||||
*
|
||||
* Security: Requires admin authentication (same as other management routes).
|
||||
* Safety: Update only runs if a newer version is available on npm.
|
||||
@@ -9,12 +9,16 @@ import { NextRequest, NextResponse } from "next/server";
|
||||
import { execFile } from "child_process";
|
||||
import { promisify } from "util";
|
||||
import { isAuthenticated } from "@/shared/utils/apiAuth";
|
||||
import {
|
||||
getAutoUpdateConfig,
|
||||
launchAutoUpdate,
|
||||
validateAutoUpdateRuntime,
|
||||
} from "@/lib/system/autoUpdate";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
export const dynamic = "force-dynamic";
|
||||
|
||||
/** Fetch latest version from npm registry (no install, just metadata) */
|
||||
async function getLatestNpmVersion(): Promise<string | null> {
|
||||
try {
|
||||
const { stdout } = await execFileAsync("npm", ["info", "omniroute", "version", "--json"], {
|
||||
@@ -27,7 +31,6 @@ async function getLatestNpmVersion(): Promise<string | null> {
|
||||
}
|
||||
}
|
||||
|
||||
/** Current installed version from package.json */
|
||||
function getCurrentVersion(): string {
|
||||
try {
|
||||
return require("../../../../../package.json").version as string;
|
||||
@@ -36,7 +39,6 @@ function getCurrentVersion(): string {
|
||||
}
|
||||
}
|
||||
|
||||
/** Compare semver strings — returns true if a > b */
|
||||
function isNewer(a: string | null, b: string): boolean {
|
||||
if (!a) return false;
|
||||
const parse = (v: string) => v.split(".").map(Number);
|
||||
@@ -48,24 +50,28 @@ function isNewer(a: string | null, b: string): boolean {
|
||||
}
|
||||
|
||||
export async function GET(req: NextRequest) {
|
||||
if (!isAuthenticated(req)) {
|
||||
if (!(await isAuthenticated(req))) {
|
||||
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
|
||||
}
|
||||
|
||||
const current = getCurrentVersion();
|
||||
const latest = await getLatestNpmVersion();
|
||||
const updateAvailable = isNewer(latest, current);
|
||||
const config = getAutoUpdateConfig();
|
||||
const validation = await validateAutoUpdateRuntime(config);
|
||||
|
||||
return NextResponse.json({
|
||||
current,
|
||||
latest: latest ?? "unavailable",
|
||||
updateAvailable,
|
||||
channel: "npm",
|
||||
channel: config.mode,
|
||||
autoUpdateSupported: validation.supported,
|
||||
autoUpdateError: validation.reason,
|
||||
});
|
||||
}
|
||||
|
||||
export async function POST(req: NextRequest) {
|
||||
if (!isAuthenticated(req)) {
|
||||
if (!(await isAuthenticated(req))) {
|
||||
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
|
||||
}
|
||||
|
||||
@@ -88,7 +94,34 @@ export async function POST(req: NextRequest) {
|
||||
});
|
||||
}
|
||||
|
||||
// Stream progress events so the frontend can show real-time status
|
||||
const config = getAutoUpdateConfig();
|
||||
|
||||
// If we are in docker-compose mode, use the detached shell script background updates
|
||||
if (config.mode === "docker-compose") {
|
||||
const launched = await launchAutoUpdate({ latest });
|
||||
if (!launched.started) {
|
||||
return NextResponse.json(
|
||||
{
|
||||
success: false,
|
||||
error: launched.error || "Failed to start auto-update.",
|
||||
channel: launched.channel,
|
||||
logPath: launched.logPath,
|
||||
},
|
||||
{ status: 503 }
|
||||
);
|
||||
}
|
||||
|
||||
return NextResponse.json({
|
||||
success: true,
|
||||
message: `Update to v${latest} started. Docker rebuild is running in the background.`,
|
||||
from: current,
|
||||
to: latest,
|
||||
channel: launched.channel,
|
||||
logPath: launched.logPath,
|
||||
});
|
||||
}
|
||||
|
||||
// Stream progress events so the frontend can show real-time status for NPM/PM2 mode
|
||||
const encoder = new TextEncoder();
|
||||
const stream = new ReadableStream({
|
||||
async start(controller) {
|
||||
|
||||
@@ -14,7 +14,7 @@ import {
|
||||
type EmbeddingProviderNodeRow,
|
||||
type EmbeddingProvider,
|
||||
} from "@omniroute/open-sse/config/embeddingRegistry.ts";
|
||||
import { errorResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
|
||||
import * as log from "@/sse/utils/logger";
|
||||
import { toJsonErrorPayload } from "@/shared/utils/upstreamError";
|
||||
@@ -209,6 +209,14 @@ export async function POST(request) {
|
||||
`No credentials for embedding provider: ${provider}`
|
||||
);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${provider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const result = await handleEmbedding({
|
||||
|
||||
@@ -11,7 +11,7 @@ import {
|
||||
getAllImageModels,
|
||||
getImageProvider,
|
||||
} from "@omniroute/open-sse/config/imageRegistry.ts";
|
||||
import { errorResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
|
||||
import * as log from "@/sse/utils/logger";
|
||||
import { toJsonErrorPayload } from "@/shared/utils/upstreamError";
|
||||
@@ -156,8 +156,15 @@ export async function POST(request) {
|
||||
`No credentials for image provider: ${provider}`
|
||||
);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${provider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
} else if (isCustomModel) {
|
||||
// Custom models need credentials from the provider connection
|
||||
credentials = await getProviderCredentials(provider);
|
||||
if (!credentials) {
|
||||
return errorResponse(
|
||||
@@ -165,6 +172,14 @@ export async function POST(request) {
|
||||
`No credentials for custom image provider: ${provider}`
|
||||
);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${provider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const result = await handleImageGeneration({
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { CORS_ORIGIN } from "@/shared/utils/cors";
|
||||
import { errorResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
|
||||
import { getRegistryEntry } from "@omniroute/open-sse/config/providerRegistry.ts";
|
||||
import {
|
||||
@@ -85,6 +85,14 @@ export async function POST(request, { params }) {
|
||||
if (!credentials) {
|
||||
return errorResponse(HTTP_STATUS.BAD_REQUEST, `No credentials for provider: ${rawProvider}`);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${rawProvider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
|
||||
const result = await handleEmbedding({ body, credentials, log });
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { CORS_ORIGIN } from "@/shared/utils/cors";
|
||||
import { handleImageGeneration } from "@omniroute/open-sse/handlers/imageGeneration.ts";
|
||||
import { errorResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
|
||||
import {
|
||||
getProviderCredentials,
|
||||
@@ -85,6 +85,14 @@ export async function POST(request, { params }) {
|
||||
`No credentials for image provider: ${rawProvider}`
|
||||
);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${rawProvider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
|
||||
const result = await handleImageGeneration({ body, credentials, log });
|
||||
|
||||
|
||||
@@ -0,0 +1,25 @@
|
||||
/**
|
||||
* Cache Control Settings
|
||||
*
|
||||
* Provides cached access to cache control settings for performance.
|
||||
* Settings are fetched once and cached to avoid repeated DB hits.
|
||||
*/
|
||||
|
||||
import { getSettings } from "./db/settings";
|
||||
import type { CacheControlMode } from "@omniroute/open-sse/utils/cacheControlPolicy";
|
||||
|
||||
let cachedSettings: CacheControlMode | null = null;
|
||||
|
||||
/**
 * Returns the cache-control preservation mode.
 *
 * The value is memoised in the module-level `cachedSettings`; settings are
 * only read via `getSettings()` while that memo is `null`. A falsy stored
 * value falls back to "auto".
 */
export async function getCacheControlSettings(): Promise<CacheControlMode> {
  if (cachedSettings === null) {
    const { alwaysPreserveClientCache } = await getSettings();
    cachedSettings = (alwaysPreserveClientCache as CacheControlMode) || "auto";
  }

  return cachedSettings;
}
|
||||
|
||||
/**
 * Clears the memoised cache-control mode so that the next call to
 * `getCacheControlSettings()` reads the settings again.
 */
export function invalidateCacheControlSettingsCache(): void {
  cachedSettings = null;
}
|
||||
@@ -46,6 +46,7 @@ export async function getSettings() {
|
||||
stickyRoundRobinLimit: 3,
|
||||
requireLogin: true,
|
||||
hiddenSidebarItems: [],
|
||||
alwaysPreserveClientCache: "auto",
|
||||
};
|
||||
for (const row of rows) {
|
||||
const record = toRecord(row);
|
||||
@@ -486,3 +487,177 @@ export async function setProxyConfig(config: Record<string, unknown>) {
|
||||
backupDbFile("pre-write");
|
||||
return current;
|
||||
}
|
||||
|
||||
// ──────────────── Cache Control Metrics ────────────────
|
||||
// Cache metrics are now computed from usage_history table on-the-fly
|
||||
// This avoids race conditions and keeps a single source of truth for token data
|
||||
|
||||
/**
 * Computes cache metrics by aggregating the `usage_history` table.
 *
 * Four queries are issued, in order: totals over rows with cache activity,
 * a count of all rows, a per-provider breakdown, and a per-strategy breakdown
 * (currently a single constant 'direct' bucket). Nullable aggregates are
 * reported as 0.
 *
 * If any query throws, the error is logged and an all-zero metrics object
 * with empty breakdowns is returned instead of propagating the error.
 */
export async function getCacheMetrics() {
  type AggregateRow = {
    requests: number;
    inputTokens: number | null;
    cachedTokens: number | null;
    cacheCreationTokens: number | null;
  };
  type Bucket = {
    requests: number;
    inputTokens: number;
    cachedTokens: number;
    cacheCreationTokens: number;
  };

  // Aggregate columns are typed nullable; normalise them to plain numbers.
  const toBucket = (row: AggregateRow): Bucket => ({
    requests: row.requests,
    inputTokens: row.inputTokens || 0,
    cachedTokens: row.cachedTokens || 0,
    cacheCreationTokens: row.cacheCreationTokens || 0,
  });

  const db = getDbInstance();

  try {
    // Totals restricted to requests that read from or wrote to a cache.
    const cacheTotals = db
      .prepare(
        `
        SELECT
          COUNT(*) as totalRequests,
          SUM(tokens_input) as totalInputTokens,
          SUM(tokens_cache_read) as totalCachedTokens,
          SUM(tokens_cache_creation) as totalCacheCreationTokens
        FROM usage_history
        WHERE tokens_cache_read > 0 OR tokens_cache_creation > 0
        `
      )
      .get() as
      | {
          totalRequests: number;
          totalInputTokens: number | null;
          totalCachedTokens: number | null;
          totalCacheCreationTokens: number | null;
        }
      | undefined;

    // Count of every recorded request, with or without cache activity.
    const overall = db
      .prepare(
        `
        SELECT COUNT(*) as totalRequests
        FROM usage_history
        `
      )
      .get() as { totalRequests: number } | undefined;

    // Per-provider breakdown of cache-active requests.
    const providerRows = db
      .prepare(
        `
        SELECT
          provider,
          COUNT(*) as requests,
          SUM(tokens_input) as inputTokens,
          SUM(tokens_cache_read) as cachedTokens,
          SUM(tokens_cache_creation) as cacheCreationTokens
        FROM usage_history
        WHERE (tokens_cache_read > 0 OR tokens_cache_creation > 0)
          AND provider IS NOT NULL
        GROUP BY provider
        `
      )
      .all() as Array<AggregateRow & { provider: string }>;

    // Per-strategy breakdown.
    // combo_strategy isn't tracked in usage_history yet, so every request is reported as 'direct'.
    // TODO: Add combo_strategy column to usage_history for proper strategy tracking
    const strategyRows = db
      .prepare(
        `
        SELECT
          'direct' as strategy,
          COUNT(*) as requests,
          SUM(tokens_input) as inputTokens,
          SUM(tokens_cache_read) as cachedTokens,
          SUM(tokens_cache_creation) as cacheCreationTokens
        FROM usage_history
        WHERE (tokens_cache_read > 0 OR tokens_cache_creation > 0)
        GROUP BY 'direct'
        `
      )
      .all() as Array<AggregateRow & { strategy: string }>;

    const byProvider: Record<string, Bucket> = {};
    providerRows.forEach((row) => {
      byProvider[row.provider] = toBucket(row);
    });

    const byStrategy: Record<string, Bucket> = {};
    strategyRows.forEach((row) => {
      byStrategy[row.strategy] = toBucket(row);
    });

    const cacheRequestCount = cacheTotals?.totalRequests || 0;
    // Tokens served from cache are reported as the tokens saved.
    const cachedTokenTotal = cacheTotals?.totalCachedTokens || 0;

    return {
      totalRequests: overall?.totalRequests || cacheRequestCount,
      requestsWithCacheControl: cacheRequestCount,
      totalInputTokens: cacheTotals?.totalInputTokens || 0,
      totalCachedTokens: cachedTokenTotal,
      totalCacheCreationTokens: cacheTotals?.totalCacheCreationTokens || 0,
      tokensSaved: cachedTokenTotal,
      estimatedCostSaved: 0, // Would need pricing data to calculate
      byProvider,
      byStrategy,
      lastUpdated: new Date().toISOString(),
    };
  } catch (error) {
    console.error("Failed to fetch cache metrics from usage_history:", error);
    return {
      totalRequests: 0,
      requestsWithCacheControl: 0,
      totalInputTokens: 0,
      totalCachedTokens: 0,
      totalCacheCreationTokens: 0,
      tokensSaved: 0,
      estimatedCostSaved: 0,
      byProvider: {},
      byStrategy: {},
      lastUpdated: new Date().toISOString(),
    };
  }
}
|
||||
|
||||
/**
 * Retained for API compatibility: the supplied metrics are ignored and the
 * current metrics, as computed by `getCacheMetrics()`, are returned.
 *
 * @param _metrics Unused.
 */
export async function updateCacheMetrics(_metrics: Record<string, unknown>) {
  // Nothing is written here; metrics are derived from usage_history on demand.
  const current = getCacheMetrics();
  return current;
}
|
||||
|
||||
/**
 * Deprecated no-op "reset": emits a deprecation warning and returns the
 * current metrics from `getCacheMetrics()`. No stored data is modified.
 */
export async function resetCacheMetrics() {
  const deprecationNotice =
    "resetCacheMetrics is deprecated - cache metrics are now computed from usage_history";
  console.warn(deprecationNotice);

  // Historical usage rows are left intact, so the metrics are simply recomputed.
  const current = getCacheMetrics();
  return current;
}
|
||||
|
||||
@@ -0,0 +1,258 @@
|
||||
import { execFile, spawn } from "node:child_process";
|
||||
import { closeSync, mkdirSync, openSync } from "node:fs";
|
||||
import { access } from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
import { promisify } from "node:util";
|
||||
|
||||
const execFileAsync = promisify(execFile);
|
||||
|
||||
type ComposeCommand = "docker compose" | "docker-compose";
|
||||
export type AutoUpdateMode = "npm" | "docker-compose";
|
||||
|
||||
type ExecFileLike = typeof execFileAsync;
|
||||
type SpawnLike = typeof spawn;
|
||||
|
||||
export type AutoUpdateConfig = {
|
||||
mode: AutoUpdateMode;
|
||||
repoDir: string;
|
||||
composeFile: string;
|
||||
composeProfile: string;
|
||||
composeService: string;
|
||||
gitRemote: string;
|
||||
patchCommits: string[];
|
||||
logPath: string;
|
||||
};
|
||||
|
||||
export type AutoUpdateValidation = {
|
||||
supported: boolean;
|
||||
reason: string | null;
|
||||
composeCommand: ComposeCommand | null;
|
||||
};
|
||||
|
||||
export type AutoUpdateLaunchResult = {
|
||||
started: boolean;
|
||||
channel: AutoUpdateMode;
|
||||
logPath: string;
|
||||
composeCommand: ComposeCommand | null;
|
||||
error?: string;
|
||||
};
|
||||
|
||||
/**
 * Maps a raw mode string onto a supported update mode.
 * Only the exact string "docker-compose" selects docker-compose; every other
 * value (including `undefined`) selects "npm".
 */
function normalizeMode(raw: string | undefined): AutoUpdateMode {
  if (raw === "docker-compose") {
    return "docker-compose";
  }
  return "npm";
}
|
||||
|
||||
/**
 * Resolves to `true` when `access(targetPath)` succeeds and to `false` when
 * it rejects for any reason. Never rejects.
 */
async function pathExists(targetPath: string): Promise<boolean> {
  return access(targetPath).then(
    () => true,
    () => false
  );
}
|
||||
|
||||
/**
 * Wraps `value` in single quotes for use as one shell word.
 * Each embedded single quote is replaced by the sequence `'"'"'`
 * (close quote, double-quoted single quote, reopen quote).
 */
function shellQuote(value: string): string {
  const escaped = value.split("'").join(`'"'"'`);
  return "'" + escaped + "'";
}
|
||||
|
||||
/**
 * Splits a whitespace- and/or comma-separated list into its non-empty tokens.
 * `undefined` or an empty string yields an empty array.
 */
function parsePatchCommits(raw: string | undefined): string[] {
  if (!raw) {
    return [];
  }
  // Each token is a maximal run of characters that are neither whitespace nor commas.
  return raw.match(/[^\s,]+/g) ?? [];
}
|
||||
|
||||
/**
 * Builds the auto-update configuration from environment variables.
 *
 * Every field falls back to a built-in default when its variable is unset or
 * empty. The compose file default is derived from the resolved repo
 * directory, and the log path default from `DATA_DIR`.
 *
 * @param env Environment to read from; defaults to `process.env`.
 */
export function getAutoUpdateConfig(env: NodeJS.ProcessEnv = process.env): AutoUpdateConfig {
  const repoDir = env.AUTO_UPDATE_REPO_DIR || "/workspace/omniroute";
  const fallbackComposeFile = path.join(repoDir, "docker-compose.yml");
  const fallbackLogPath = path.join(env.DATA_DIR || "/tmp/omniroute", "logs", "auto-update.log");

  const config: AutoUpdateConfig = {
    mode: normalizeMode(env.AUTO_UPDATE_MODE),
    repoDir,
    composeFile: env.AUTO_UPDATE_COMPOSE_FILE || fallbackComposeFile,
    composeProfile: env.AUTO_UPDATE_COMPOSE_PROFILE || "cli",
    composeService: env.AUTO_UPDATE_SERVICE || "omniroute-cli",
    gitRemote: env.AUTO_UPDATE_GIT_REMOTE || "origin",
    patchCommits: parsePatchCommits(env.AUTO_UPDATE_PATCH_COMMITS),
    logPath: env.AUTO_UPDATE_LOG_PATH || fallbackLogPath,
  };
  return config;
}
|
||||
|
||||
/**
 * Probes for a usable Compose CLI.
 *
 * Tries `docker compose version` first, then `docker-compose version`, each
 * with a 10 second timeout, and returns the label of the first probe that
 * succeeds. Returns `null` when both probes fail.
 *
 * @param execFileImpl Command runner; injectable for testing.
 */
export async function detectComposeCommand(
  execFileImpl: ExecFileLike = execFileAsync
): Promise<ComposeCommand | null> {
  const probes: Array<{ label: ComposeCommand; bin: string; args: string[] }> = [
    { label: "docker compose", bin: "docker", args: ["compose", "version"] },
    { label: "docker-compose", bin: "docker-compose", args: ["version"] },
  ];

  for (const probe of probes) {
    try {
      await execFileImpl(probe.bin, probe.args, { timeout: 10_000 });
      return probe.label;
    } catch {
      // This variant is unavailable; move on to the next candidate.
    }
  }

  return null;
}
|
||||
|
||||
/**
 * Checks whether the runtime can perform an update in the configured mode.
 *
 * Any mode other than "docker-compose" is reported as supported without
 * further checks. For docker-compose the following are verified in order,
 * stopping at the first failure: the repo directory exists, the compose file
 * exists, `/var/run/docker.sock` exists, `git --version` runs, and a Compose
 * CLI is detected.
 *
 * @param config       Resolved auto-update configuration.
 * @param execFileImpl Command runner; injectable for testing.
 * @param existsImpl   Path-existence check; injectable for testing.
 * @returns `supported` plus a human-readable `reason` on failure and the
 *          detected compose command on success.
 */
export async function validateAutoUpdateRuntime(
  config: AutoUpdateConfig,
  execFileImpl: ExecFileLike = execFileAsync,
  existsImpl: (targetPath: string) => Promise<boolean> = pathExists
): Promise<AutoUpdateValidation> {
  const unsupported = (reason: string): AutoUpdateValidation => ({
    supported: false,
    reason,
    composeCommand: null,
  });

  if (config.mode !== "docker-compose") {
    return { supported: true, reason: null, composeCommand: null };
  }

  const repoPresent = await existsImpl(config.repoDir);
  if (!repoPresent) {
    return unsupported(`Repository directory not found: ${config.repoDir}`);
  }

  const composeFilePresent = await existsImpl(config.composeFile);
  if (!composeFilePresent) {
    return unsupported(`Compose file not found: ${config.composeFile}`);
  }

  const dockerSocketPresent = await existsImpl("/var/run/docker.sock");
  if (!dockerSocketPresent) {
    return unsupported("Docker socket is not mounted into the OmniRoute container.");
  }

  try {
    await execFileImpl("git", ["--version"], { timeout: 10_000 });
  } catch {
    return unsupported("git is not available inside the OmniRoute container.");
  }

  const composeCommand = await detectComposeCommand(execFileImpl);
  if (composeCommand) {
    return { supported: true, reason: null, composeCommand };
  }
  return unsupported(
    "Neither docker compose nor docker-compose is available inside the OmniRoute container."
  );
}
|
||||
|
||||
/**
 * Builds the POSIX shell script that installs a given omniroute version
 * globally via npm and, when pm2 is on PATH, restarts the `omniroute` process.
 *
 * The version string is never spliced into the script as raw text: it is
 * assigned once to a single-quoted shell variable and referenced only inside
 * double quotes afterwards. This keeps a version containing shell
 * metacharacters from being executed or word-split.
 *
 * @param latest Version to install (e.g. "3.3.0").
 * @returns The script text, one command per line.
 */
export function buildNpmUpdateScript(latest: string): string {
  return [
    "set -eu",
    `TARGET_VERSION=${shellQuote(latest)}`,
    'npm install -g "omniroute@$TARGET_VERSION" --ignore-scripts',
    "if command -v pm2 >/dev/null 2>&1; then",
    // A failed restart must not fail the whole update.
    " pm2 restart omniroute || true",
    "fi",
    'echo "[AutoUpdate] Successfully updated to v$TARGET_VERSION."',
  ].join("\n");
}
|
||||
|
||||
/**
 * Builds the POSIX shell script that moves the repository checkout to the
 * release tag for `latest` and rebuilds the configured Compose service.
 *
 * Script steps, in order:
 *  - refuse to run when `git status --porcelain` reports local changes;
 *  - fetch tags from the configured remote and verify the target tag exists;
 *  - create a timestamped backup branch at the current HEAD (best effort);
 *  - check out `autoupdate/<version>` at the target tag;
 *  - cherry-pick any configured patch commits;
 *  - run `up -d --build` for the configured service with the configured profile.
 *
 * All configuration values are passed through `shellQuote` into shell
 * variables and referenced only inside double quotes. The final status line
 * uses `$TARGET_TAG` too, so the tag is never interpolated unquoted.
 *
 * @param latest         Target version, with or without a leading "v".
 * @param config         Resolved auto-update configuration.
 * @param composeCommand Which Compose CLI variant to invoke.
 * @returns The script text, one command per line.
 */
export function buildDockerComposeUpdateScript({
  latest,
  config,
  composeCommand,
}: {
  latest: string;
  config: AutoUpdateConfig;
  composeCommand: ComposeCommand;
}): string {
  const targetTag = latest.startsWith("v") ? latest : `v${latest}`;
  const composeInvocation =
    composeCommand === "docker compose"
      ? 'docker compose -f "$COMPOSE_FILE" up -d --build "$SERVICE"'
      : 'docker-compose -f "$COMPOSE_FILE" up -d --build "$SERVICE"';
  const patchLines = config.patchCommits.length
    ? [`git cherry-pick --keep-redundant-commits ${config.patchCommits.map(shellQuote).join(" ")}`]
    : [];

  return [
    "set -eu",
    'export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:$PATH"',
    `REPO_DIR=${shellQuote(config.repoDir)}`,
    `COMPOSE_FILE=${shellQuote(config.composeFile)}`,
    `PROFILE=${shellQuote(config.composeProfile)}`,
    `SERVICE=${shellQuote(config.composeService)}`,
    `REMOTE=${shellQuote(config.gitRemote)}`,
    `TARGET_TAG=${shellQuote(targetTag)}`,
    'cd "$REPO_DIR"',
    'git config --global --add safe.directory "$REPO_DIR" >/dev/null 2>&1 || true',
    'if [ -n "$(git status --porcelain)" ]; then',
    ' echo "[AutoUpdate] Refusing update: git worktree has local changes." >&2',
    " exit 1",
    "fi",
    'git fetch --tags "$REMOTE"',
    'if ! git rev-parse -q --verify "refs/tags/$TARGET_TAG" >/dev/null 2>&1; then',
    ' echo "[AutoUpdate] Tag $TARGET_TAG not found on remote $REMOTE." >&2',
    " exit 1",
    "fi",
    'backup_branch="autoupdate/pre-${TARGET_TAG#v}-$(date +%Y%m%d-%H%M%S)"',
    'git branch "$backup_branch" >/dev/null 2>&1 || true',
    'git checkout -B "autoupdate/${TARGET_TAG#v}" "$TARGET_TAG"',
    ...patchLines,
    'export COMPOSE_PROFILES="$PROFILE"',
    composeInvocation,
    // composeCommand is a closed literal union, so embedding it directly is safe;
    // the tag comes from outside and is therefore read from the quoted variable.
    `echo "[AutoUpdate] Successfully switched to $TARGET_TAG via ${composeCommand}."`,
  ].join("\n");
}
|
||||
|
||||
/**
 * Validates the runtime and, when supported, starts the update script as a
 * detached background `sh -lc` process whose stdout and stderr are appended
 * to the configured log file.
 *
 * Failure handling:
 *  - An unsupported runtime returns `started: false` with the validation reason.
 *  - A synchronous failure while preparing the log file or spawning the
 *    process (for example an unwritable log directory) is also reported as
 *    `started: false` with the error message, rather than thrown.
 *  - The log file descriptor opened here is always closed in this process,
 *    even when spawning throws.
 *
 * @param latest       Version to update to.
 * @param env          Environment used for configuration and passed to the child.
 * @param execFileImpl Command runner used for runtime validation; injectable for testing.
 * @param spawnImpl    Process spawner; injectable for testing.
 */
export async function launchAutoUpdate({
  latest,
  env = process.env,
  execFileImpl = execFileAsync,
  spawnImpl = spawn,
}: {
  latest: string;
  env?: NodeJS.ProcessEnv;
  execFileImpl?: ExecFileLike;
  spawnImpl?: SpawnLike;
}): Promise<AutoUpdateLaunchResult> {
  const config = getAutoUpdateConfig(env);
  const validation = await validateAutoUpdateRuntime(config, execFileImpl);

  if (!validation.supported) {
    return {
      started: false,
      channel: config.mode,
      logPath: config.logPath,
      composeCommand: validation.composeCommand,
      error: validation.reason || "Auto-update runtime is not available.",
    };
  }

  const script =
    config.mode === "docker-compose"
      ? buildDockerComposeUpdateScript({
          latest,
          config,
          composeCommand: validation.composeCommand || "docker-compose",
        })
      : buildNpmUpdateScript(latest);

  let logFd: number | undefined;
  try {
    mkdirSync(path.dirname(config.logPath), { recursive: true });
    logFd = openSync(config.logPath, "a");
    const child = spawnImpl("sh", ["-lc", script], {
      detached: true,
      stdio: ["ignore", logFd, logFd],
      env: { ...process.env, ...env },
    });
    // Let this process exit or continue without waiting for the update to finish.
    child.unref();
  } catch (error) {
    return {
      started: false,
      channel: config.mode,
      logPath: config.logPath,
      composeCommand: validation.composeCommand,
      error: error instanceof Error ? error.message : String(error),
    };
  } finally {
    // Once spawned, the child holds its own copy of the descriptor; ours is no longer needed.
    if (logFd !== undefined) {
      closeSync(logFd);
    }
  }

  return {
    started: true,
    channel: config.mode,
    logPath: config.logPath,
    composeCommand: validation.composeCommand,
  };
}
|
||||
+1
-1
@@ -9,7 +9,7 @@ import { isModelSyncInternalRequest } from "./shared/services/modelSyncScheduler
|
||||
|
||||
const SECRET = new TextEncoder().encode(process.env.JWT_SECRET || "");
|
||||
|
||||
export async function proxy(request) {
|
||||
export async function proxy(request: any) {
|
||||
const { pathname } = request.nextUrl;
|
||||
|
||||
// Pipeline: Add request ID header for end-to-end tracing
|
||||
|
||||
@@ -327,8 +327,15 @@ const getExpectedParentPaths = (): string[] => {
|
||||
|
||||
const npmPrefix = getNpmGlobalPrefix();
|
||||
|
||||
// Add common user bin directories
|
||||
const userBinPaths = [
|
||||
path.join(home, "bin"),
|
||||
path.join(home, ".local", "bin"),
|
||||
];
|
||||
|
||||
return [
|
||||
home,
|
||||
...userBinPaths,
|
||||
userProfile,
|
||||
validatedAppData,
|
||||
validatedLocalAppData,
|
||||
@@ -374,7 +381,10 @@ const getKnownToolPaths = (toolId: string): string[] => {
|
||||
["claude.exe", "claude"],
|
||||
],
|
||||
codex: [["codex.cmd", "codex"]],
|
||||
droid: [["droid.cmd", "droid"]],
|
||||
droid: [
|
||||
["droid.cmd", "droid"],
|
||||
["droid.exe", "droid"],
|
||||
],
|
||||
openclaw: [["openclaw.cmd", "openclaw"]],
|
||||
cursor: [
|
||||
["agent.cmd", "agent"],
|
||||
@@ -404,6 +414,10 @@ const getKnownToolPaths = (toolId: string): string[] => {
|
||||
}
|
||||
}
|
||||
|
||||
if (toolId === "droid") {
|
||||
paths.push(path.join(home, "bin", "droid.exe"));
|
||||
}
|
||||
|
||||
for (const [winName] of bins) {
|
||||
if (npmPrefix) paths.push(path.join(npmPrefix, winName));
|
||||
if (appData) {
|
||||
|
||||
@@ -89,6 +89,10 @@ export async function verifyAuth(request: any): Promise<string | null> {
|
||||
* need to conditionally skip auth should check that separately.
|
||||
*/
|
||||
export async function isAuthenticated(request: Request): Promise<boolean> {
|
||||
// If settings say login/auth is disabled, treat all requests as authenticated
|
||||
if (!(await isAuthRequired())) {
|
||||
return true;
|
||||
}
|
||||
// 1. Check API key (for external clients)
|
||||
const authHeader = request.headers.get("authorization");
|
||||
if (authHeader?.startsWith("Bearer ")) {
|
||||
|
||||
@@ -1313,3 +1313,11 @@ export const v1SearchResponseSchema = z.object({
|
||||
)
|
||||
.optional(),
|
||||
});
|
||||
|
||||
// ─── Auto-disable banned/error accounts ───────────────────────────────────
|
||||
/**
 * Request body schema for updating the auto-disable accounts configuration.
 * - `enabled`: required boolean.
 * - `threshold`: optional integer between 1 and 10 inclusive.
 * Unknown keys are rejected.
 */
export const updateAutoDisableAccountsSchema = z.strictObject({
  enabled: z.boolean(),
  threshold: z.number().int().min(1).max(10).optional(),
});
|
||||
|
||||
@@ -47,6 +47,8 @@ export const updateSettingsSchema = z.object({
|
||||
cliCompatProviders: z.array(z.string().max(100)).optional(),
|
||||
// Strip provider/model prefix at proxy layer (e.g. "openai/gpt-4" → "gpt-4")
|
||||
stripModelPrefix: z.boolean().optional(),
|
||||
// Cache control preservation mode
|
||||
alwaysPreserveClientCache: z.enum(["auto", "always", "never"]).optional(),
|
||||
// Custom CLI agent definitions for ACP
|
||||
customAgents: z
|
||||
.array(
|
||||
|
||||
+80
-17
@@ -144,8 +144,8 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
}
|
||||
|
||||
// Optional strict API key mode for /v1 endpoints (require key on every request).
|
||||
const isInternalTest = request.headers?.get?.("x-internal-test") === "combo-health-check";
|
||||
if (process.env.REQUIRE_API_KEY === "true" && !isInternalTest) {
|
||||
const isComboLiveTest = request.headers?.get?.("x-internal-test") === "combo-health-check";
|
||||
if (process.env.REQUIRE_API_KEY === "true" && !isComboLiveTest) {
|
||||
if (!apiKey) {
|
||||
log.warn("AUTH", "Missing API key while REQUIRE_API_KEY=true");
|
||||
return errorResponse(HTTP_STATUS.UNAUTHORIZED, "Missing API key");
|
||||
@@ -155,7 +155,7 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
log.warn("AUTH", "Invalid API key while REQUIRE_API_KEY=true");
|
||||
return errorResponse(HTTP_STATUS.UNAUTHORIZED, "Invalid API key");
|
||||
}
|
||||
} else if (apiKey && !isInternalTest) {
|
||||
} else if (apiKey && !isComboLiveTest) {
|
||||
// Client sent a Bearer key — it must exist in DB (otherwise reject to avoid "key ignored" confusion).
|
||||
const valid = await isValidApiKey(apiKey);
|
||||
if (!valid) {
|
||||
@@ -238,9 +238,11 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
`Combo "${modelStr}" [${combo.strategy || "priority"}] with ${combo.models.length} models`
|
||||
);
|
||||
|
||||
// Pre-check function: skip models where all accounts are in cooldown
|
||||
// Uses modelAvailability module for TTL-based cooldowns
|
||||
// Pre-check function used by combo routing. For explicit combo live tests,
|
||||
// avoid pre-skipping so each model gets a real execution attempt.
|
||||
const checkModelAvailable = async (modelString: string) => {
|
||||
if (isComboLiveTest) return true;
|
||||
|
||||
// Use getModelInfo to properly resolve custom prefixes
|
||||
const modelInfo = await getModelInfo(modelString);
|
||||
const provider = modelInfo.provider;
|
||||
@@ -273,9 +275,21 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
body,
|
||||
combo,
|
||||
handleSingleModel: (b: any, m: string) =>
|
||||
handleSingleModelChat(b, m, clientRawRequest, request, combo.name, apiKeyInfo, telemetry, {
|
||||
sessionId,
|
||||
}),
|
||||
handleSingleModelChat(
|
||||
b,
|
||||
m,
|
||||
clientRawRequest,
|
||||
request,
|
||||
combo.name,
|
||||
apiKeyInfo,
|
||||
telemetry,
|
||||
{
|
||||
sessionId,
|
||||
forceLiveComboTest: isComboLiveTest,
|
||||
},
|
||||
combo.strategy,
|
||||
true
|
||||
),
|
||||
isModelAvailable: checkModelAvailable,
|
||||
log,
|
||||
settings,
|
||||
@@ -304,7 +318,9 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
combo.name,
|
||||
apiKeyInfo,
|
||||
telemetry,
|
||||
{ sessionId, emergencyFallbackTried: true }
|
||||
{ sessionId, emergencyFallbackTried: true, forceLiveComboTest: isComboLiveTest },
|
||||
combo.strategy,
|
||||
true
|
||||
);
|
||||
if (fallbackResponse.ok) {
|
||||
log.info("GLOBAL_FALLBACK", `Global fallback ${fallbackModel} succeeded`);
|
||||
@@ -336,7 +352,9 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
null,
|
||||
apiKeyInfo,
|
||||
telemetry,
|
||||
{ sessionId }
|
||||
{ sessionId, forceLiveComboTest: isComboLiveTest },
|
||||
null,
|
||||
false
|
||||
);
|
||||
recordTelemetry(telemetry);
|
||||
return withSessionHeader(response, sessionId);
|
||||
@@ -366,16 +384,26 @@ async function handleSingleModelChat(
|
||||
comboName: string | null = null,
|
||||
apiKeyInfo: any = null,
|
||||
telemetry: any = null,
|
||||
runtimeOptions: { emergencyFallbackTried?: boolean; sessionId?: string | null } = {}
|
||||
runtimeOptions: {
|
||||
emergencyFallbackTried?: boolean;
|
||||
forceLiveComboTest?: boolean;
|
||||
sessionId?: string | null;
|
||||
} = {},
|
||||
comboStrategy: string | null = null,
|
||||
isCombo: boolean = false
|
||||
) {
|
||||
// 1. Resolve model → provider/model
|
||||
const resolved = await resolveModelOrError(modelStr, body, clientRawRequest?.endpoint);
|
||||
if (resolved.error) return resolved.error;
|
||||
|
||||
const { provider, model, sourceFormat, targetFormat, extendedContext } = resolved;
|
||||
const forceLiveComboTest = runtimeOptions.forceLiveComboTest === true;
|
||||
|
||||
// 2. Pipeline gates (availability + circuit breaker)
|
||||
const gate = checkPipelineGates(provider, model);
|
||||
const gate = checkPipelineGates(provider, model, {
|
||||
ignoreCircuitBreaker: forceLiveComboTest,
|
||||
ignoreModelCooldown: forceLiveComboTest,
|
||||
});
|
||||
if (gate) return gate;
|
||||
|
||||
const breaker = getCircuitBreaker(provider, {
|
||||
@@ -397,7 +425,13 @@ async function handleSingleModelChat(
|
||||
provider,
|
||||
excludeConnectionId,
|
||||
apiKeyInfo?.allowedConnections ?? null,
|
||||
model
|
||||
model,
|
||||
forceLiveComboTest
|
||||
? {
|
||||
allowSuppressedConnections: true,
|
||||
bypassQuotaPolicy: true,
|
||||
}
|
||||
: undefined
|
||||
);
|
||||
|
||||
if (!credentials || credentials.allRateLimited) {
|
||||
@@ -431,6 +465,7 @@ async function handleSingleModelChat(
|
||||
// 4. Execute chat via core (with circuit breaker + optional TLS)
|
||||
if (telemetry) telemetry.startPhase("connect");
|
||||
const { result, tlsFingerprintUsed } = await executeChatWithBreaker({
|
||||
bypassCircuitBreaker: forceLiveComboTest,
|
||||
breaker,
|
||||
body,
|
||||
provider,
|
||||
@@ -443,6 +478,8 @@ async function handleSingleModelChat(
|
||||
apiKeyInfo,
|
||||
userAgent,
|
||||
comboName,
|
||||
comboStrategy,
|
||||
isCombo,
|
||||
extendedContext,
|
||||
});
|
||||
if (telemetry) telemetry.endPhase();
|
||||
@@ -512,7 +549,9 @@ async function handleSingleModelChat(
|
||||
comboName,
|
||||
apiKeyInfo,
|
||||
telemetry,
|
||||
{ ...runtimeOptions, emergencyFallbackTried: true }
|
||||
{ ...runtimeOptions, emergencyFallbackTried: true },
|
||||
null, // no strategy for emergency fallback
|
||||
Boolean(comboName) // isCombo if comboName exists
|
||||
);
|
||||
|
||||
if (fallbackResponse.ok) {
|
||||
@@ -602,8 +641,15 @@ async function resolveModelOrError(modelStr: string, body: any, endpointPath: st
|
||||
* Check pipeline gates: model availability + circuit breaker state.
|
||||
* Returns an error Response if blocked, or null if OK to proceed.
|
||||
*/
|
||||
function checkPipelineGates(provider: string, model: string) {
|
||||
if (!isModelAvailable(provider, model)) {
|
||||
function checkPipelineGates(
|
||||
provider: string,
|
||||
model: string,
|
||||
options: { ignoreCircuitBreaker?: boolean; ignoreModelCooldown?: boolean } = {}
|
||||
) {
|
||||
const modelAvailable = isModelAvailable(provider, model);
|
||||
if (!modelAvailable && options.ignoreModelCooldown) {
|
||||
log.info("AVAILABILITY", `${provider}/${model} cooldown bypassed for combo live test`);
|
||||
} else if (!modelAvailable) {
|
||||
log.warn("AVAILABILITY", `${provider}/${model} is in cooldown, rejecting request`);
|
||||
return (unavailableResponse as any)(
|
||||
HTTP_STATUS.SERVICE_UNAVAILABLE,
|
||||
@@ -618,7 +664,9 @@ function checkPipelineGates(provider: string, model: string) {
|
||||
onStateChange: (name: string, from: string, to: string) =>
|
||||
log.info("CIRCUIT", `${name}: ${from} → ${to}`),
|
||||
});
|
||||
if (!breaker.canExecute()) {
|
||||
if (options.ignoreCircuitBreaker && !breaker.canExecute()) {
|
||||
log.info("CIRCUIT", `Bypassing OPEN circuit breaker for combo live test: ${provider}`);
|
||||
} else if (!breaker.canExecute()) {
|
||||
log.warn("CIRCUIT", `Circuit breaker OPEN for ${provider}, rejecting request`);
|
||||
return (unavailableResponse as any)(
|
||||
HTTP_STATUS.SERVICE_UNAVAILABLE,
|
||||
@@ -636,6 +684,7 @@ function checkPipelineGates(provider: string, model: string) {
|
||||
* Execute chat core wrapped in circuit breaker + optional TLS tracking.
|
||||
*/
|
||||
async function executeChatWithBreaker({
|
||||
bypassCircuitBreaker,
|
||||
breaker,
|
||||
body,
|
||||
provider,
|
||||
@@ -648,6 +697,8 @@ async function executeChatWithBreaker({
|
||||
apiKeyInfo,
|
||||
userAgent,
|
||||
comboName,
|
||||
comboStrategy,
|
||||
isCombo,
|
||||
extendedContext,
|
||||
}: any): Promise<{ result: any; tlsFingerprintUsed: boolean }> {
|
||||
let tlsFingerprintUsed = false;
|
||||
@@ -665,6 +716,8 @@ async function executeChatWithBreaker({
|
||||
apiKeyInfo,
|
||||
userAgent,
|
||||
comboName,
|
||||
comboStrategy,
|
||||
isCombo,
|
||||
onCredentialsRefreshed: async (newCreds: any) => {
|
||||
await updateProviderCredentials(credentials.connectionId, {
|
||||
accessToken: newCreds.accessToken,
|
||||
@@ -679,6 +732,16 @@ async function executeChatWithBreaker({
|
||||
})
|
||||
);
|
||||
|
||||
if (bypassCircuitBreaker) {
|
||||
if (!proxyInfo?.proxy && isTlsFingerprintActive()) {
|
||||
const tracked = await runWithTlsTracking(chatFn);
|
||||
return { result: tracked.result, tlsFingerprintUsed: tracked.tlsFingerprintUsed };
|
||||
}
|
||||
|
||||
const result = await chatFn();
|
||||
return { result, tlsFingerprintUsed: false };
|
||||
}
|
||||
|
||||
if (!proxyInfo?.proxy && isTlsFingerprintActive()) {
|
||||
const tracked = await breaker.execute(async () => runWithTlsTracking(chatFn));
|
||||
return { result: tracked.result, tlsFingerprintUsed: tracked.tlsFingerprintUsed };
|
||||
|
||||
+66
-17
@@ -3,6 +3,7 @@ import {
|
||||
validateApiKey,
|
||||
updateProviderConnection,
|
||||
getSettings,
|
||||
getCachedSettings,
|
||||
} from "@/lib/localDb";
|
||||
import { getQuotaWindowStatus, isAccountQuotaExhausted } from "@/domain/quotaCache";
|
||||
import {
|
||||
@@ -54,6 +55,11 @@ interface RecoverableConnectionState {
|
||||
lastErrorSource?: string | null;
|
||||
}
|
||||
|
||||
// Optional switches accepted by getProviderCredentials as its last argument.
// Both are treated as enabled only when strictly `true`.
interface CredentialSelectionOptions {
|
||||
// When true, connections that are rate-limited, in a terminal status, or
// codex scope-limited are kept as candidates instead of being filtered out.
allowSuppressedConnections?: boolean;
|
||||
// When true, the quota-limit policy filter over the available connections is skipped.
bypassQuotaPolicy?: boolean;
|
||||
}
|
||||
|
||||
const CODEX_QUOTA_THRESHOLD_PERCENT = 90;
|
||||
const MIN_QUOTA_THRESHOLD_PERCENT = 1;
|
||||
const MAX_QUOTA_THRESHOLD_PERCENT = 100;
|
||||
@@ -311,7 +317,8 @@ export async function getProviderCredentials(
|
||||
provider: string,
|
||||
excludeConnectionId: string | null = null,
|
||||
allowedConnections: string[] | null = null,
|
||||
requestedModel: string | null = null
|
||||
requestedModel: string | null = null,
|
||||
options: CredentialSelectionOptions = {}
|
||||
) {
|
||||
// Acquire mutex to prevent race conditions
|
||||
const currentMutex = selectionMutex;
|
||||
@@ -323,6 +330,9 @@ export async function getProviderCredentials(
|
||||
try {
|
||||
await currentMutex;
|
||||
|
||||
const allowSuppressedConnections = options.allowSuppressedConnections === true;
|
||||
const bypassQuotaPolicy = options.bypassQuotaPolicy === true;
|
||||
|
||||
const connectionsRaw = await getProviderConnections({ provider, isActive: true });
|
||||
let connections = (Array.isArray(connectionsRaw) ? connectionsRaw : [])
|
||||
.map(toProviderConnection)
|
||||
@@ -394,9 +404,11 @@ export async function getProviderCredentials(
|
||||
// Filter out unavailable accounts and excluded connection
|
||||
const availableConnections = connections.filter((c) => {
|
||||
if (excludeConnectionId && c.id === excludeConnectionId) return false;
|
||||
if (isAccountUnavailable(c.rateLimitedUntil)) return false;
|
||||
if (isTerminalConnectionStatus(c)) return false;
|
||||
if (provider === "codex" && isCodexScopeUnavailable(c, requestedModel)) return false;
|
||||
if (!allowSuppressedConnections) {
|
||||
if (isAccountUnavailable(c.rateLimitedUntil)) return false;
|
||||
if (isTerminalConnectionStatus(c)) return false;
|
||||
if (provider === "codex" && isCodexScopeUnavailable(c, requestedModel)) return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
@@ -412,13 +424,23 @@ export async function getProviderCredentials(
|
||||
if (excluded || rateLimited) {
|
||||
log.debug(
|
||||
"AUTH",
|
||||
` → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}`
|
||||
` → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}${allowSuppressedConnections && rateLimited ? " (retained for combo live test)" : ""}`
|
||||
);
|
||||
} else if (terminalStatus) {
|
||||
log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | skipped terminal status=${c.testStatus}`);
|
||||
log.debug(
|
||||
"AUTH",
|
||||
allowSuppressedConnections
|
||||
? ` → ${c.id?.slice(0, 8)} | retained terminal status=${c.testStatus} for combo live test`
|
||||
: ` → ${c.id?.slice(0, 8)} | skipped terminal status=${c.testStatus}`
|
||||
);
|
||||
} else if (codexScopeLimited) {
|
||||
const scopeUntil = getCodexScopeRateLimitedUntil(c.providerSpecificData, requestedModel);
|
||||
log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | codex scope-limited until ${scopeUntil}`);
|
||||
log.debug(
|
||||
"AUTH",
|
||||
allowSuppressedConnections
|
||||
? ` → ${c.id?.slice(0, 8)} | retained codex scope-limited account until ${scopeUntil} for combo live test`
|
||||
: ` → ${c.id?.slice(0, 8)} | codex scope-limited until ${scopeUntil}`
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -461,17 +483,21 @@ export async function getProviderCredentials(
|
||||
resetAt: string | null;
|
||||
}> = [];
|
||||
|
||||
policyEligibleConnections = availableConnections.filter((connection) => {
|
||||
const evaluation = evaluateQuotaLimitPolicy(provider, connection);
|
||||
if (!evaluation.blocked) return true;
|
||||
if (!bypassQuotaPolicy) {
|
||||
policyEligibleConnections = availableConnections.filter((connection) => {
|
||||
const evaluation = evaluateQuotaLimitPolicy(provider, connection);
|
||||
if (!evaluation.blocked) return true;
|
||||
|
||||
blockedByPolicy.push({
|
||||
id: connection.id,
|
||||
reasons: evaluation.reasons,
|
||||
resetAt: evaluation.resetAt,
|
||||
blockedByPolicy.push({
|
||||
id: connection.id,
|
||||
reasons: evaluation.reasons,
|
||||
resetAt: evaluation.resetAt,
|
||||
});
|
||||
return false;
|
||||
});
|
||||
return false;
|
||||
});
|
||||
} else if (availableConnections.length > 0) {
|
||||
log.debug("AUTH", `${provider} | bypassing quota policy for combo live test`);
|
||||
}
|
||||
|
||||
if (blockedByPolicy.length > 0) {
|
||||
log.info(
|
||||
@@ -748,13 +774,14 @@ export async function markAccountUnavailable(
|
||||
}
|
||||
}
|
||||
|
||||
const { shouldFallback, cooldownMs, newBackoffLevel, reason } = checkFallbackError(
|
||||
const result = checkFallbackError(
|
||||
status,
|
||||
errorText,
|
||||
backoffLevel,
|
||||
model,
|
||||
provider // ← Now passes provider for profile-aware cooldowns
|
||||
);
|
||||
const { shouldFallback, cooldownMs, newBackoffLevel, reason } = result;
|
||||
if (!shouldFallback) return { shouldFallback: false, cooldownMs: 0 };
|
||||
|
||||
// ── Local provider 404: model-only lockout, connection stays active ──
|
||||
@@ -820,6 +847,28 @@ export async function markAccountUnavailable(
|
||||
backoffLevel: newBackoffLevel ?? backoffLevel,
|
||||
});
|
||||
|
||||
// T-AUTODISABLE: If auto-disable setting is enabled and error is permanent/terminal,
|
||||
// mark account as inactive so it is never retried again.
|
||||
// Uses getCachedSettings() to avoid DB overhead on hot error path.
|
||||
// NOTE: For permanent bans we disable immediately — no threshold needed,
|
||||
// because a permanent ban (403 "Verify your account" / ToS violation) will
|
||||
// NEVER recover, so retrying is pointless regardless of attempt count.
|
||||
if (result.permanent) {
|
||||
try {
|
||||
const settings = await getCachedSettings();
|
||||
const autoDisableEnabled = settings.autoDisableBannedAccounts ?? false;
|
||||
if (autoDisableEnabled) {
|
||||
await updateProviderConnection(connectionId, { isActive: false });
|
||||
log.info(
|
||||
"AUTH",
|
||||
`Auto-disabled ${connectionId.slice(0, 8)} — permanent ban detected (autoDisableBannedAccounts=true)`
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
log.info("AUTH", `Auto-disable check failed (non-fatal): ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Per-model lockout: lock the specific model if known
|
||||
if (provider && model && cooldownMs > 0) {
|
||||
lockModel(provider, connectionId, model, reason || "unknown", cooldownMs);
|
||||
|
||||
+207
@@ -0,0 +1,207 @@
|
||||
[CREDENTIALS] No external credentials file found, using defaults.
|
||||
[DB] SQLite database ready: /home/diegosouzapw/.omniroute/storage.sqlite
|
||||
[MODEL] Ambiguous model 'claude-haiku-4.5'. Use provider/model prefix (ex: gh/claude-haiku-4.5 or kr/claude-haiku-4.5). Candidates: gh, kr, anthropic
|
||||
TAP version 13
|
||||
# Subtest: getModelInfoCore resolves unique non-openai unprefixed model
|
||||
ok 1 - getModelInfoCore resolves unique non-openai unprefixed model
|
||||
---
|
||||
duration_ms: 3.403766
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: getModelInfoCore keeps openai fallback for gpt-4o
|
||||
ok 2 - getModelInfoCore keeps openai fallback for gpt-4o
|
||||
---
|
||||
duration_ms: 0.535726
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: getModelInfoCore resolves gpt-5.4 to codex
|
||||
ok 3 - getModelInfoCore resolves gpt-5.4 to codex
|
||||
---
|
||||
duration_ms: 0.321781
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: getModelInfoCore returns explicit ambiguity metadata for ambiguous unprefixed model
|
||||
ok 4 - getModelInfoCore returns explicit ambiguity metadata for ambiguous unprefixed model
|
||||
---
|
||||
duration_ms: 1.079896
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: getModelInfoCore canonicalizes github legacy alias with explicit provider prefix
|
||||
ok 5 - getModelInfoCore canonicalizes github legacy alias with explicit provider prefix
|
||||
---
|
||||
duration_ms: 0.370547
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: GithubExecutor routes codex-family model to /responses
|
||||
ok 6 - GithubExecutor routes codex-family model to /responses
|
||||
---
|
||||
duration_ms: 0.47113
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: GithubExecutor keeps non-codex model on /chat/completions
|
||||
ok 7 - GithubExecutor keeps non-codex model on /chat/completions
|
||||
---
|
||||
duration_ms: 0.38457
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: DefaultExecutor uses x-api-key for kimi-coding-apikey
|
||||
ok 8 - DefaultExecutor uses x-api-key for kimi-coding-apikey
|
||||
---
|
||||
duration_ms: 0.451443
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor forces stream=true for upstream compatibility
|
||||
ok 9 - CodexExecutor forces stream=true for upstream compatibility
|
||||
---
|
||||
duration_ms: 1.203259
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: Claude native messages can be round-tripped through OpenAI into Claude OAuth format
|
||||
ok 10 - Claude native messages can be round-tripped through OpenAI into Claude OAuth format
|
||||
---
|
||||
duration_ms: 7.232512
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor maps fast service tier to priority
|
||||
ok 11 - CodexExecutor maps fast service tier to priority
|
||||
---
|
||||
duration_ms: 0.489993
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: shouldUseNativeCodexPassthrough only enables responses-native Codex requests
|
||||
ok 12 - shouldUseNativeCodexPassthrough only enables responses-native Codex requests
|
||||
---
|
||||
duration_ms: 0.441911
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor can force fast service tier from settings
|
||||
ok 13 - CodexExecutor can force fast service tier from settings
|
||||
---
|
||||
duration_ms: 0.299575
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor always requests SSE accept header
|
||||
ok 14 - CodexExecutor always requests SSE accept header
|
||||
---
|
||||
duration_ms: 0.602914
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor does not request SSE accept header for compact requests
|
||||
ok 15 - CodexExecutor does not request SSE accept header for compact requests
|
||||
---
|
||||
duration_ms: 0.322611
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor preserves native responses payloads for Codex passthrough
|
||||
not ok 16 - CodexExecutor preserves native responses payloads for Codex passthrough
|
||||
---
|
||||
duration_ms: 1.856261
|
||||
type: 'test'
|
||||
location: '/home/diegosouzapw/dev/proxys/9router/tests/unit/plan3-p0.test.mjs:221:1'
|
||||
failureType: 'testCodeFailure'
|
||||
error: |-
|
||||
Expected values to be strictly equal:
|
||||
|
||||
false !== true
|
||||
|
||||
code: 'ERR_ASSERTION'
|
||||
name: 'AssertionError'
|
||||
expected: true
|
||||
actual: false
|
||||
operator: 'strictEqual'
|
||||
stack: |-
|
||||
TestContext.<anonymous> (file:///home/diegosouzapw/dev/proxys/9router/tests/unit/plan3-p0.test.mjs:242:10)
|
||||
Test.runInAsyncScope (node:async_hooks:214:14)
|
||||
Test.run (node:internal/test_runner/test:1047:25)
|
||||
Test.processPendingSubtests (node:internal/test_runner/test:744:18)
|
||||
Test.postRun (node:internal/test_runner/test:1173:19)
|
||||
Test.run (node:internal/test_runner/test:1101:12)
|
||||
async Test.processPendingSubtests (node:internal/test_runner/test:744:7)
|
||||
...
|
||||
# Subtest: CodexExecutor strips streaming fields for compact passthrough
|
||||
ok 17 - CodexExecutor strips streaming fields for compact passthrough
|
||||
---
|
||||
duration_ms: 0.296176
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor routes responses subpaths to matching upstream paths
|
||||
ok 18 - CodexExecutor routes responses subpaths to matching upstream paths
|
||||
---
|
||||
duration_ms: 0.546657
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: translateNonStreamingResponse converts Responses API payload to OpenAI chat.completion
|
||||
ok 19 - translateNonStreamingResponse converts Responses API payload to OpenAI chat.completion
|
||||
---
|
||||
duration_ms: 1.483788
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: extractUsageFromResponse reads usage from Responses API payload
|
||||
ok 20 - extractUsageFromResponse reads usage from Responses API payload
|
||||
---
|
||||
duration_ms: 0.398039
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: detectFormat identifies OpenAI Responses when input is string
|
||||
ok 21 - detectFormat identifies OpenAI Responses when input is string
|
||||
---
|
||||
duration_ms: 0.359174
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: detectFormat identifies OpenAI Responses by max_output_tokens without input array
|
||||
ok 22 - detectFormat identifies OpenAI Responses by max_output_tokens without input array
|
||||
---
|
||||
duration_ms: 0.271215
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: detectFormatFromEndpoint forces OpenAI for /v1/chat/completions
|
||||
ok 23 - detectFormatFromEndpoint forces OpenAI for /v1/chat/completions
|
||||
---
|
||||
duration_ms: 0.52054
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: detectFormatFromEndpoint forces Claude for /v1/messages
|
||||
ok 24 - detectFormatFromEndpoint forces Claude for /v1/messages
|
||||
---
|
||||
duration_ms: 0.433035
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: translateRequest normalizes openai-responses input string into list payload
|
||||
ok 25 - translateRequest normalizes openai-responses input string into list payload
|
||||
---
|
||||
duration_ms: 0.358109
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: translateRequest preserves service_tier when converting openai to openai-responses
|
||||
ok 26 - translateRequest preserves service_tier when converting openai to openai-responses
|
||||
---
|
||||
duration_ms: 1.10454
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: parseSSEToResponsesOutput parses completed response from SSE payload
|
||||
ok 27 - parseSSEToResponsesOutput parses completed response from SSE payload
|
||||
---
|
||||
duration_ms: 0.575476
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: parseSSEToResponsesOutput returns null for invalid payload
|
||||
ok 28 - parseSSEToResponsesOutput returns null for invalid payload
|
||||
---
|
||||
duration_ms: 0.302714
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: parseSSEToOpenAIResponse merges split tool call chunks by id without duplication
|
||||
ok 29 - parseSSEToOpenAIResponse merges split tool call chunks by id without duplication
|
||||
---
|
||||
duration_ms: 0.916032
|
||||
type: 'test'
|
||||
...
|
||||
1..29
|
||||
# tests 29
|
||||
# suites 0
|
||||
# pass 28
|
||||
# fail 1
|
||||
# cancelled 0
|
||||
# skipped 0
|
||||
# todo 0
|
||||
# duration_ms 65.394285
|
||||
@@ -120,7 +120,11 @@ test("isAuthenticated accepts bearer API keys", async () => {
|
||||
assert.equal(result, true);
|
||||
});
|
||||
|
||||
test("isAuthenticated returns false without valid credentials", async () => {
|
||||
test("isAuthenticated returns false when auth is required without valid credentials", async () => {
|
||||
// Force requireLogin to be active
|
||||
process.env.INITIAL_PASSWORD = "bootstrap-password";
|
||||
await localDb.updateSettings({ requireLogin: true, password: "" });
|
||||
|
||||
const request = new Request("https://example.com/api/providers");
|
||||
|
||||
const result = await apiAuth.isAuthenticated(request);
|
||||
|
||||
@@ -62,6 +62,27 @@ test("getProviderCredentials returns null when all active connections are termin
|
||||
assert.equal(selected, null);
|
||||
});
|
||||
|
||||
// Verifies that passing both bypass options lets getProviderCredentials return
// a connection that would normally be suppressed.
test("getProviderCredentials can reuse a locally suppressed connection for combo live tests", async () => {
|
||||
await resetStorage();
|
||||
|
||||
// Single active connection that is rate-limited for the next 60 seconds and
// carries a non-healthy testStatus (presumably treated as terminal — confirm
// against isTerminalConnectionStatus).
const conn = await providersDb.createProviderConnection({
|
||||
provider: "openai",
|
||||
authType: "apikey",
|
||||
apiKey: "sk-live-test",
|
||||
isActive: true,
|
||||
testStatus: "credits_exhausted",
|
||||
rateLimitedUntil: new Date(Date.now() + 60_000).toISOString(),
|
||||
});
|
||||
|
||||
// No excluded connection, no allow-list, no requested model; only the options object matters here.
const selected = await auth.getProviderCredentials("openai", null, null, null, {
|
||||
allowSuppressedConnections: true,
|
||||
bypassQuotaPolicy: true,
|
||||
});
|
||||
|
||||
// The suppressed connection must still be the one that gets selected.
assert.ok(selected);
|
||||
assert.equal(selected.connectionId, conn.id);
|
||||
});
|
||||
|
||||
test("markAccountUnavailable does not overwrite terminal status", async () => {
|
||||
await resetStorage();
|
||||
|
||||
|
||||
@@ -0,0 +1,104 @@
|
||||
import { describe, it } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
const autoUpdate = await import("../../src/lib/system/autoUpdate.ts");
|
||||
|
||||
// Tests for getAutoUpdateConfig, which is called here with a plain env-like object.
describe("getAutoUpdateConfig", () => {
|
||||
// With only DATA_DIR supplied, the config falls back to its defaults.
it("defaults to npm mode", () => {
|
||||
const config = autoUpdate.getAutoUpdateConfig({ DATA_DIR: "/tmp/omniroute" });
|
||||
assert.equal(config.mode, "npm");
|
||||
assert.equal(config.repoDir, "/workspace/omniroute");
|
||||
assert.equal(config.composeProfile, "cli");
|
||||
});
|
||||
|
||||
// Every AUTO_UPDATE_* variable should be reflected in the returned config.
it("reads docker-compose settings from env", () => {
|
||||
const config = autoUpdate.getAutoUpdateConfig({
|
||||
DATA_DIR: "/tmp/custom-data",
|
||||
AUTO_UPDATE_MODE: "docker-compose",
|
||||
AUTO_UPDATE_REPO_DIR: "/srv/omniroute",
|
||||
AUTO_UPDATE_COMPOSE_FILE: "/srv/omniroute/docker-compose.yml",
|
||||
AUTO_UPDATE_COMPOSE_PROFILE: "base",
|
||||
AUTO_UPDATE_SERVICE: "omniroute-base",
|
||||
AUTO_UPDATE_GIT_REMOTE: "upstream",
|
||||
AUTO_UPDATE_PATCH_COMMITS: "abc123 def456,ghi789",
|
||||
AUTO_UPDATE_LOG_PATH: "/tmp/update.log",
|
||||
});
|
||||
|
||||
assert.equal(config.mode, "docker-compose");
|
||||
assert.equal(config.repoDir, "/srv/omniroute");
|
||||
assert.equal(config.composeFile, "/srv/omniroute/docker-compose.yml");
|
||||
assert.equal(config.composeProfile, "base");
|
||||
assert.equal(config.composeService, "omniroute-base");
|
||||
assert.equal(config.gitRemote, "upstream");
|
||||
// The input mixes space and comma separators; the expected result is a flat list of three hashes.
assert.deepEqual(config.patchCommits, ["abc123", "def456", "ghi789"]);
|
||||
assert.equal(config.logPath, "/tmp/update.log");
|
||||
});
|
||||
});
|
||||
|
||||
// Tests for validateAutoUpdateRuntime. It is called with a config plus two async
// stubs; judging by how they are used here, the first stub stands in for running a
// command and the second for checking whether a path exists (confirm against the
// implementation).
describe("validateAutoUpdateRuntime", () => {
|
||||
it("reports missing docker socket for docker-compose mode", async () => {
|
||||
const config = autoUpdate.getAutoUpdateConfig({
|
||||
AUTO_UPDATE_MODE: "docker-compose",
|
||||
AUTO_UPDATE_REPO_DIR: "/repo",
|
||||
AUTO_UPDATE_COMPOSE_FILE: "/repo/docker-compose.yml",
|
||||
});
|
||||
|
||||
// Command stub always succeeds; path stub reports every path as present
// except the Docker socket.
const result = await autoUpdate.validateAutoUpdateRuntime(
|
||||
config,
|
||||
async () => ({ stdout: "git version 2.0.0", stderr: "" }),
|
||||
async (targetPath) => targetPath !== "/var/run/docker.sock"
|
||||
);
|
||||
|
||||
assert.equal(result.supported, false);
|
||||
assert.match(result.reason, /Docker socket/);
|
||||
});
|
||||
|
||||
it("detects docker-compose command availability", async () => {
|
||||
const config = autoUpdate.getAutoUpdateConfig({
|
||||
AUTO_UPDATE_MODE: "docker-compose",
|
||||
AUTO_UPDATE_REPO_DIR: "/repo",
|
||||
AUTO_UPDATE_COMPOSE_FILE: "/repo/docker-compose.yml",
|
||||
});
|
||||
|
||||
// Command stub answers only `git` and `docker compose ...`; anything else
// throws, so an unexpected invocation fails the test. All paths exist.
const result = await autoUpdate.validateAutoUpdateRuntime(
|
||||
config,
|
||||
async (file, args) => {
|
||||
if (file === "git") return { stdout: "git version 2.0.0", stderr: "" };
|
||||
if (file === "docker" && args?.[0] === "compose") {
|
||||
return { stdout: "Docker Compose version v2.0.0", stderr: "" };
|
||||
}
|
||||
throw new Error(`unexpected command: ${file}`);
|
||||
},
|
||||
async () => true
|
||||
);
|
||||
|
||||
assert.equal(result.supported, true);
|
||||
assert.equal(result.composeCommand, "docker compose");
|
||||
});
|
||||
});
|
||||
|
||||
// Tests for buildDockerComposeUpdateScript: the returned script text is checked
// for the expected git and compose commands.
describe("buildDockerComposeUpdateScript", () => {
|
||||
it("includes git checkout and compose rebuild steps", () => {
|
||||
const config = autoUpdate.getAutoUpdateConfig({
|
||||
AUTO_UPDATE_MODE: "docker-compose",
|
||||
AUTO_UPDATE_REPO_DIR: "/repo",
|
||||
AUTO_UPDATE_COMPOSE_FILE: "/repo/docker-compose.yml",
|
||||
AUTO_UPDATE_COMPOSE_PROFILE: "cli",
|
||||
AUTO_UPDATE_SERVICE: "omniroute-cli",
|
||||
AUTO_UPDATE_GIT_REMOTE: "origin",
|
||||
AUTO_UPDATE_PATCH_COMMITS: "1501a87 e569e1c",
|
||||
});
|
||||
|
||||
const script = autoUpdate.buildDockerComposeUpdateScript({
|
||||
latest: "3.2.6",
|
||||
config,
|
||||
composeCommand: "docker compose",
|
||||
});
|
||||
|
||||
// Expected script contents, in the order asserted: tag fetch, safe.directory
// registration, checkout of an autoupdate branch at the target tag,
// cherry-pick of the two configured patch commits, and a compose rebuild of the service.
assert.match(script, /git fetch --tags/);
|
||||
assert.match(script, /git config --global --add safe\.directory/);
|
||||
assert.match(script, /git checkout -B "autoupdate\/\$\{TARGET_TAG#v\}" "\$TARGET_TAG"/);
|
||||
assert.match(script, /git cherry-pick --keep-redundant-commits '1501a87' 'e569e1c'/);
|
||||
assert.match(script, /docker compose -f "\$COMPOSE_FILE" up -d --build "\$SERVICE"/);
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,598 @@
|
||||
import { describe, test } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import {
|
||||
isClaudeCodeClient,
|
||||
providerSupportsCaching,
|
||||
isDeterministicStrategy,
|
||||
shouldPreserveCacheControl,
|
||||
trackCacheMetrics,
|
||||
updateCacheTokenMetrics,
|
||||
} from "../../open-sse/utils/cacheControlPolicy.ts";
|
||||
|
||||
describe("Cache Control Policy", () => {
|
||||
// Checks which user-agent strings isClaudeCodeClient accepts and rejects.
describe("isClaudeCodeClient", () => {
  test("detects claude-code user agent", () => {
    const accepted = ["claude-code/0.1.0", "claude_code/0.1.0", "Anthropic CLI/1.0"];
    for (const agent of accepted) {
      assert.equal(isClaudeCodeClient(agent), true);
    }
  });

  test("rejects non-Claude clients", () => {
    // Includes the missing/empty user-agent inputs as well as foreign clients.
    const rejected = ["curl/7.68.0", "OpenAI/1.0", null, undefined, ""];
    for (const agent of rejected) {
      assert.equal(isClaudeCodeClient(agent), false);
    }
  });

  test("is case-insensitive", () => {
    const mixedCase = ["Claude-Code/0.1.0", "CLAUDE-CODE/0.1.0"];
    for (const agent of mixedCase) {
      assert.equal(isClaudeCodeClient(agent), true);
    }
  });
});
|
||||
|
||||
// Checks which provider identifiers providerSupportsCaching reports as caching-capable.
describe("providerSupportsCaching", () => {
  test("detects caching providers", () => {
    const cachingProviders = ["claude", "anthropic", "zai", "qwen"];
    for (const provider of cachingProviders) {
      assert.equal(providerSupportsCaching(provider), true);
    }
  });

  test("rejects non-caching providers", () => {
    // null and undefined are expected to be rejected just like unknown names.
    const otherProviders = ["openai", "gemini", "unknown", null, undefined];
    for (const provider of otherProviders) {
      assert.equal(providerSupportsCaching(provider), false);
    }
  });

  test("is case-insensitive", () => {
    const mixedCase = ["Claude", "ANTHROPIC"];
    for (const provider of mixedCase) {
      assert.equal(providerSupportsCaching(provider), true);
    }
  });
});
|
||||
|
||||
// Checks how isDeterministicStrategy classifies each combo strategy name.
describe("isDeterministicStrategy", () => {
  test("identifies deterministic strategies", () => {
    for (const strategy of ["priority", "cost-optimized"]) {
      assert.equal(isDeterministicStrategy(strategy), true);
    }
  });

  test("identifies non-deterministic strategies", () => {
    const nonDeterministic = [
      "weighted",
      "round-robin",
      "random",
      "fill-first",
      "p2c",
      "least-used",
      "strict-random",
    ];
    for (const strategy of nonDeterministic) {
      assert.equal(isDeterministicStrategy(strategy), false);
    }
  });

  test("handles null/undefined", () => {
    for (const missing of [null, undefined]) {
      assert.equal(isDeterministicStrategy(missing), false);
    }
  });
});
|
||||
|
||||
// Table-driven checks of the cache_control preservation decision.
describe("shouldPreserveCacheControl", () => {
  /**
   * Registers one test per row. Each test passes the row's `input` to
   * shouldPreserveCacheControl and asserts the result equals `expected`.
   */
  const registerCases = (rows) => {
    for (const { name, input, expected } of rows) {
      test(name, () => {
        assert.equal(shouldPreserveCacheControl(input), expected);
      });
    }
  };

  registerCases([
    {
      name: "preserves for single model + Claude client + caching provider",
      input: { userAgent: "claude-code/0.1.0", isCombo: false, targetProvider: "claude" },
      expected: true,
    },
    {
      name: "preserves for combo with priority strategy + Claude client + caching provider",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "priority",
        targetProvider: "claude",
      },
      expected: true,
    },
    {
      name: "preserves for combo with cost-optimized strategy + Claude client + caching provider",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "cost-optimized",
        targetProvider: "anthropic",
      },
      expected: true,
    },
    {
      name: "rejects non-Claude clients",
      input: { userAgent: "curl/7.68.0", isCombo: false, targetProvider: "claude" },
      expected: false,
    },
    {
      name: "rejects non-caching providers",
      input: { userAgent: "claude-code/0.1.0", isCombo: false, targetProvider: "openai" },
      expected: false,
    },
    {
      name: "rejects combo with non-deterministic strategy (weighted)",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "weighted",
        targetProvider: "claude",
      },
      expected: false,
    },
    {
      name: "rejects combo with non-deterministic strategy (round-robin)",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "round-robin",
        targetProvider: "claude",
      },
      expected: false,
    },
    {
      name: "rejects combo with non-deterministic strategy (random)",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "random",
        targetProvider: "claude",
      },
      expected: false,
    },
    {
      name: "rejects combo with fill-first strategy",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "fill-first",
        targetProvider: "claude",
      },
      expected: false,
    },
    {
      name: "rejects combo with p2c strategy",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "p2c",
        targetProvider: "claude",
      },
      expected: false,
    },
    {
      name: "rejects combo with least-used strategy",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "least-used",
        targetProvider: "claude",
      },
      expected: false,
    },
    {
      name: "rejects combo with strict-random strategy",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: "strict-random",
        targetProvider: "claude",
      },
      expected: false,
    },
    {
      name: "rejects combo with null strategy",
      input: {
        userAgent: "claude-code/0.1.0",
        isCombo: true,
        comboStrategy: null,
        targetProvider: "claude",
      },
      expected: false,
    },
    {
      name: "rejects when userAgent is null",
      input: { userAgent: null, isCombo: false, targetProvider: "claude" },
      expected: false,
    },
    {
      name: "rejects when targetProvider is null",
      input: { userAgent: "claude-code/0.1.0", isCombo: false, targetProvider: null },
      expected: false,
    },
  ]);

  describe("settings override", () => {
    registerCases([
      {
        name: "alwaysPreserveClientCache=always overrides auto detection",
        input: {
          userAgent: "curl/7.68.0", // non-Claude client
          isCombo: false,
          targetProvider: "claude",
          settings: { alwaysPreserveClientCache: "always" },
        },
        expected: true,
      },
      {
        name: "alwaysPreserveClientCache=never overrides auto detection",
        input: {
          userAgent: "claude-code/0.1.0", // Claude client
          isCombo: false,
          targetProvider: "claude",
          settings: { alwaysPreserveClientCache: "never" },
        },
        expected: false,
      },
    ]);

    test("alwaysPreserveClientCache=auto uses automatic detection", () => {
      const autoSettings = { alwaysPreserveClientCache: "auto" };

      // Claude client + caching provider: preserved.
      const forClaudeClient = shouldPreserveCacheControl({
        userAgent: "claude-code/0.1.0",
        isCombo: false,
        targetProvider: "claude",
        settings: autoSettings,
      });
      assert.equal(forClaudeClient, true);

      // Non-Claude client: not preserved.
      const forOtherClient = shouldPreserveCacheControl({
        userAgent: "curl/7.68.0",
        isCombo: false,
        targetProvider: "claude",
        settings: autoSettings,
      });
      assert.equal(forOtherClient, false);
    });

    registerCases([
      {
        name: "undefined settings uses automatic detection",
        input: {
          userAgent: "claude-code/0.1.0",
          isCombo: false,
          targetProvider: "claude",
          settings: undefined,
        },
        expected: true,
      },
    ]);
  });
});
|
||||
|
||||
// Checks the totals and per-provider / per-strategy breakdowns returned by trackCacheMetrics.
describe("trackCacheMetrics", () => {
  /**
   * Builds a metrics object with all counters at zero and empty breakdowns.
   * `overrides` replaces individual fields while keeping the field order.
   */
  const blankMetrics = (overrides = {}) => ({
    totalRequests: 0,
    requestsWithCacheControl: 0,
    totalInputTokens: 0,
    totalCachedTokens: 0,
    totalCacheCreationTokens: 0,
    tokensSaved: 0,
    estimatedCostSaved: 0,
    byProvider: {},
    byStrategy: {},
    lastUpdated: new Date().toISOString(),
    ...overrides,
  });

  test("initializes empty metrics", () => {
    // No prior metrics object is supplied for this call.
    const outcome = trackCacheMetrics({
      preserved: true,
      provider: "claude",
      strategy: "priority",
      metrics: undefined,
      inputTokens: 1000,
      cachedTokens: 500,
      cacheCreationTokens: 200,
    });

    assert.equal(outcome.totalRequests, 1);
    assert.equal(outcome.requestsWithCacheControl, 1);
    assert.equal(outcome.totalInputTokens, 1000);
    assert.equal(outcome.totalCachedTokens, 500);
    assert.equal(outcome.totalCacheCreationTokens, 200);
    assert.equal(outcome.tokensSaved, 500);
  });

  test("increments total requests without cache control", () => {
    const existing = blankMetrics({
      totalRequests: 10,
      requestsWithCacheControl: 5,
      totalInputTokens: 5000,
      totalCachedTokens: 2000,
      totalCacheCreationTokens: 1000,
      tokensSaved: 2000,
      estimatedCostSaved: 0.5,
    });

    const outcome = trackCacheMetrics({
      preserved: false,
      provider: "claude",
      strategy: null,
      metrics: existing,
      inputTokens: 500,
      cachedTokens: 0,
      cacheCreationTokens: 0,
    });

    assert.equal(outcome.totalRequests, 11);
    assert.equal(outcome.requestsWithCacheControl, 5); // unchanged
    assert.equal(outcome.totalInputTokens, 5500);
  });

  test("tracks requests with cache control preserved", () => {
    const outcome = trackCacheMetrics({
      preserved: true,
      provider: "claude",
      strategy: "priority",
      metrics: blankMetrics(),
      inputTokens: 1000,
      cachedTokens: 400,
      cacheCreationTokens: 100,
    });

    assert.equal(outcome.totalRequests, 1);
    assert.equal(outcome.requestsWithCacheControl, 1);

    const claudeBucket = outcome.byProvider.claude;
    assert.equal(claudeBucket.requests, 1);
    assert.equal(claudeBucket.inputTokens, 1000);
    assert.equal(claudeBucket.cachedTokens, 400);
    assert.equal(claudeBucket.cacheCreationTokens, 100);
    assert.equal(outcome.byStrategy.priority.requests, 1);
  });

  test("tracks by provider", () => {
    // Two requests for different providers, the second fed with the first result.
    const afterClaude = trackCacheMetrics({
      preserved: true,
      provider: "claude",
      strategy: null,
      metrics: blankMetrics(),
      inputTokens: 1000,
      cachedTokens: 300,
      cacheCreationTokens: 100,
    });
    const afterZai = trackCacheMetrics({
      preserved: true,
      provider: "zai",
      strategy: null,
      metrics: afterClaude,
      inputTokens: 800,
      cachedTokens: 200,
      cacheCreationTokens: 50,
    });

    assert.equal(afterZai.byProvider.claude.requests, 1);
    assert.equal(afterZai.byProvider.claude.inputTokens, 1000);
    assert.equal(afterZai.byProvider.claude.cachedTokens, 300);
    assert.equal(afterZai.byProvider.zai.requests, 1);
    assert.equal(afterZai.byProvider.zai.inputTokens, 800);
    assert.equal(afterZai.byProvider.zai.cachedTokens, 200);
  });

  test("tracks by strategy", () => {
    // Same provider, two different strategies, chained through the returned metrics.
    const afterPriority = trackCacheMetrics({
      preserved: true,
      provider: "claude",
      strategy: "priority",
      metrics: blankMetrics(),
      inputTokens: 1000,
      cachedTokens: 300,
      cacheCreationTokens: 100,
    });
    const afterCostOptimized = trackCacheMetrics({
      preserved: true,
      provider: "claude",
      strategy: "cost-optimized",
      metrics: afterPriority,
      inputTokens: 800,
      cachedTokens: 200,
      cacheCreationTokens: 50,
    });

    assert.equal(afterCostOptimized.byStrategy.priority.requests, 1);
    assert.equal(afterCostOptimized.byStrategy.priority.cachedTokens, 300);
    assert.equal(afterCostOptimized.byStrategy["cost-optimized"].requests, 1);
    assert.equal(afterCostOptimized.byStrategy["cost-optimized"].cachedTokens, 200);
  });
});
|
||||
|
||||
// Checks that updateCacheTokenMetrics adds token counts to totals and breakdowns.
describe("updateCacheTokenMetrics", () => {
  /** Fresh per-provider bucket used as the pre-existing "claude" entry. */
  const claudeBucket = () => ({
    requests: 3,
    inputTokens: 3000,
    cachedTokens: 1200,
    cacheCreationTokens: 600,
  });

  /** Fresh per-strategy bucket used as the pre-existing "priority" entry. */
  const priorityBucket = () => ({
    requests: 4,
    inputTokens: 4000,
    cachedTokens: 1600,
    cacheCreationTokens: 800,
  });

  /** Builds the shared starting totals with the given breakdown maps. */
  const seededMetrics = (byProvider, byStrategy) => ({
    totalRequests: 10,
    requestsWithCacheControl: 5,
    totalInputTokens: 5000,
    totalCachedTokens: 2000,
    totalCacheCreationTokens: 1000,
    tokensSaved: 2000,
    estimatedCostSaved: 0.5,
    byProvider,
    byStrategy,
    lastUpdated: new Date().toISOString(),
  });

  test("updates token counts", () => {
    const updated = updateCacheTokenMetrics({
      metrics: seededMetrics({ claude: claudeBucket() }, { priority: priorityBucket() }),
      provider: "claude",
      strategy: "priority",
      inputTokens: 1000,
      cachedTokens: 400,
      cacheCreationTokens: 200,
      costSaved: 0.02,
    });

    assert.equal(updated.totalInputTokens, 6000);
    assert.equal(updated.totalCachedTokens, 2400);
    assert.equal(updated.totalCacheCreationTokens, 1200);
    assert.equal(updated.tokensSaved, 2400);
    assert.equal(updated.estimatedCostSaved, 0.52);
  });

  test("updates provider breakdown", () => {
    const updated = updateCacheTokenMetrics({
      metrics: seededMetrics({ claude: claudeBucket() }, {}),
      provider: "claude",
      strategy: null,
      inputTokens: 500,
      cachedTokens: 200,
      cacheCreationTokens: 100,
    });

    const bucket = updated.byProvider.claude;
    assert.equal(bucket.inputTokens, 3500);
    assert.equal(bucket.cachedTokens, 1400);
    assert.equal(bucket.cacheCreationTokens, 700);
  });

  test("updates strategy breakdown", () => {
    const updated = updateCacheTokenMetrics({
      metrics: seededMetrics({}, { priority: priorityBucket() }),
      provider: "claude",
      strategy: "priority",
      inputTokens: 500,
      cachedTokens: 200,
      cacheCreationTokens: 100,
    });

    const bucket = updated.byStrategy.priority;
    assert.equal(bucket.inputTokens, 4500);
    assert.equal(bucket.cachedTokens, 1800);
    assert.equal(bucket.cacheCreationTokens, 900);
  });
});
|
||||
});
|
||||
@@ -0,0 +1,134 @@
|
||||
import { describe, test, before, after } from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import { getCacheMetrics } from "../../src/lib/db/settings.ts";
|
||||
import { getDbInstance } from "../../src/lib/db/core.ts";
|
||||
|
||||
// Exercises getCacheMetrics against rows written directly into usage_history.
describe("Cache Metrics Database", () => {
  let db;

  /** Deletes every usage_history row written by this suite. */
  const purgeTestRows = () => {
    db.prepare("DELETE FROM usage_history WHERE provider = 'test-provider'").run();
  };

  /**
   * Inserts one successful "test-provider" usage row. Only the token counts,
   * latencies and timestamp vary between rows; the remaining columns are fixed.
   */
  const insertTestUsage = (row) => {
    db.prepare(
      `
      INSERT INTO usage_history (provider, model, connection_id, api_key_id, api_key_name,
        tokens_input, tokens_output, tokens_cache_read, tokens_cache_creation, tokens_reasoning,
        status, success, latency_ms, ttft_ms, error_code, timestamp)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `
    ).run(
      "test-provider",
      "test-model",
      "test-connection",
      "test-key-id",
      "test-key",
      row.tokensInput,
      row.tokensOutput,
      row.tokensCacheRead,
      row.tokensCacheCreation,
      0, // tokens_reasoning
      "200", // status
      1, // success
      row.latencyMs,
      row.ttftMs,
      null, // error_code
      row.timestamp
    );
  };

  before(() => {
    db = getDbInstance();
    // Create usage_history table if it doesn't exist (mimicking production schema)
    db.prepare(
      `
      CREATE TABLE IF NOT EXISTS usage_history (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        provider TEXT,
        model TEXT,
        connection_id TEXT,
        api_key_id TEXT,
        api_key_name TEXT,
        tokens_input INTEGER DEFAULT 0,
        tokens_output INTEGER DEFAULT 0,
        tokens_cache_read INTEGER DEFAULT 0,
        tokens_cache_creation INTEGER DEFAULT 0,
        tokens_reasoning INTEGER DEFAULT 0,
        status TEXT,
        timestamp TEXT,
        success INTEGER,
        latency_ms INTEGER DEFAULT 0,
        ttft_ms INTEGER DEFAULT 0,
        error_code TEXT
      )
    `
    ).run();
  });

  after(async () => {
    // Clean up test data
    purgeTestRows();
  });

  describe("getCacheMetrics", () => {
    test("returns metrics even with no cache activity", async () => {
      // The call must succeed even if usage_history has data but no cache activity.
      const metrics = await getCacheMetrics();

      assert.ok(metrics.totalRequests >= 0);
      assert.ok(metrics.totalInputTokens >= 0);
      assert.ok(metrics.totalCachedTokens >= 0);
      assert.ok(metrics.totalCacheCreationTokens >= 0);
      assert.ok(metrics.tokensSaved >= 0);
      assert.ok(metrics.lastUpdated);
    });

    test("returns aggregated metrics from usage_history", async () => {
      // Start from a state with no rows from this suite.
      purgeTestRows();

      const timestamp = new Date().toISOString();

      insertTestUsage({
        tokensInput: 1000,
        tokensOutput: 500,
        tokensCacheRead: 400,
        tokensCacheCreation: 200,
        latencyMs: 100,
        ttftMs: 50,
        timestamp,
      });
      insertTestUsage({
        tokensInput: 500,
        tokensOutput: 300,
        tokensCacheRead: 200,
        tokensCacheCreation: 100,
        latencyMs: 80,
        ttftMs: 40,
        timestamp,
      });

      const metrics = await getCacheMetrics();

      // Lower bounds only: the table may also hold rows not written by this test.
      assert.ok(metrics.requestsWithCacheControl >= 2);
      assert.ok(metrics.totalInputTokens >= 1500);
      assert.ok(metrics.totalCachedTokens >= 600);
      assert.ok(metrics.totalCacheCreationTokens >= 300);
      assert.ok(metrics.tokensSaved >= 600);

      // Check provider breakdown
      assert.ok(metrics.byProvider["test-provider"]);
      assert.ok(metrics.byProvider["test-provider"].requests >= 2);
      assert.ok(metrics.byProvider["test-provider"].inputTokens >= 1500);
      assert.ok(metrics.byProvider["test-provider"].cachedTokens >= 600);
      assert.ok(metrics.byProvider["test-provider"].cacheCreationTokens >= 300);

      // Clean up
      purgeTestRows();
    });
  });
});
|
||||
@@ -0,0 +1,128 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
const TEST_DATA_DIR = fs.mkdtempSync(path.join(os.tmpdir(), "omniroute-chat-combo-live-"));
|
||||
process.env.DATA_DIR = TEST_DATA_DIR;
|
||||
|
||||
const core = await import("../../src/lib/db/core.ts");
|
||||
const providersDb = await import("../../src/lib/db/providers.ts");
|
||||
const chatRoute = await import("../../src/app/api/v1/chat/completions/route.ts");
|
||||
const {
|
||||
clearModelUnavailability,
|
||||
resetAllAvailability,
|
||||
setModelUnavailable,
|
||||
} = await import("../../src/domain/modelAvailability.ts");
|
||||
const {
|
||||
getCircuitBreaker,
|
||||
resetAllCircuitBreakers,
|
||||
STATE,
|
||||
} = await import("../../src/shared/utils/circuitBreaker.ts");
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
/**
 * Returns the test environment to a clean slate: releases the DB instance,
 * recreates TEST_DATA_DIR empty, then resets model availability and all
 * circuit breakers.
 */
async function resetStorage() {
  const recursively = { recursive: true };

  core.resetDbInstance();

  // Remove the data directory and recreate it empty.
  fs.rmSync(TEST_DATA_DIR, { ...recursively, force: true });
  fs.mkdirSync(TEST_DATA_DIR, recursively);

  resetAllAvailability();
  resetAllCircuitBreakers();
}
|
||||
|
||||
/**
 * Creates an active OpenAI API-key connection stored with testStatus
 * "credits_exhausted" and a rateLimitedUntil timestamp 60 seconds ahead.
 * Resolves to whatever createProviderConnection returns.
 */
async function seedSuppressedConnection() {
  const limitedUntil = new Date(Date.now() + 60_000).toISOString();

  const connectionSpec = {
    provider: "openai",
    authType: "apikey",
    name: "openai-live-test",
    apiKey: "sk-live-test",
    isActive: true,
    testStatus: "credits_exhausted",
    rateLimitedUntil: limitedUntil,
  };

  return providersDb.createProviderConnection(connectionSpec);
}
|
||||
|
||||
/**
 * Builds a non-streaming chat-completions POST request for openai/gpt-4o-mini.
 *
 * @param extraHeaders Headers merged after the JSON content type, so they can override it.
 * @returns A Request aimed at the local /v1/chat/completions endpoint.
 */
function makeRequest(extraHeaders = {}) {
  const headers = { "Content-Type": "application/json", ...extraHeaders };

  const payload = {
    model: "openai/gpt-4o-mini",
    messages: [{ role: "user", content: "Reply with OK only." }],
    max_tokens: 16,
    stream: false,
  };

  return new Request("http://localhost/v1/chat/completions", {
    method: "POST",
    headers,
    body: JSON.stringify(payload),
  });
}
|
||||
|
||||
/** Puts back the real fetch and clears availability and circuit-breaker state. */
function restoreRuntimeState() {
  globalThis.fetch = originalFetch;
  resetAllAvailability();
  resetAllCircuitBreakers();
}

// Before each test: real fetch restored, then storage reset.
test.beforeEach(async () => {
  globalThis.fetch = originalFetch;
  await resetStorage();
});

// After each test: undo any fetch stub and in-memory suppression state.
test.afterEach(() => {
  restoreRuntimeState();
});

// After the whole file: same restore, then release the DB and delete the temp data directory.
test.after(() => {
  restoreRuntimeState();
  core.resetDbInstance();
  fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
});
|
||||
|
||||
// Ordinary requests must be blocked by the cooldown and then by the open breaker;
// only the request carrying the X-Internal-Test header may reach the stubbed upstream.
test("combo live test bypasses local cooldown and breaker state to perform a real upstream request", async () => {
  const connection = await seedSuppressedConnection();

  // Suppress the model locally: a cooldown entry plus an OPEN circuit breaker.
  setModelUnavailable("openai", "gpt-4o-mini", 60_000, "test cooldown");
  const openaiBreaker = getCircuitBreaker("openai");
  openaiBreaker.state = STATE.OPEN;
  openaiBreaker.lastFailureTime = Date.now();

  // Stub fetch: record each call and answer with a minimal chat completion.
  const upstreamCalls = [];
  globalThis.fetch = async (url, init = {}) => {
    upstreamCalls.push({ url: String(url), init });
    const assistantReply = { role: "assistant", content: "OK" };
    return Response.json({
      id: "chatcmpl-live-test",
      choices: [{ message: assistantReply }],
    });
  };

  // 1) The cooldown blocks the request before any upstream call.
  const cooldownResponse = await chatRoute.POST(makeRequest());
  assert.equal(cooldownResponse.status, 503);
  assert.equal(upstreamCalls.length, 0);

  clearModelUnavailability("openai", "gpt-4o-mini");

  // 2) With the cooldown cleared, the open breaker still blocks it.
  const breakerResponse = await chatRoute.POST(makeRequest());
  assert.equal(breakerResponse.status, 503);
  assert.equal(upstreamCalls.length, 0);

  // 3) The internal health-check header lets the request through.
  const healthCheckRequest = makeRequest({ "X-Internal-Test": "combo-health-check" });
  const liveResponse = await chatRoute.POST(healthCheckRequest);
  const livePayload = await liveResponse.json();

  assert.equal(liveResponse.status, 200);
  assert.equal(upstreamCalls.length, 1);

  const [onlyCall] = upstreamCalls;
  assert.match(onlyCall.url, /\/chat\/completions$/);
  assert.equal(onlyCall.init.headers.Authorization, "Bearer sk-live-test");
  assert.equal(livePayload.choices[0].message.content, "OK");

  // The stored connection is expected to be marked active afterwards.
  const refreshed = await providersDb.getProviderConnectionById(connection.id);
  assert.equal(refreshed.testStatus, "active");
});
|
||||
@@ -26,7 +26,7 @@ function mockLog() {
|
||||
function mockHandler(statusSequence) {
|
||||
let callIndex = 0;
|
||||
return async (body, modelStr) => {
|
||||
const status = statusSequence[callIndex] ?? 200;
|
||||
const status = statusSequence[callIndex] ?? statusSequence[statusSequence.length - 1] ?? 200;
|
||||
callIndex++;
|
||||
if (status === 200) {
|
||||
return new Response(JSON.stringify({ ok: true }), { status: 200 });
|
||||
@@ -55,6 +55,7 @@ test("handleComboChat: circuit breaker opens after repeated 502 errors", async (
|
||||
name: "test-combo",
|
||||
models: [{ model: "groq/llama-3.3-70b", weight: 0 }],
|
||||
strategy: "priority",
|
||||
config: { maxRetries: 0 },
|
||||
};
|
||||
|
||||
const log = mockLog();
|
||||
@@ -74,6 +75,7 @@ test("handleComboChat: circuit breaker opens after repeated 502 errors", async (
|
||||
|
||||
// Breaker should now be OPEN
|
||||
const status = breaker.getStatus();
|
||||
console.log("=== BREAKER STATUS AFTER 3 CALLS ===", status);
|
||||
assert.equal(status.state, STATE.OPEN, "Breaker should be OPEN after 3 failures");
|
||||
assert.equal(status.failureCount, 3, "Failure count should be 3");
|
||||
});
|
||||
|
||||
@@ -239,7 +239,7 @@ test("CodexExecutor preserves native responses payloads for Codex passthrough",
|
||||
assert.equal(transformed.stream, true);
|
||||
assert.equal(transformed.service_tier, "priority");
|
||||
assert.equal(transformed.instructions, "custom system prompt");
|
||||
assert.equal(transformed.store, true);
|
||||
assert.equal(transformed.store, false);
|
||||
assert.deepEqual(transformed.metadata, { source: "codex-client" });
|
||||
assert.equal(transformed.reasoning_effort, "high");
|
||||
assert.ok(!("_nativeCodexPassthrough" in transformed));
|
||||
@@ -503,3 +503,29 @@ test("parseSSEToOpenAIResponse merges split tool call chunks by id without dupli
|
||||
assert.equal(parsed.choices[0].message.tool_calls[0].function.name, "sum");
|
||||
assert.equal(parsed.choices[0].message.tool_calls[0].function.arguments, '{"a":1}');
|
||||
});
|
||||
|
||||
// Two `delta.reasoning` chunks followed by a content chunk must be merged into
// `reasoning_content` and `content` on the parsed message.
test("parseSSEToOpenAIResponse normalizes delta.reasoning alias to reasoning_content", () => {
  /** Serializes one chunk for choice index 0 as an SSE `data:` line. */
  const dataLine = (choiceFields) =>
    `data: ${JSON.stringify({
      id: "chatcmpl_2",
      object: "chat.completion.chunk",
      choices: [{ index: 0, ...choiceFields }],
    })}`;

  const sseLines = [
    dataLine({ delta: { reasoning: "Let me think..." } }),
    dataLine({ delta: { reasoning: " The answer is 4." } }),
    dataLine({ delta: { content: "2+2=4" }, finish_reason: "stop" }),
    "data: [DONE]",
  ];
  const rawSSE = sseLines.join("\n");

  const parsed = parseSSEToOpenAIResponse(rawSSE, "moonshotai/kimi-k2.5");
  assert.ok(parsed);

  const { message } = parsed.choices[0];
  assert.equal(message.reasoning_content, "Let me think... The answer is 4.");
  assert.equal(message.content, "2+2=4");
});
|
||||
|
||||
@@ -155,3 +155,51 @@ test("builds compact Claude stream summary for detailed logs", () => {
|
||||
assert.equal(compact.usage.output_tokens, 7);
|
||||
assert.equal(compact._omniroute_stream.eventCount, 4);
|
||||
});
|
||||
|
||||
// Feeds four OpenAI stream chunks (role, reasoning, content, finish + usage) into the
// collector and checks the compact summary built from them.
test("builds compact OpenAI summary with reasoning alias (delta.reasoning)", () => {
  const MODEL = "moonshotai/kimi-k2.5";
  const collector = createStructuredSSECollector({ stage: "provider_response" });

  /** Pushes one chunk for choice index 0; `extra` adds top-level fields such as usage. */
  const pushChunk = (choiceFields, extra = {}) => {
    collector.push({
      id: "chatcmpl_r1",
      object: "chat.completion.chunk",
      created: 100,
      model: MODEL,
      choices: [{ index: 0, ...choiceFields }],
      ...extra,
    });
  };

  pushChunk({ delta: { role: "assistant" } });
  pushChunk({ delta: { reasoning: "Let me think..." } });
  pushChunk({ delta: { content: "The answer is 4." } });
  pushChunk(
    { delta: {}, finish_reason: "stop" },
    { usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 } }
  );

  const summary = buildStreamSummaryFromEvents(collector.getEvents(), FORMATS.OPENAI, MODEL);
  const built = collector.build(summary, { includeEvents: false });
  const compact = compactStructuredStreamPayload(built);

  assert.equal(compact.object, "chat.completion");

  const [firstChoice] = compact.choices;
  assert.equal(firstChoice.message.content, "The answer is 4.");
  assert.equal(firstChoice.message.reasoning_content, "Let me think...");
  assert.equal(firstChoice.finish_reason, "stop");
});
|
||||
|
||||
@@ -0,0 +1,422 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
|
||||
const { convertResponsesApiFormat } = await import(
|
||||
"../../open-sse/translator/helpers/responsesApiHelper.ts"
|
||||
);
|
||||
const { openaiResponsesToOpenAIRequest, openaiToOpenAIResponsesRequest } = await import(
|
||||
"../../open-sse/translator/request/openai-responses.ts"
|
||||
);
|
||||
|
||||
// A function_call_output with no function_call in the input must not yield
// any tool-role message.
test("convertResponsesApiFormat filters orphaned function_call_output items", () => {
  const orphanedOutput = {
    type: "function_call_output",
    call_id: "orphaned_call",
    output: "result",
  };

  const { messages } = convertResponsesApiFormat({ model: "gpt-4", input: [orphanedOutput] });

  const toolMessageCount = messages.filter((msg) => msg.role === "tool").length;
  assert.equal(toolMessageCount, 0);
});
|
||||
|
||||
// function_call items whose name is empty or a single space must not produce
// any assistant message.
test("convertResponsesApiFormat skips function_call items with empty names", () => {
  const emptyName = { type: "function_call", call_id: "c1", name: "", arguments: "{}" };
  const blankName = { type: "function_call", call_id: "c2", name: " ", arguments: "{}" };

  const { messages } = convertResponsesApiFormat({
    model: "gpt-4",
    input: [emptyName, blankName],
  });

  const assistantCount = messages.filter((msg) => msg.role === "assistant").length;
  assert.equal(assistantCount, 0);
});
|
||||
|
||||
// Responses `input_image` parts must become Chat `image_url` parts, carrying the
// URL under `image_url.url` and keeping the `detail` hint.
test("Responses→Chat: input_image converted to image_url with detail", () => {
  const body = {
    model: "gpt-4",
    input: [
      {
        type: "message",
        role: "user",
        content: [
          { type: "input_text", text: "What is this?" },
          { type: "input_image", image_url: "https://example.com/img.png", detail: "high" },
        ],
      },
    ],
  };

  const result = openaiResponsesToOpenAIRequest(null, body, null, null);
  const userMsg = result.messages.find((m) => m.role === "user");
  // Guard the lookup so a dropped message fails with a readable assertion
  // rather than a TypeError on `userMsg.content`.
  assert.ok(userMsg, "should have a user message");

  const imgPart = userMsg.content.find((c) => c.type === "image_url");
  assert.ok(imgPart, "should have image_url content part");
  assert.equal(imgPart.image_url.url, "https://example.com/img.png");
  assert.equal(imgPart.image_url.detail, "high");
});
|
||||
|
||||
// When the Responses `input_image` part has no `detail`, the converted Chat
// `image_url` part must not carry one either.
test("Responses→Chat: input_image without detail omits detail field", () => {
  const body = {
    model: "gpt-4",
    input: [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_image", image_url: "https://example.com/img.png" }],
      },
    ],
  };

  const result = openaiResponsesToOpenAIRequest(null, body, null, null);
  const userMsg = result.messages.find((m) => m.role === "user");
  // Guard the lookup so a dropped message fails with a readable assertion
  // rather than a TypeError on `userMsg.content`.
  assert.ok(userMsg, "should have a user message");

  const imgPart = userMsg.content.find((c) => c.type === "image_url");
  assert.ok(imgPart);
  assert.equal(imgPart.image_url.url, "https://example.com/img.png");
  assert.equal(imgPart.image_url.detail, undefined);
});
|
||||
|
||||
// Chat `image_url` parts must become Responses `input_image` parts with the URL
// flattened onto `image_url` and `detail` lifted to the part itself.
test("Chat→Responses: image_url detail preserved as input_image", () => {
  const body = {
    model: "gpt-4",
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: "Describe" },
          { type: "image_url", image_url: { url: "https://example.com/img.png", detail: "low" } },
        ],
      },
    ],
  };

  const result = openaiToOpenAIResponsesRequest("gpt-4", body, true, null);
  const userItem = result.input.find((i) => i.type === "message" && i.role === "user");
  // Guard the lookup so a missing item fails with a readable assertion
  // rather than a TypeError on `userItem.content`.
  assert.ok(userItem, "should have a user message item");

  const imgPart = userItem.content.find((c) => c.type === "input_image");
  assert.ok(imgPart, "should have input_image content part");
  assert.equal(imgPart.image_url, "https://example.com/img.png");
  assert.equal(imgPart.detail, "low");
});
|
||||
|
||||
// A Chat `image_url` part without `detail` must convert to an `input_image`
// part whose `detail` is undefined.
test("Chat→Responses: image_url without detail omits detail", () => {
  const body = {
    model: "gpt-4",
    messages: [
      {
        role: "user",
        content: [{ type: "image_url", image_url: { url: "https://example.com/img.png" } }],
      },
    ],
  };

  const result = openaiToOpenAIResponsesRequest("gpt-4", body, true, null);
  const userItem = result.input.find((i) => i.type === "message" && i.role === "user");
  // Guard the lookup so a missing item fails with a readable assertion
  // rather than a TypeError on `userItem.content`.
  assert.ok(userItem, "should have a user message item");

  const imgPart = userItem.content.find((c) => c.type === "input_image");
  assert.ok(imgPart);
  assert.equal(imgPart.detail, undefined);
});
|
||||
|
||||
// Responses `input_file` parts must become Chat `file` parts with `file_id` and
// `filename` nested under `file`.
test("Responses→Chat: input_file converted to file content part", () => {
  const body = {
    model: "gpt-4",
    input: [
      {
        type: "message",
        role: "user",
        content: [{ type: "input_file", file_id: "file-abc", filename: "data.csv" }],
      },
    ],
  };

  const result = openaiResponsesToOpenAIRequest(null, body, null, null);
  const userMsg = result.messages.find((m) => m.role === "user");
  // Guard the lookup so a dropped message fails with a readable assertion
  // rather than a TypeError on `userMsg.content`.
  assert.ok(userMsg, "should have a user message");

  const filePart = userMsg.content.find((c) => c.type === "file");
  assert.ok(filePart, "should have file content part");
  assert.equal(filePart.file.file_id, "file-abc");
  assert.equal(filePart.file.filename, "data.csv");
});
|
||||
|
||||
// Chat `file` parts must become Responses `input_file` parts with `file_id` and
// `filename` flattened onto the part.
test("Chat→Responses: file content part converted to input_file", () => {
  const body = {
    model: "gpt-4",
    messages: [
      {
        role: "user",
        content: [{ type: "file", file: { file_id: "file-abc", filename: "data.csv" } }],
      },
    ],
  };

  const result = openaiToOpenAIResponsesRequest("gpt-4", body, true, null);
  const userItem = result.input.find((i) => i.type === "message" && i.role === "user");
  // Guard the lookup so a missing item fails with a readable assertion
  // rather than a TypeError on `userItem.content`.
  assert.ok(userItem, "should have a user message item");

  const filePart = userItem.content.find((c) => c.type === "input_file");
  assert.ok(filePart, "should have input_file content part");
  assert.equal(filePart.file_id, "file-abc");
  assert.equal(filePart.filename, "data.csv");
});
|
||||
|
||||
// The flat Responses tool_choice shape must be nested under `function` for Chat.
test("Responses→Chat: tool_choice {type:'function', name} wrapped to {type:'function', function:{name}}", () => {
  const responsesRequest = {
    model: "gpt-4",
    input: "hello",
    tool_choice: { type: "function", name: "get_weather" },
    tools: [{ type: "function", name: "get_weather", parameters: {} }],
  };
  const expectedChoice = { type: "function", function: { name: "get_weather" } };

  const converted = openaiResponsesToOpenAIRequest(null, responsesRequest, null, null);

  assert.deepEqual(converted.tool_choice, expectedChoice);
});
|
||||
|
||||
// The nested Chat tool_choice shape must be flattened for the Responses API.
test("Chat→Responses: tool_choice {type:'function', function:{name}} unwrapped to {type:'function', name}", () => {
  const chatRequest = {
    model: "gpt-4",
    messages: [{ role: "user", content: "hello" }],
    tool_choice: { type: "function", function: { name: "get_weather" } },
    tools: [{ type: "function", function: { name: "get_weather", parameters: {} } }],
  };
  const expectedChoice = { type: "function", name: "get_weather" };

  const converted = openaiToOpenAIResponsesRequest("gpt-4", chatRequest, true, null);

  assert.deepEqual(converted.tool_choice, expectedChoice);
});
|
||||
|
||||
// A string tool_choice needs no reshaping when going from Responses to Chat.
test("Responses→Chat: string tool_choice passes through unchanged", () => {
  const responsesRequest = { model: "gpt-4", input: "hello", tool_choice: "auto" };

  const { tool_choice: convertedChoice } = openaiResponsesToOpenAIRequest(
    null,
    responsesRequest,
    null,
    null
  );

  assert.equal(convertedChoice, "auto");
});
|
||||
|
||||
// A string tool_choice needs no reshaping when going from Chat to Responses.
test("Chat→Responses: string tool_choice passes through unchanged", () => {
  const chatRequest = {
    model: "gpt-4",
    messages: [{ role: "user", content: "hello" }],
    tool_choice: "required",
  };

  const { tool_choice: convertedChoice } = openaiToOpenAIResponsesRequest(
    "gpt-4",
    chatRequest,
    true,
    null
  );

  assert.equal(convertedChoice, "required");
});
|
||||
|
||||
// A tool_choice naming a built-in tool type must make the conversion throw,
// with the offending type named in the error message.
test("Responses→Chat: built-in tool_choice type throws unsupported error", () => {
  const responsesRequest = {
    model: "gpt-4",
    input: "hello",
    tool_choice: { type: "web_search_preview" },
  };
  const convert = () => openaiResponsesToOpenAIRequest(null, responsesRequest, null, null);

  assert.throws(convert, (err) => err.message.includes("web_search_preview"));
});
|
||||
|
||||
// A `web_search` entry in `tools` must make the conversion throw, with the
// tool type named in the error message.
test("Responses→Chat: web_search tool type throws unsupported error", () => {
  const responsesRequest = {
    model: "gpt-4",
    input: "search for cats",
    tools: [{ type: "web_search", search_context_size: "medium" }],
  };
  const convert = () => openaiResponsesToOpenAIRequest(null, responsesRequest, null, null);

  assert.throws(convert, (err) => err.message.includes("web_search"));
});
|
||||
|
||||
// A `computer` entry in `tools` must make the conversion throw, with the tool
// type named in the error message.
test("Responses→Chat: computer tool type throws unsupported error", () => {
  const responsesRequest = {
    model: "gpt-4",
    input: "click button",
    tools: [{ type: "computer" }],
  };
  const convert = () => openaiResponsesToOpenAIRequest(null, responsesRequest, null, null);

  assert.throws(convert, (err) => err.message.includes("computer"));
});
|
||||
|
||||
// An `mcp` entry in `tools` must make the conversion throw, with the tool type
// named in the error message.
test("Responses→Chat: mcp tool type throws unsupported error", () => {
  const responsesRequest = {
    model: "gpt-4",
    input: "hello",
    tools: [{ type: "mcp", server_label: "test", server_url: "https://example.com" }],
  };
  const convert = () => openaiResponsesToOpenAIRequest(null, responsesRequest, null, null);

  assert.throws(convert, (err) => err.message.includes("mcp"));
});
|
||||
|
||||
// When a Responses function_call carries `arguments` as an object, the Chat
// tool call must receive them as a compact JSON string.
test("Responses→Chat: non-string arguments are JSON-stringified", () => {
  const body = {
    model: "gpt-4",
    input: [
      { type: "function_call", call_id: "c1", name: "fn", arguments: { key: "val" } },
      { type: "function_call_output", call_id: "c1", output: "ok" },
    ],
  };

  const result = openaiResponsesToOpenAIRequest(null, body, null, null);
  const assistantMsg = result.messages.find((m) => m.role === "assistant");
  // Guard the lookup so a missing message fails with a readable assertion
  // rather than a TypeError on `assistantMsg.tool_calls`.
  assert.ok(assistantMsg, "should have an assistant message");

  const serializedArgs = assistantMsg.tool_calls[0].function.arguments;
  assert.equal(typeof serializedArgs, "string");
  assert.equal(serializedArgs, '{"key":"val"}');
});
|
||||
|
||||
// Array-valued tool message content must stay an array on the resulting
// function_call_output, with `text` parts retyped as `input_text`.
test("Chat→Responses: array tool content converts text→input_text types", () => {
  const body = {
    model: "gpt-4",
    messages: [
      { role: "user", content: "hello" },
      {
        role: "assistant",
        content: null,
        tool_calls: [{ id: "c1", type: "function", function: { name: "fn", arguments: "{}" } }],
      },
      {
        role: "tool",
        tool_call_id: "c1",
        content: [{ type: "text", text: "result data" }],
      },
    ],
  };

  const result = openaiToOpenAIResponsesRequest("gpt-4", body, true, null);
  const outputItem = result.input.find((i) => i.type === "function_call_output");
  // Guard the lookup so a missing item fails with a readable assertion
  // rather than a TypeError on `outputItem.output`.
  assert.ok(outputItem, "should have function_call_output item");

  assert.ok(Array.isArray(outputItem.output), "output should be array");
  assert.equal(outputItem.output[0].type, "input_text");
  assert.equal(outputItem.output[0].text, "result data");
});
|
||||
|
||||
// A plain `function` tool must survive the conversion as the single tool.
test("Responses→Chat: function tool type passes through", () => {
  const greetTool = { type: "function", name: "greet", parameters: {} };
  const responsesRequest = { model: "gpt-4", input: "hello", tools: [greetTool] };

  const { tools } = openaiResponsesToOpenAIRequest(null, responsesRequest, null, null);

  assert.equal(tools.length, 1);
  assert.equal(tools[0].type, "function");
});
|
||||
|
||||
// The legacy `function_call` field on an assistant message must become a
// function_call input item that keeps name and arguments and has a call_id.
test("Chat→Responses: deprecated function_call field on assistant converted to function_call item", () => {
  const legacyAssistantTurn = {
    role: "assistant",
    content: null,
    function_call: { name: "get_weather", arguments: '{"city":"NYC"}' },
  };
  const chatRequest = {
    model: "gpt-4",
    messages: [{ role: "user", content: "weather?" }, legacyAssistantTurn],
  };

  const { input } = openaiToOpenAIResponsesRequest("gpt-4", chatRequest, true, null);
  const callItem = input.find((item) => item.type === "function_call");

  assert.ok(callItem, "should have function_call input item");
  assert.equal(callItem.name, "get_weather");
  assert.equal(callItem.arguments, '{"city":"NYC"}');
  assert.ok(callItem.call_id, "should have a call_id");
});
|
||||
|
||||
// A legacy `function`-role message must become a function_call_output item
// whose call_id matches the function_call item built from the preceding
// assistant `function_call` field.
test("Chat→Responses: deprecated function role message converted to function_call_output", () => {
  const body = {
    model: "gpt-4",
    messages: [
      { role: "user", content: "weather?" },
      {
        role: "assistant",
        content: null,
        function_call: { name: "get_weather", arguments: '{"city":"NYC"}' },
      },
      { role: "function", name: "get_weather", content: '{"temp":72}' },
    ],
  };

  const result = openaiToOpenAIResponsesRequest("gpt-4", body, true, null);

  const fcOutput = result.input.find((i) => i.type === "function_call_output");
  assert.ok(fcOutput, "should have function_call_output item");
  assert.equal(fcOutput.output, '{"temp":72}');

  // The call_ids should match between function_call and function_call_output
  const fcItem = result.input.find((i) => i.type === "function_call");
  // Guard the lookup so a missing item fails with a readable assertion
  // rather than a TypeError on `fcItem.call_id`.
  assert.ok(fcItem, "should have function_call input item");
  assert.equal(fcOutput.call_id, fcItem.call_id);
});
|
||||
|
||||
const { openaiToOpenAIResponsesResponse, openaiResponsesToOpenAIResponse } = await import(
|
||||
"../../open-sse/translator/response/openai-responses.ts"
|
||||
);
|
||||
const { initState } = await import("../../open-sse/translator/index.ts");
|
||||
const { FORMATS } = await import("../../open-sse/translator/formats.ts");
|
||||
|
||||
// A usage-only chunk (empty `choices`) arriving between the content and finish
// chunks must have its usage reported on the `response.completed` event.
test("Chat→Responses streaming: usage-only chunk is captured (not dropped)", () => {
  const state = initState(FORMATS.OPENAI_RESPONSES);

  // 1) Ordinary content delta.
  openaiToOpenAIResponsesResponse(
    { choices: [{ index: 0, delta: { content: "hello" }, finish_reason: null }], id: "c1" },
    state
  );

  // 2) Chunk carrying only usage, with no choices.
  const usageOnlyResult = openaiToOpenAIResponsesResponse(
    {
      choices: [],
      usage: { prompt_tokens: 10, completion_tokens: 5, total_tokens: 15 },
    },
    state
  );
  assert.ok(Array.isArray(usageOnlyResult));

  // 3) Finish chunk, which should produce the completed event.
  const finalEvents = openaiToOpenAIResponsesResponse(
    { choices: [{ index: 0, delta: {}, finish_reason: "stop" }] },
    state
  );
  const completed = finalEvents.find((evt) => evt.event === "response.completed");

  assert.ok(completed, "should have completed event");
  assert.ok(completed.data.response.usage, "completed event should include usage");
  assert.equal(completed.data.response.usage.prompt_tokens, 10);
});
|
||||
|
||||
// After a text delta and a finish chunk, the `response.completed` event must
// carry a non-empty `output` array containing a `message` item.
test("Chat→Responses streaming: completed event includes accumulated output", () => {
  const state = initState(FORMATS.OPENAI_RESPONSES);

  // Text content
  const chunk = {
    choices: [{ index: 0, delta: { content: "hello world" }, finish_reason: null }],
    id: "c1",
  };
  openaiToOpenAIResponsesResponse(chunk, state);

  // Finish
  const finishChunk = { choices: [{ index: 0, delta: {}, finish_reason: "stop" }] };
  const events = openaiToOpenAIResponsesResponse(finishChunk, state);

  const completedEvent = events.find((e) => e.event === "response.completed");
  // Guard the lookup so a missing event fails with a readable assertion
  // rather than a TypeError on `completedEvent.data`.
  assert.ok(completedEvent, "should have completed event");
  assert.ok(completedEvent.data.response.output, "completed should have output");
  assert.ok(completedEvent.data.response.output.length > 0, "output should not be empty");

  const msgOutput = completedEvent.data.response.output.find((o) => o.type === "message");
  assert.ok(msgOutput, "should have message output item");
});
|
||||
|
||||
// A Responses reasoning-summary text delta must come back as a Chat chunk whose
// first choice carries the text in `delta.reasoning_content`.
test("Responses→Chat streaming: reasoning delta emits reasoning_content in Chat chunk", () => {
  // Translator state is built by hand here rather than through initState.
  const state = {
    started: false,
    chatId: null,
    created: null,
    toolCallIndex: 0,
    finishReasonSent: false,
  };
  const reasoningDelta = {
    type: "response.reasoning_summary_text.delta",
    delta: "thinking step...",
    item_id: "rs_1",
    output_index: 0,
    summary_index: 0,
  };

  const chatChunk = openaiResponsesToOpenAIResponse(reasoningDelta, state);

  assert.ok(chatChunk, "should return a chunk");
  assert.equal(chatChunk.choices[0].delta.reasoning_content, "thinking step...");
});
|
||||
|
||||
// One content delta holding two <think>…</think> sections must not leak a
// literal "<think>" into the emitted output-text deltas.
test("Chat→Responses streaming: multiple <think> tags in one chunk handled", () => {
  const state = initState(FORMATS.OPENAI_RESPONSES);
  const taggedContent = "<think>first</think>middle<think>second</think>end";

  const events = openaiToOpenAIResponsesResponse(
    {
      choices: [{ index: 0, delta: { content: taggedContent }, finish_reason: null }],
      id: "c1",
    },
    state
  );

  // Collect the payload of every output-text delta, in emission order.
  const textDeltas = [];
  for (const evt of events) {
    if (evt.event === "response.output_text.delta") {
      textDeltas.push(evt.data.delta);
    }
  }
  const combined = textDeltas.join("");

  assert.ok(!combined.includes("<think>"), `text should not contain <think> tag, got: ${combined}`);
});
|
||||
Reference in New Issue
Block a user