Compare commits
10 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 370070f489 | |||
| 7168f4014d | |||
| f0912feefb | |||
| af338d447b | |||
| 6fad06f659 | |||
| 1d51d8ff27 | |||
| 8af9bd1ac3 | |||
| 9fc3845d92 | |||
| 93bbe8e7a8 | |||
| 46acd16999 |
@@ -2,6 +2,51 @@
|
||||
|
||||
## [Unreleased]
|
||||
|
||||
---
|
||||
|
||||
## [3.3.0] - 2026-03-29
|
||||
|
||||
### ✨ Enhancements & Refactoring
|
||||
|
||||
- **Release Stabilization** — Finalized v3.2.9 release (combo diagnostics, quality gates, Gemini tool fix) and created missing git tag. Consolidated all staged changes into a single atomic release commit.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Auto-Update Test** — Fixed `buildDockerComposeUpdateScript` test assertion to match unexpanded shell variable references (`$TARGET_TAG`, `${TARGET_TAG#v}`) in the generated deploy script, aligning with the refactored template from v3.2.8.
|
||||
- **Circuit Breaker Test** — Hardened `combo-circuit-breaker.test.mjs` by injecting `maxRetries: 0` to prevent retry inflation from skewing failure count assertions during breaker state transitions.
|
||||
|
||||
---
|
||||
|
||||
## [3.2.9] - 2026-03-29
|
||||
|
||||
### ✨ Enhancements & Refactoring
|
||||
|
||||
- **Combo Diagnostics** — Introduced a live test bypass flag (`forceLiveComboTest`) allowing administrators to execute real upstream health checks that bypass all local circuit-breaker and cooldown state mechanisms, enabling precise diagnostics during rolling outages (PR #759)
|
||||
- **Quality Gates** — Added automated response quality validation for combos and officially integrated `claude-4.6` model support into the core routing schemas (PR #762)
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Tool Definition Validation** — Repaired Gemini API integration by normalizing enum types inside tool definitions, preventing upstream HTTP 400 parameter errors (PR #760)
|
||||
|
||||
---
|
||||
|
||||
## [3.2.8] - 2026-03-29
|
||||
|
||||
### ✨ Enhancements & Refactoring
|
||||
|
||||
- **Docker Auto-Update UI** — Integrated a detached background update process for Docker Compose deployments. The Dashboard UI now seamlessly tracks update lifecycle events combining JSON REST responses with SSE streaming progress overlays for robust cross-environment reliability.
|
||||
- **Cache Analytics** — Repaired zero-metrics visualization mapping by migrating Semantic Cache telemetry logs directly into the centralized tracking SQLite module.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Authentication Logic** — Fixed a bug where saving dashboard settings or adding models failed with a 401 Unauthorized error when `requireLogin` was disabled. API endpoints now correctly evaluate the global authentication toggle. Resolved global redirection by reactivating `src/middleware.ts`.
|
||||
- **CLI Tool Detection (Windows)** — Prevented fatal initialization exceptions during CLI environment detection by catching `cross-spawn` ENOENT errors correctly. Added explicit detection paths for `\AppData\Local\droid\droid.exe`.
|
||||
- **Codex Native Passthrough** — Normalized model translation parameters preventing context poisoning in proxy pass-through mode, enforcing generic `store: false` constraints explicitly for all Codex-originated requests.
|
||||
- **SSE Token Reporting** — Normalized provider tool-call chunk `finish_reason` detection, fixing 0% Usage analytics for stream-only responses missing strict `<DONE>` indicators.
|
||||
- **DeepSeek `<think>` Tags** — Implemented an explicit `<think>` extraction mapping inside `responsesHandler.ts`, ensuring DeepSeek reasoning streams map equivalently to native Anthropic `<thinking>` structures.
|
||||
|
||||
---
|
||||
|
||||
## [3.2.7] - 2026-03-29
|
||||
|
||||
### Fixed
|
||||
|
||||
+1
-1
@@ -43,7 +43,7 @@ See [IDE Configs](integrations/ide-configs.md) for Antigravity, Cursor, Copilot,
|
||||
| `omniroute_simulate_route` | Dry-run routing simulation with fallback tree |
|
||||
| `omniroute_set_budget_guard` | Session budget with degrade/block/alert actions |
|
||||
| `omniroute_set_resilience_profile` | Apply conservative/balanced/aggressive preset |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo |
|
||||
| `omniroute_test_combo` | Live-test all models in a combo via a real upstream request |
|
||||
| `omniroute_get_provider_metrics` | Detailed metrics for one provider |
|
||||
| `omniroute_best_combo_for_task` | Task-fitness recommendation with alternatives |
|
||||
| `omniroute_explain_route` | Explain a past routing decision |
|
||||
|
||||
+1
-1
@@ -1,7 +1,7 @@
|
||||
openapi: 3.1.0
|
||||
info:
|
||||
title: OmniRoute API
|
||||
version: 3.2.7
|
||||
version: 3.3.0
|
||||
description: |
|
||||
OmniRoute is a local-first AI API proxy router. It provides an OpenAI-compatible
|
||||
endpoint that routes requests to multiple AI providers with load balancing,
|
||||
|
||||
@@ -500,6 +500,12 @@ export const REGISTRY: Record<string, RegistryEntry> = {
|
||||
clientVersion: "1.1.3",
|
||||
models: [
|
||||
{ id: "default", name: "Auto (Server Picks)" },
|
||||
{ id: "claude-4.6-opus-high-thinking", name: "Claude 4.6 Opus High Thinking" },
|
||||
{ id: "claude-4.6-opus-high", name: "Claude 4.6 Opus High" },
|
||||
{ id: "claude-4.6-sonnet-high-thinking", name: "Claude 4.6 Sonnet High Thinking" },
|
||||
{ id: "claude-4.6-sonnet-high", name: "Claude 4.6 Sonnet High" },
|
||||
{ id: "claude-4.6-haiku", name: "Claude 4.6 Haiku" },
|
||||
{ id: "claude-4.6-opus", name: "Claude 4.6 Opus" },
|
||||
{ id: "claude-4.5-opus-high-thinking", name: "Claude 4.5 Opus High Thinking" },
|
||||
{ id: "claude-4.5-opus-high", name: "Claude 4.5 Opus High" },
|
||||
{ id: "claude-4.5-sonnet-thinking", name: "Claude 4.5 Sonnet Thinking" },
|
||||
|
||||
@@ -43,13 +43,8 @@ import {
|
||||
} from "@/lib/localDb";
|
||||
import { getExecutor } from "../executors/index.ts";
|
||||
import { getCacheControlSettings } from "@/lib/cacheControlSettings";
|
||||
import {
|
||||
shouldPreserveCacheControl,
|
||||
trackCacheMetrics,
|
||||
recordCacheHit,
|
||||
type CacheControlMetrics,
|
||||
} from "../utils/cacheControlPolicy.ts";
|
||||
import { getCacheMetrics, updateCacheMetrics } from "@/lib/db/settings.ts";
|
||||
import { shouldPreserveCacheControl } from "../utils/cacheControlPolicy.ts";
|
||||
import { getCacheMetrics } from "@/lib/db/settings.ts";
|
||||
|
||||
import {
|
||||
parseCodexQuotaHeaders,
|
||||
@@ -701,27 +696,6 @@ export async function handleChatCore({
|
||||
settings: { alwaysPreserveClientCache: cacheControlMode },
|
||||
});
|
||||
|
||||
// Track cache metrics for this request
|
||||
let currentMetrics = await getCacheMetrics().catch(() => ({
|
||||
totalRequests: 0,
|
||||
requestsWithCacheControl: 0,
|
||||
totalInputTokens: 0,
|
||||
totalCachedTokens: 0,
|
||||
totalCacheCreationTokens: 0,
|
||||
tokensSaved: 0,
|
||||
estimatedCostSaved: 0,
|
||||
byProvider: {},
|
||||
byStrategy: {},
|
||||
lastUpdated: new Date().toISOString(),
|
||||
}));
|
||||
|
||||
currentMetrics = trackCacheMetrics({
|
||||
preserved: preserveCacheControl,
|
||||
provider,
|
||||
strategy: comboStrategy,
|
||||
metrics: currentMetrics,
|
||||
});
|
||||
|
||||
if (preserveCacheControl) {
|
||||
log?.debug?.(
|
||||
"CACHE",
|
||||
@@ -1473,18 +1447,6 @@ export async function handleChatCore({
|
||||
(usage as any).prompt_tokens_details?.cache_creation_tokens
|
||||
);
|
||||
|
||||
if (cachedTokens > 0 || cacheCreationTokens > 0) {
|
||||
currentMetrics = updateCacheTokenMetrics({
|
||||
metrics: currentMetrics,
|
||||
provider,
|
||||
strategy: comboStrategy,
|
||||
inputTokens,
|
||||
cachedTokens,
|
||||
cacheCreationTokens,
|
||||
costSaved: 0, // Will be calculated based on pricing
|
||||
});
|
||||
}
|
||||
|
||||
saveRequestUsage({
|
||||
provider: provider || "unknown",
|
||||
model: model || "unknown",
|
||||
@@ -1592,11 +1554,6 @@ export async function handleChatCore({
|
||||
claudeCacheUsageMeta: cacheUsageLogMeta,
|
||||
});
|
||||
|
||||
// Persist cache metrics to database
|
||||
updateCacheMetrics(currentMetrics).catch((err) => {
|
||||
log?.debug?.("CACHE", `Failed to persist cache metrics: ${err?.message || "unknown"}`);
|
||||
});
|
||||
|
||||
return {
|
||||
success: true,
|
||||
response: new Response(JSON.stringify(translatedResponse), {
|
||||
@@ -1633,6 +1590,7 @@ export async function handleChatCore({
|
||||
responseBody: streamResponseBody,
|
||||
providerPayload,
|
||||
clientPayload,
|
||||
ttft,
|
||||
}) => {
|
||||
const cacheUsageLogMeta = buildCacheUsageLogMeta(streamUsage);
|
||||
|
||||
@@ -1649,17 +1607,22 @@ export async function handleChatCore({
|
||||
(streamUsage as any).prompt_tokens_details?.cache_creation_tokens
|
||||
);
|
||||
|
||||
if (cachedTokens > 0 || cacheCreationTokens > 0) {
|
||||
currentMetrics = updateCacheTokenMetrics({
|
||||
metrics: currentMetrics,
|
||||
provider,
|
||||
strategy: comboStrategy,
|
||||
inputTokens,
|
||||
cachedTokens,
|
||||
cacheCreationTokens,
|
||||
costSaved: 0,
|
||||
});
|
||||
}
|
||||
saveRequestUsage({
|
||||
provider: provider || "unknown",
|
||||
model: model || "unknown",
|
||||
tokens: streamUsage,
|
||||
status: String(streamStatus || 200),
|
||||
success: streamStatus === 200,
|
||||
latencyMs: Date.now() - startTime,
|
||||
timeToFirstTokenMs: ttft,
|
||||
errorCode: null,
|
||||
timestamp: new Date().toISOString(),
|
||||
connectionId: connectionId || undefined,
|
||||
apiKeyId: apiKeyInfo?.id || undefined,
|
||||
apiKeyName: apiKeyInfo?.name || undefined,
|
||||
}).catch((err) => {
|
||||
console.error("Failed to save usage stats:", err.message);
|
||||
});
|
||||
}
|
||||
|
||||
persistAttemptLogs({
|
||||
@@ -1673,11 +1636,6 @@ export async function handleChatCore({
|
||||
claudeCacheUsageMeta: cacheUsageLogMeta,
|
||||
});
|
||||
|
||||
// Persist cache metrics to database
|
||||
updateCacheMetrics(currentMetrics).catch((err) => {
|
||||
log?.debug?.("CACHE", `Failed to persist cache metrics: ${err?.message || "unknown"}`);
|
||||
});
|
||||
|
||||
if (apiKeyInfo?.id && streamUsage) {
|
||||
calculateCost(provider, model, streamUsage)
|
||||
.then((estimatedCost) => {
|
||||
|
||||
@@ -80,16 +80,24 @@ export async function handleEmbedding({
|
||||
};
|
||||
}
|
||||
|
||||
// Build upstream request
|
||||
// Build upstream request — start with standard fields, then forward any extras
|
||||
// the client sent (e.g. input_type, user, truncate for NVIDIA NIM asymmetric models).
|
||||
const KNOWN_FIELDS = new Set(["model", "input", "dimensions", "encoding_format"]);
|
||||
|
||||
const upstreamBody: Record<string, unknown> = {
|
||||
model: model,
|
||||
input: body.input,
|
||||
};
|
||||
|
||||
// Pass optional parameters
|
||||
if (body.dimensions !== undefined) upstreamBody.dimensions = body.dimensions;
|
||||
if (body.encoding_format !== undefined) upstreamBody.encoding_format = body.encoding_format;
|
||||
|
||||
for (const [key, value] of Object.entries(body)) {
|
||||
if (!KNOWN_FIELDS.has(key) && value !== undefined) {
|
||||
upstreamBody[key] = value;
|
||||
}
|
||||
}
|
||||
|
||||
// Build headers
|
||||
const headers = {
|
||||
"Content-Type": "application/json",
|
||||
@@ -104,6 +112,12 @@ export async function handleEmbedding({
|
||||
} else if (providerConfig.authHeader === "x-api-key") {
|
||||
headers["x-api-key"] = token;
|
||||
}
|
||||
} else if (providerConfig.authType !== "none") {
|
||||
return {
|
||||
success: false,
|
||||
status: 401,
|
||||
error: `No valid authentication token for provider ${provider}. Check provider credentials.`,
|
||||
};
|
||||
}
|
||||
|
||||
if (log) {
|
||||
|
||||
@@ -52,6 +52,10 @@ export function parseSSEToOpenAIResponse(rawSSE, fallbackModel) {
|
||||
if (typeof delta.reasoning_content === "string" && delta.reasoning_content.length > 0) {
|
||||
reasoningParts.push(delta.reasoning_content);
|
||||
}
|
||||
// Normalize `reasoning` alias (NVIDIA kimi-k2.5 etc.)
|
||||
if (typeof delta.reasoning === "string" && delta.reasoning.length > 0 && !delta.reasoning_content) {
|
||||
reasoningParts.push(delta.reasoning);
|
||||
}
|
||||
|
||||
// T18: Accumulate tool calls correctly across streamed chunks
|
||||
if (delta.tool_calls) {
|
||||
@@ -94,12 +98,14 @@ export function parseSSEToOpenAIResponse(rawSSE, fallbackModel) {
|
||||
}
|
||||
}
|
||||
|
||||
const joinedContent = contentParts.length > 0 ? contentParts.join("").trim() : null;
|
||||
const joinedReasoning = reasoningParts.length > 0 ? reasoningParts.join("").trim() : null;
|
||||
const message: Record<string, unknown> = {
|
||||
role: "assistant",
|
||||
content: contentParts.length > 0 ? contentParts.join("") : null,
|
||||
content: joinedContent || null,
|
||||
};
|
||||
if (reasoningParts.length > 0) {
|
||||
message.reasoning_content = reasoningParts.join("");
|
||||
if (joinedReasoning) {
|
||||
message.reasoning_content = joinedReasoning;
|
||||
}
|
||||
|
||||
const finalToolCalls = [...accumulatedToolCalls.values()].filter(Boolean).sort((a, b) => {
|
||||
|
||||
@@ -137,7 +137,7 @@ omniroute --mcp
|
||||
| 9 | `omniroute_simulate_route` | `read:health`, `read:combos` | Dry-run routing simulation showing fallback tree and estimated costs |
|
||||
| 10 | `omniroute_set_budget_guard` | `write:budget` | Set session budget with action on exceed: `degrade`, `block`, or `alert` |
|
||||
| 11 | `omniroute_set_resilience_profile` | `write:resilience` | Apply resilience profile: `aggressive`, `balanced`, or `conservative` |
|
||||
| 12 | `omniroute_test_combo` | `execute:completions`, `read:combos` | Test each provider in a combo with a real prompt, report latency/cost |
|
||||
| 12 | `omniroute_test_combo` | `execute:completions`, `read:combos` | Test each provider in a combo with a real prompt and a real upstream call, report latency/cost |
|
||||
| 13 | `omniroute_get_provider_metrics` | `read:health` | Per-provider metrics with latency percentiles (p50/p95/p99), circuit breaker |
|
||||
| 14 | `omniroute_best_combo_for_task` | `read:combos`, `read:health` | AI-powered combo recommendation by task type with budget/latency constraints |
|
||||
| 15 | `omniroute_explain_route` | `read:health`, `read:usage` | Explain why a request was routed to a provider (scoring factors, fallbacks) |
|
||||
|
||||
@@ -17,6 +17,10 @@ export const ACCOUNT_DEACTIVATED_SIGNALS = [
|
||||
"account has been disabled",
|
||||
"your account has been suspended",
|
||||
"this account is deactivated",
|
||||
// AG (Antigravity/Google Cloud Code) permanent ban signals
|
||||
"verify your account to continue",
|
||||
"this service has been disabled in this account for violation",
|
||||
"this service has been disabled in this account",
|
||||
];
|
||||
|
||||
// T10 (sub2api PR #1169): Signals that indicate billing credits are exhausted.
|
||||
|
||||
+110
-2
@@ -45,6 +45,80 @@ const DEFAULT_MODEL_P95_MS = {
|
||||
};
|
||||
const MIN_HISTORY_SAMPLES = 10;
|
||||
|
||||
/**
 * Validate that a successful (HTTP 200) non-streaming response actually contains
 * meaningful content. Returns { valid: true } or { valid: false, reason }.
 *
 * Only inspects non-streaming JSON/text responses — streaming responses are passed
 * through because buffering the full stream would defeat the purpose of streaming.
 * The body is read from a clone, so the caller can still consume `response`.
 *
 * Checks:
 *   1. Body is non-empty and valid JSON (an SSE payload starting with `data:` is
 *      tolerated and treated as valid).
 *   2. When `choices` is present, at least one choice has non-empty content or
 *      tool_calls. Every choice is examined, not just the first one.
 *   3. When `choices` is absent, a body carrying `error` (and none of
 *      output/result/data/response) is rejected as an upstream error.
 *
 * Any failure to clone or read the body is treated as "valid" so that the quality
 * gate never turns a usable response into a failure on its own.
 *
 * @param response    Upstream response to inspect (left unconsumed).
 * @param isStreaming When true, validation is skipped entirely.
 * @param log         Logger handle; accepted for interface compatibility and not
 *                    used by this function.
 * @returns `{ valid: true }`, or `{ valid: false, reason }` describing the failed
 *          check. `clonedResponse` is part of the declared type but is never set here.
 */
async function validateResponseQuality(
  response: Response,
  isStreaming: boolean,
  log: { warn?: (...args: any[]) => void }
): Promise<{ valid: boolean; reason?: string; clonedResponse?: Response }> {
  if (isStreaming) return { valid: true };

  const contentType = response.headers.get("content-type") || "";
  if (!contentType.includes("application/json") && !contentType.includes("text/")) {
    return { valid: true };
  }

  // Read from a clone so the original body stays available to the caller.
  let text: string;
  try {
    text = await response.clone().text();
  } catch {
    return { valid: true };
  }

  const trimmed = text ? text.trim() : "";
  if (trimmed.length === 0) {
    return { valid: false, reason: "empty response body" };
  }

  let json: any;
  try {
    json = JSON.parse(text);
  } catch {
    // An SSE payload delivered on a non-streaming request is not JSON but is still usable.
    // Compare against the trimmed text so leading blank lines do not defeat the check.
    if (trimmed.startsWith("data:")) return { valid: true };
    return { valid: false, reason: "response is not valid JSON" };
  }

  const choices = json?.choices;
  if (!Array.isArray(choices) || choices.length === 0) {
    // Non-chat payload shapes are accepted as-is.
    if (json?.output || json?.result || json?.data || json?.response) return { valid: true };
    if (json?.error) {
      const detail = json.error?.message || JSON.stringify(json.error).substring(0, 200);
      return { valid: false, reason: `upstream error in 200 body: ${detail}` };
    }
    return { valid: true };
  }

  // Accept the response as soon as any choice carries content or tool calls.
  let sawMessage = false;
  for (const choice of choices) {
    const message = choice?.message || choice?.delta;
    if (!message) continue;
    sawMessage = true;

    const content = message.content;
    const hasContent = content !== null && content !== undefined && content !== "";
    const hasToolCalls = Array.isArray(message.tool_calls) && message.tool_calls.length > 0;
    if (hasContent || hasToolCalls) return { valid: true };
  }

  return {
    valid: false,
    reason: sawMessage
      ? "empty content and no tool_calls in response"
      : "choice has no message object",
  };
}
|
||||
|
||||
// In-memory atomic counter per combo for round-robin distribution
|
||||
// Resets on server restart (by design — no stale state)
|
||||
const rrCounters = new Map();
|
||||
@@ -872,14 +946,31 @@ export async function handleComboChat({
|
||||
|
||||
const result = await handleSingleModelWrapped(body, modelStr);
|
||||
|
||||
// Success — return response
|
||||
// Success — validate response quality before returning
|
||||
if (result.ok) {
|
||||
const quality = await validateResponseQuality(result, !!body.stream, log);
|
||||
if (!quality.valid) {
|
||||
log.warn(
|
||||
"COMBO",
|
||||
`Model ${modelStr} returned 200 but failed quality check: ${quality.reason}`
|
||||
);
|
||||
breaker._onFailure();
|
||||
recordComboRequest(combo.name, modelStr, {
|
||||
success: false,
|
||||
latencyMs: Date.now() - startTime,
|
||||
fallbackCount,
|
||||
strategy,
|
||||
});
|
||||
if (i > 0) fallbackCount++;
|
||||
break; // move to next model
|
||||
}
|
||||
resolvedByModel = modelStr;
|
||||
const latencyMs = Date.now() - startTime;
|
||||
log.info(
|
||||
"COMBO",
|
||||
`Model ${modelStr} succeeded (${latencyMs}ms, ${fallbackCount} fallbacks)`
|
||||
);
|
||||
breaker._onSuccess();
|
||||
recordComboRequest(combo.name, modelStr, {
|
||||
success: true,
|
||||
latencyMs,
|
||||
@@ -1139,13 +1230,30 @@ async function handleRoundRobinCombo({
|
||||
|
||||
const result = await handleSingleModel(body, modelStr);
|
||||
|
||||
// Success
|
||||
// Success — validate response quality before returning
|
||||
if (result.ok) {
|
||||
const quality = await validateResponseQuality(result, !!body.stream, log);
|
||||
if (!quality.valid) {
|
||||
log.warn(
|
||||
"COMBO-RR",
|
||||
`${modelStr} returned 200 but failed quality check: ${quality.reason}`
|
||||
);
|
||||
breaker._onFailure();
|
||||
recordComboRequest(combo.name, modelStr, {
|
||||
success: false,
|
||||
latencyMs: Date.now() - startTime,
|
||||
fallbackCount,
|
||||
strategy: "round-robin",
|
||||
});
|
||||
if (offset > 0) fallbackCount++;
|
||||
break; // move to next model
|
||||
}
|
||||
const latencyMs = Date.now() - startTime;
|
||||
log.info(
|
||||
"COMBO-RR",
|
||||
`${modelStr} succeeded (${latencyMs}ms, ${fallbackCount} fallbacks)`
|
||||
);
|
||||
breaker._onSuccess();
|
||||
recordComboRequest(combo.name, modelStr, {
|
||||
success: true,
|
||||
latencyMs,
|
||||
|
||||
@@ -48,3 +48,54 @@ export function supportsToolCalling(modelStr: string): boolean {
|
||||
|
||||
return !blocked;
|
||||
}
|
||||
|
||||
// Denylist of provider/model patterns that must not receive reasoning/thinking
// parameters. Per the original note, AG (Antigravity) claude-sonnet-4-6 routes
// through a Google internal API that returns 400 if thinking params are included.
// The list is the cross product of both provider spellings and the Sonnet model
// ids, in the order: all "antigravity/…" entries first, then all "ag/…" entries.
const REASONING_UNSUPPORTED_PATTERNS = ["antigravity", "ag"].flatMap((providerPrefix) =>
  ["claude-sonnet-4-6", "claude-sonnet-4-5", "claude-sonnet-4"].map(
    (modelSuffix) => `${providerPrefix}/${modelSuffix}`
  )
);
|
||||
|
||||
/**
 * Look up the `supportsReasoning` flag declared for a model in the provider registry.
 *
 * The provider is first mapped through PROVIDER_ID_TO_ALIAS (falling back to the
 * given value), then the first PROVIDER_MODELS entry whose `id` equals `modelId`
 * is consulted.
 *
 * @returns the entry's boolean `supportsReasoning` value, or null when the provider
 *          has no model list, the model is not listed, or the flag is not a boolean.
 */
function getRegistryReasoningFlag(providerIdOrAlias: string, modelId: string): boolean | null {
  const alias = PROVIDER_ID_TO_ALIAS[providerIdOrAlias] || providerIdOrAlias;
  const catalog = PROVIDER_MODELS[alias];
  if (!Array.isArray(catalog)) return null;

  for (const entry of catalog) {
    if (entry?.id !== modelId) continue;
    // Only the first matching entry decides the outcome.
    const flag = entry?.supportsReasoning;
    return typeof flag === "boolean" ? flag : null;
  }
  return null;
}
|
||||
|
||||
/**
 * Decide whether reasoning/thinking parameters may be sent for a model string.
 *
 * Resolution order:
 *   1) If the provider registry declares a boolean `supportsReasoning` flag for the
 *      parsed provider/model, that flag wins.
 *   2) Otherwise the lower-cased model string is checked against
 *      REASONING_UNSUPPORTED_PATTERNS; a match means "unsupported".
 *   3) Everything else (including an empty model string) defaults to true.
 */
export function supportsReasoning(modelStr: string): boolean {
  const parsedRef = parseModel(modelStr);
  const providerKey = parsedRef.provider || parsedRef.providerAlias || "";

  if (providerKey) {
    const registryFlag = getRegistryReasoningFlag(providerKey, parsedRef.model || modelStr);
    if (registryFlag !== null) return registryFlag;
  }

  const lowered = String(modelStr || "").toLowerCase();
  if (lowered === "") return true;

  // Substring containment also covers exact matches and "/<pattern>" suffixes,
  // so a single `includes` test is equivalent to checking all three forms.
  for (const pattern of REASONING_UNSUPPORTED_PATTERNS) {
    if (lowered.includes(pattern)) return false;
  }
  return true;
}
|
||||
|
||||
@@ -14,6 +14,7 @@ export const ThinkingMode = {
|
||||
};
|
||||
|
||||
import { capThinkingBudget, getDefaultThinkingBudget } from "@/shared/constants/modelSpecs";
|
||||
import { supportsReasoning } from "./modelCapabilities.ts";
|
||||
|
||||
// Effort → budget token mapping
|
||||
export const EFFORT_BUDGETS = {
|
||||
@@ -151,6 +152,13 @@ export function applyThinkingBudget(body, config = null) {
|
||||
const cfg = config || _config;
|
||||
if (!body || typeof body !== "object") return body;
|
||||
|
||||
// Early exit: strip ALL reasoning/thinking params for models that don't support them.
|
||||
// Sending thinking params to unsupported models (e.g. AG claude-sonnet-4-6) causes 400 errors.
|
||||
const modelStr = typeof body.model === "string" ? body.model : "";
|
||||
if (modelStr && !supportsReasoning(modelStr)) {
|
||||
return stripThinkingConfig(body);
|
||||
}
|
||||
|
||||
// Pre-processing: convert string thinkingLevel to numeric budget
|
||||
let processed = normalizeThinkingLevel(body);
|
||||
|
||||
|
||||
@@ -167,13 +167,19 @@ function convertConstToEnum(obj) {
|
||||
}
|
||||
|
||||
// Convert enum values to strings (Gemini requires string enum values)
|
||||
// For integer types, remove enum entirely as Gemini doesn't support it
|
||||
function convertEnumValuesToStrings(obj) {
|
||||
if (!obj || typeof obj !== "object") return;
|
||||
|
||||
if (obj.enum && Array.isArray(obj.enum)) {
|
||||
obj.enum = obj.enum.map((v) => String(v));
|
||||
if (!obj.type) {
|
||||
obj.type = "string";
|
||||
// Gemini only supports enum for string types, not integer
|
||||
if (obj.type === "integer" || obj.type === "number") {
|
||||
delete obj.enum;
|
||||
} else {
|
||||
obj.enum = obj.enum.map((v) => String(v));
|
||||
if (!obj.type) {
|
||||
obj.type = "string";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -159,8 +159,9 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
|
||||
// Track content length for usage estimation (both modes)
|
||||
let totalContentLength = 0;
|
||||
// Passthrough: accumulate content for call log response body
|
||||
// Passthrough: accumulate content and reasoning separately for call log response body
|
||||
let passthroughAccumulatedContent = "";
|
||||
let passthroughAccumulatedReasoning = "";
|
||||
|
||||
// Guard against duplicate [DONE] events — ensures exactly one per stream
|
||||
let doneSent = false;
|
||||
@@ -304,6 +305,14 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
}
|
||||
} else {
|
||||
// Chat Completions: full sanitization pipeline
|
||||
|
||||
// Detect reasoning alias before sanitization strips it
|
||||
const hadReasoningAlias = !!(
|
||||
parsed.choices?.[0]?.delta?.reasoning &&
|
||||
typeof parsed.choices[0].delta.reasoning === "string" &&
|
||||
!parsed.choices[0].delta.reasoning_content
|
||||
);
|
||||
|
||||
parsed = sanitizeStreamingChunk(parsed);
|
||||
|
||||
const idFixed = fixInvalidId(parsed);
|
||||
@@ -323,6 +332,31 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
}
|
||||
}
|
||||
|
||||
// Split combined reasoning+content deltas into separate SSE events.
|
||||
// Standard OpenAI streaming never mixes both fields in one delta;
|
||||
// clients (e.g. LobeChat) may skip content when reasoning_content
|
||||
// is present, causing the first content token to be lost.
|
||||
if (delta?.reasoning_content && delta?.content) {
|
||||
const reasoningChunk = JSON.parse(JSON.stringify(parsed));
|
||||
const rDelta = reasoningChunk.choices[0].delta;
|
||||
delete rDelta.content;
|
||||
reasoningChunk.choices[0].finish_reason = null;
|
||||
delete reasoningChunk.usage;
|
||||
const rOutput = `data: ${JSON.stringify(reasoningChunk)}\n`;
|
||||
passthroughAccumulatedReasoning += delta.reasoning_content;
|
||||
totalContentLength += delta.reasoning_content.length;
|
||||
clientPayloadCollector.push(reasoningChunk);
|
||||
reqLogger?.appendConvertedChunk?.(rOutput);
|
||||
controller.enqueue(encoder.encode(rOutput));
|
||||
controller.enqueue(encoder.encode("\n"));
|
||||
delete delta.reasoning_content;
|
||||
}
|
||||
|
||||
// Track whether we need to re-serialize (separate from injectedUsage
|
||||
// to avoid blocking subsequent finish_reason / usage mutations)
|
||||
const needsReserialization =
|
||||
hadReasoningAlias || (delta?.content === "" && delta?.reasoning_content);
|
||||
|
||||
// T18: Track if we saw tool calls & accumulate for call log
|
||||
if (delta?.tool_calls && delta.tool_calls.length > 0) {
|
||||
passthroughHasToolCalls = true;
|
||||
@@ -365,7 +399,7 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
if (typeof delta?.content === "string")
|
||||
passthroughAccumulatedContent += delta.content;
|
||||
if (typeof delta?.reasoning_content === "string")
|
||||
passthroughAccumulatedContent += delta.reasoning_content;
|
||||
passthroughAccumulatedReasoning += delta.reasoning_content;
|
||||
|
||||
const extracted = extractUsage(parsed);
|
||||
if (extracted) {
|
||||
@@ -398,7 +432,7 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
parsed.usage = filterUsageForFormat(buffered, FORMATS.OPENAI);
|
||||
output = `data: ${JSON.stringify(parsed)}\n`;
|
||||
injectedUsage = true;
|
||||
} else if (idFixed) {
|
||||
} else if (idFixed || needsReserialization) {
|
||||
output = `data: ${JSON.stringify(parsed)}\n`;
|
||||
injectedUsage = true;
|
||||
}
|
||||
@@ -483,6 +517,19 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
if (state?.accumulatedContent !== undefined) state.accumulatedContent += r;
|
||||
}
|
||||
}
|
||||
// Normalize `reasoning` alias → `reasoning_content` (NVIDIA kimi-k2.5 etc.)
|
||||
if (
|
||||
parsed.choices?.[0]?.delta?.reasoning &&
|
||||
!parsed.choices?.[0]?.delta?.reasoning_content
|
||||
) {
|
||||
const r = parsed.choices[0].delta.reasoning;
|
||||
if (typeof r === "string") {
|
||||
parsed.choices[0].delta.reasoning_content = r;
|
||||
delete parsed.choices[0].delta.reasoning;
|
||||
totalContentLength += r.length;
|
||||
if (state?.accumulatedContent !== undefined) state.accumulatedContent += r;
|
||||
}
|
||||
}
|
||||
|
||||
// Gemini format - may have multiple parts
|
||||
if (parsed.candidates?.[0]?.content?.parts) {
|
||||
@@ -635,6 +682,10 @@ export function createSSEStream(options: StreamOptions = {}) {
|
||||
role: "assistant",
|
||||
content: content || null,
|
||||
};
|
||||
const reasoning = passthroughAccumulatedReasoning.trim();
|
||||
if (reasoning) {
|
||||
message.reasoning_content = reasoning;
|
||||
}
|
||||
if (passthroughToolCalls.size > 0) {
|
||||
message.tool_calls = [...passthroughToolCalls.values()].sort(
|
||||
(a, b) => a.index - b.index
|
||||
|
||||
@@ -157,6 +157,10 @@ function buildOpenAISummary(events: StructuredSSEEvent[], fallbackModel?: string
|
||||
if (typeof delta.reasoning_content === "string" && delta.reasoning_content.length > 0) {
|
||||
reasoningParts.push(delta.reasoning_content);
|
||||
}
|
||||
// Normalize `reasoning` alias (NVIDIA kimi-k2.5 etc.)
|
||||
if (typeof delta.reasoning === "string" && delta.reasoning.length > 0 && !delta.reasoning_content) {
|
||||
reasoningParts.push(delta.reasoning);
|
||||
}
|
||||
|
||||
if (Array.isArray(delta.tool_calls)) {
|
||||
for (const item of delta.tool_calls) {
|
||||
@@ -203,12 +207,14 @@ function buildOpenAISummary(events: StructuredSSEEvent[], fallbackModel?: string
|
||||
}
|
||||
}
|
||||
|
||||
const joinedContent = contentParts.length > 0 ? contentParts.join("").trim() : null;
|
||||
const joinedReasoning = reasoningParts.length > 0 ? reasoningParts.join("").trim() : null;
|
||||
const message: JsonRecord = {
|
||||
role: "assistant",
|
||||
content: contentParts.length > 0 ? contentParts.join("") : null,
|
||||
content: joinedContent || null,
|
||||
};
|
||||
if (reasoningParts.length > 0) {
|
||||
message.reasoning_content = reasoningParts.join("");
|
||||
if (joinedReasoning) {
|
||||
message.reasoning_content = joinedReasoning;
|
||||
}
|
||||
|
||||
const finalToolCalls = [...toolCalls.values()].sort((a, b) => a.index - b.index);
|
||||
|
||||
Generated
+2
-2
@@ -1,12 +1,12 @@
|
||||
{
|
||||
"name": "omniroute",
|
||||
"version": "3.2.6",
|
||||
"version": "3.3.0",
|
||||
"lockfileVersion": 3,
|
||||
"requires": true,
|
||||
"packages": {
|
||||
"": {
|
||||
"name": "omniroute",
|
||||
"version": "3.2.6",
|
||||
"version": "3.3.0",
|
||||
"hasInstallScript": true,
|
||||
"license": "MIT",
|
||||
"workspaces": [
|
||||
|
||||
+1
-1
@@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "omniroute",
|
||||
"version": "3.2.7",
|
||||
"version": "3.3.0",
|
||||
"description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
|
||||
"type": "module",
|
||||
"bin": {
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
## [3.2.8] - 2026-03-29
|
||||
|
||||
### ✨ Enhancements & Refactoring
|
||||
|
||||
- **Docker Auto-Update UI** — Integrated a detached background update process for Docker Compose deployments. The Dashboard UI now seamlessly tracks update lifecycle events combining JSON REST responses with SSE streaming progress overlays for robust cross-environment reliability.
|
||||
- **Cache Analytics** — Repaired zero-metrics visualization mapping by migrating Semantic Cache telemetry logs directly into the centralized tracking SQLite module.
|
||||
|
||||
### 🐛 Bug Fixes
|
||||
|
||||
- **Authentication Logic** — Fixed a bug where saving dashboard settings or adding models failed with a 401 Unauthorized error when `requireLogin` was disabled. API endpoints now correctly evaluate the global authentication toggle. Resolved global redirection by reactivating `src/middleware.ts`.
|
||||
- **CLI Tool Detection (Windows)** — Prevented fatal initialization exceptions during CLI environment detection by catching `cross-spawn` ENOENT errors correctly. Added explicit detection paths for `\AppData\Local\droid\droid.exe`.
|
||||
- **Codex Native Passthrough** — Normalized model translation parameters preventing context poisoning in proxy pass-through mode, enforcing generic `store: false` constraints explicitly for all Codex-originated requests.
|
||||
- **SSE Token Reporting** — Normalized provider tool-call chunk `finish_reason` detection, fixing 0% Usage analytics for stream-only responses missing strict `<DONE>` indicators.
|
||||
- **DeepSeek `<think>` Tags** — Implemented an explicit `<think>` extraction mapping inside `responsesHandler.ts`, ensuring DeepSeek reasoning streams map equivalently to native Anthropic `<thinking>` structures.
|
||||
|
||||
---
|
||||
|
||||
@@ -186,6 +186,9 @@ const COMBO_TEMPLATE_FALLBACK = {
|
||||
freeStackTitle: "Free Stack ($0)",
|
||||
freeStackDesc:
|
||||
"Round-robin across all free providers: Kiro, iFlow, Qwen, Gemini CLI. Zero cost, never stops.",
|
||||
paidPremiumTitle: "Paid Premium",
|
||||
paidPremiumDesc:
|
||||
"Round-robin across paid subscriptions: Cursor, Antigravity. Top-tier models, distributed load.",
|
||||
};
|
||||
|
||||
const COMBO_TEMPLATES = [
|
||||
@@ -250,6 +253,21 @@ const COMBO_TEMPLATES = [
|
||||
healthCheckEnabled: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "paid-premium",
|
||||
icon: "workspace_premium",
|
||||
titleKey: "templatePaidPremium",
|
||||
descKey: "templatePaidPremiumDesc",
|
||||
fallbackTitle: COMBO_TEMPLATE_FALLBACK.paidPremiumTitle,
|
||||
fallbackDesc: COMBO_TEMPLATE_FALLBACK.paidPremiumDesc,
|
||||
strategy: "round-robin",
|
||||
suggestedName: "paid-premium",
|
||||
config: {
|
||||
maxRetries: 2,
|
||||
retryDelayMs: 1000,
|
||||
healthCheckEnabled: true,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
function getStrategyMeta(strategy) {
|
||||
@@ -1425,18 +1443,27 @@ function ComboFormModal({ isOpen, combo, onClose, onSave, activeProviders }) {
|
||||
{ model: "kr/claude-sonnet-4.5", weight: 0 },
|
||||
{ model: "if/kimi-k2-thinking", weight: 0 },
|
||||
{ model: "if/qwen3-coder-plus", weight: 0 },
|
||||
{ model: "qw/qwen3-coder-plus", weight: 0 },
|
||||
{ model: "if/deepseek-v3.2", weight: 0 },
|
||||
{ model: "nvidia/llama-3.3-70b-instruct", weight: 0 },
|
||||
{ model: "groq/llama-3.3-70b-versatile", weight: 0 },
|
||||
];
|
||||
|
||||
// Models pre-filled into the form when the "paid-premium" template is applied.
// Every entry starts with weight 0; the order of the ids below is the order
// of the resulting list.
const PAID_PREMIUM_PRESET_MODELS = [
  "cu/claude-4.6-opus-high",
  "ag/claude-sonnet-4-6",
  "cu/claude-4.6-sonnet-high",
  "ag/gpt-5",
  "ag/gemini-3.1-pro-preview",
].map((model) => ({ model, weight: 0 }));
|
||||
|
||||
/**
 * Applies a combo template to the form state.
 *
 * Sets the strategy, merges the template config over the current config,
 * suggests a name only when the name field is blank, and pre-fills the model
 * list for the templates that ship with a preset ("free-stack" and
 * "paid-premium"). Other templates leave the model list untouched.
 */
const applyTemplate = (template) => {
  setStrategy(template.strategy);
  setConfig((current) => ({ ...current, ...template.config }));

  // Never overwrite a name the user has already typed.
  if (name.trim() === "") {
    setName(template.suggestedName);
  }

  switch (template.id) {
    case "free-stack":
      setModels(FREE_STACK_PRESET_MODELS);
      break;
    case "paid-premium":
      setModels(PAID_PREMIUM_PRESET_MODELS);
      break;
    default:
      // No preset model list for this template.
      break;
  }
};
|
||||
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
import { NextResponse } from "next/server";
|
||||
import { getSettings, updateSettings } from "@/lib/localDb";
|
||||
import { updateAutoDisableAccountsSchema } from "@/shared/validation/schemas";
|
||||
import { isValidationFailure, validateBody } from "@/shared/validation/helpers";
|
||||
|
||||
/**
 * GET — returns the current auto-disable configuration for banned accounts.
 *
 * Responds with `{ enabled, threshold }`, falling back to `false` and `3`
 * when the corresponding settings are unset. Any failure while reading the
 * settings is logged and reported as HTTP 500.
 */
export async function GET() {
  try {
    const current = await getSettings();
    const enabled = current.autoDisableBannedAccounts ?? false;
    const threshold = current.autoDisableBannedThreshold ?? 3;
    return NextResponse.json({ enabled, threshold });
  } catch (error) {
    console.error("Error reading auto-disable accounts config:", error);
    const failure = { error: "Failed to read auto-disable accounts config" };
    return NextResponse.json(failure, { status: 500 });
  }
}
|
||||
|
||||
/**
 * PUT — updates the auto-disable configuration for banned accounts.
 *
 * The JSON body is validated against `updateAutoDisableAccountsSchema`.
 * `enabled` is always written; `threshold` is written only when the caller
 * supplied it. On success the handler re-reads the settings and responds with
 * the same `{ enabled, threshold }` shape as GET.
 *
 * Responses: 400 for an unparseable body or a validation failure, 500 for any
 * error raised while validating, saving or re-reading the settings.
 */
export async function PUT(request: Request) {
  let payload: unknown;
  try {
    payload = await request.json();
  } catch {
    const invalidJson = {
      error: {
        message: "Invalid request",
        details: [{ field: "body", message: "Invalid JSON body" }],
      },
    };
    return NextResponse.json(invalidJson, { status: 400 });
  }

  try {
    const outcome = validateBody(updateAutoDisableAccountsSchema, payload);
    if (isValidationFailure(outcome)) {
      return NextResponse.json({ error: outcome.error }, { status: 400 });
    }

    const { enabled, threshold } = outcome.data;
    // Leave the stored threshold alone unless the caller sent a new one.
    const thresholdPatch =
      threshold === undefined ? {} : { autoDisableBannedThreshold: threshold };

    await updateSettings({
      autoDisableBannedAccounts: enabled,
      ...thresholdPatch,
    });

    // Read back so the response reflects what is now stored.
    const saved = await getSettings();
    const response = {
      enabled: saved.autoDisableBannedAccounts ?? false,
      threshold: saved.autoDisableBannedThreshold ?? 3,
    };
    return NextResponse.json(response);
  } catch (error) {
    console.error("Error updating auto-disable accounts config:", error);
    const failure = { error: "Failed to update auto-disable accounts config" };
    return NextResponse.json(failure, { status: 500 });
  }
}
|
||||
@@ -14,7 +14,7 @@ import {
|
||||
type EmbeddingProviderNodeRow,
|
||||
type EmbeddingProvider,
|
||||
} from "@omniroute/open-sse/config/embeddingRegistry.ts";
|
||||
import { errorResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
|
||||
import * as log from "@/sse/utils/logger";
|
||||
import { toJsonErrorPayload } from "@/shared/utils/upstreamError";
|
||||
@@ -209,6 +209,14 @@ export async function POST(request) {
|
||||
`No credentials for embedding provider: ${provider}`
|
||||
);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${provider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const result = await handleEmbedding({
|
||||
|
||||
@@ -11,7 +11,7 @@ import {
|
||||
getAllImageModels,
|
||||
getImageProvider,
|
||||
} from "@omniroute/open-sse/config/imageRegistry.ts";
|
||||
import { errorResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
|
||||
import * as log from "@/sse/utils/logger";
|
||||
import { toJsonErrorPayload } from "@/shared/utils/upstreamError";
|
||||
@@ -156,8 +156,15 @@ export async function POST(request) {
|
||||
`No credentials for image provider: ${provider}`
|
||||
);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${provider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
} else if (isCustomModel) {
|
||||
// Custom models need credentials from the provider connection
|
||||
credentials = await getProviderCredentials(provider);
|
||||
if (!credentials) {
|
||||
return errorResponse(
|
||||
@@ -165,6 +172,14 @@ export async function POST(request) {
|
||||
`No credentials for custom image provider: ${provider}`
|
||||
);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${provider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const result = await handleImageGeneration({
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import { CORS_ORIGIN } from "@/shared/utils/cors";
|
||||
import { errorResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
|
||||
import { getRegistryEntry } from "@omniroute/open-sse/config/providerRegistry.ts";
|
||||
import {
|
||||
@@ -85,6 +85,14 @@ export async function POST(request, { params }) {
|
||||
if (!credentials) {
|
||||
return errorResponse(HTTP_STATUS.BAD_REQUEST, `No credentials for provider: ${rawProvider}`);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${rawProvider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
|
||||
const result = await handleEmbedding({ body, credentials, log });
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import { CORS_ORIGIN } from "@/shared/utils/cors";
|
||||
import { handleImageGeneration } from "@omniroute/open-sse/handlers/imageGeneration.ts";
|
||||
import { errorResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { errorResponse, unavailableResponse } from "@omniroute/open-sse/utils/error.ts";
|
||||
import { HTTP_STATUS } from "@omniroute/open-sse/config/constants.ts";
|
||||
import {
|
||||
getProviderCredentials,
|
||||
@@ -85,6 +85,14 @@ export async function POST(request, { params }) {
|
||||
`No credentials for image provider: ${rawProvider}`
|
||||
);
|
||||
}
|
||||
if (credentials.allRateLimited) {
|
||||
return unavailableResponse(
|
||||
HTTP_STATUS.RATE_LIMITED,
|
||||
`[${rawProvider}] All accounts rate limited`,
|
||||
credentials.retryAfter,
|
||||
credentials.retryAfterHuman
|
||||
);
|
||||
}
|
||||
|
||||
const result = await handleImageGeneration({ body, credentials, log });
|
||||
|
||||
|
||||
+1
-1
@@ -9,7 +9,7 @@ import { isModelSyncInternalRequest } from "./shared/services/modelSyncScheduler
|
||||
|
||||
const SECRET = new TextEncoder().encode(process.env.JWT_SECRET || "");
|
||||
|
||||
export async function proxy(request) {
|
||||
export async function proxy(request: any) {
|
||||
const { pathname } = request.nextUrl;
|
||||
|
||||
// Pipeline: Add request ID header for end-to-end tracing
|
||||
|
||||
@@ -89,6 +89,10 @@ export async function verifyAuth(request: any): Promise<string | null> {
|
||||
* need to conditionally skip auth should check that separately.
|
||||
*/
|
||||
export async function isAuthenticated(request: Request): Promise<boolean> {
|
||||
// If settings say login/auth is disabled, treat all requests as authenticated
|
||||
if (!(await isAuthRequired())) {
|
||||
return true;
|
||||
}
|
||||
// 1. Check API key (for external clients)
|
||||
const authHeader = request.headers.get("authorization");
|
||||
if (authHeader?.startsWith("Bearer ")) {
|
||||
|
||||
@@ -1313,3 +1313,11 @@ export const v1SearchResponseSchema = z.object({
|
||||
)
|
||||
.optional(),
|
||||
});
|
||||
|
||||
// ─── Auto-disable banned/error accounts ───────────────────────────────────
|
||||
// Field definitions for the auto-disable accounts update payload.
const autoDisableAccountsShape = {
  enabled: z.boolean(),
  threshold: z.number().int().min(1).max(10).optional(),
};

/**
 * Request body schema for updating the auto-disable accounts settings.
 *
 * - `enabled`: required boolean toggle.
 * - `threshold`: optional integer between 1 and 10 inclusive.
 *
 * The schema is strict, so unknown keys are rejected.
 */
export const updateAutoDisableAccountsSchema = z.object(autoDisableAccountsShape).strict();
|
||||
|
||||
+65
-16
@@ -144,8 +144,8 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
}
|
||||
|
||||
// Optional strict API key mode for /v1 endpoints (require key on every request).
|
||||
const isInternalTest = request.headers?.get?.("x-internal-test") === "combo-health-check";
|
||||
if (process.env.REQUIRE_API_KEY === "true" && !isInternalTest) {
|
||||
const isComboLiveTest = request.headers?.get?.("x-internal-test") === "combo-health-check";
|
||||
if (process.env.REQUIRE_API_KEY === "true" && !isComboLiveTest) {
|
||||
if (!apiKey) {
|
||||
log.warn("AUTH", "Missing API key while REQUIRE_API_KEY=true");
|
||||
return errorResponse(HTTP_STATUS.UNAUTHORIZED, "Missing API key");
|
||||
@@ -155,7 +155,7 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
log.warn("AUTH", "Invalid API key while REQUIRE_API_KEY=true");
|
||||
return errorResponse(HTTP_STATUS.UNAUTHORIZED, "Invalid API key");
|
||||
}
|
||||
} else if (apiKey && !isInternalTest) {
|
||||
} else if (apiKey && !isComboLiveTest) {
|
||||
// Client sent a Bearer key — it must exist in DB (otherwise reject to avoid "key ignored" confusion).
|
||||
const valid = await isValidApiKey(apiKey);
|
||||
if (!valid) {
|
||||
@@ -238,9 +238,11 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
`Combo "${modelStr}" [${combo.strategy || "priority"}] with ${combo.models.length} models`
|
||||
);
|
||||
|
||||
// Pre-check function: skip models where all accounts are in cooldown
|
||||
// Uses modelAvailability module for TTL-based cooldowns
|
||||
// Pre-check function used by combo routing. For explicit combo live tests,
|
||||
// avoid pre-skipping so each model gets a real execution attempt.
|
||||
const checkModelAvailable = async (modelString: string) => {
|
||||
if (isComboLiveTest) return true;
|
||||
|
||||
// Use getModelInfo to properly resolve custom prefixes
|
||||
const modelInfo = await getModelInfo(modelString);
|
||||
const provider = modelInfo.provider;
|
||||
@@ -273,9 +275,21 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
body,
|
||||
combo,
|
||||
handleSingleModel: (b: any, m: string) =>
|
||||
handleSingleModelChat(b, m, clientRawRequest, request, combo.name, apiKeyInfo, telemetry, {
|
||||
sessionId,
|
||||
}, combo.strategy, true),
|
||||
handleSingleModelChat(
|
||||
b,
|
||||
m,
|
||||
clientRawRequest,
|
||||
request,
|
||||
combo.name,
|
||||
apiKeyInfo,
|
||||
telemetry,
|
||||
{
|
||||
sessionId,
|
||||
forceLiveComboTest: isComboLiveTest,
|
||||
},
|
||||
combo.strategy,
|
||||
true
|
||||
),
|
||||
isModelAvailable: checkModelAvailable,
|
||||
log,
|
||||
settings,
|
||||
@@ -304,7 +318,7 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
combo.name,
|
||||
apiKeyInfo,
|
||||
telemetry,
|
||||
{ sessionId, emergencyFallbackTried: true },
|
||||
{ sessionId, emergencyFallbackTried: true, forceLiveComboTest: isComboLiveTest },
|
||||
combo.strategy,
|
||||
true
|
||||
);
|
||||
@@ -338,7 +352,7 @@ export async function handleChat(request: any, clientRawRequest: any = null) {
|
||||
null,
|
||||
apiKeyInfo,
|
||||
telemetry,
|
||||
{ sessionId },
|
||||
{ sessionId, forceLiveComboTest: isComboLiveTest },
|
||||
null,
|
||||
false
|
||||
);
|
||||
@@ -370,7 +384,11 @@ async function handleSingleModelChat(
|
||||
comboName: string | null = null,
|
||||
apiKeyInfo: any = null,
|
||||
telemetry: any = null,
|
||||
runtimeOptions: { emergencyFallbackTried?: boolean; sessionId?: string | null } = {},
|
||||
runtimeOptions: {
|
||||
emergencyFallbackTried?: boolean;
|
||||
forceLiveComboTest?: boolean;
|
||||
sessionId?: string | null;
|
||||
} = {},
|
||||
comboStrategy: string | null = null,
|
||||
isCombo: boolean = false
|
||||
) {
|
||||
@@ -379,9 +397,13 @@ async function handleSingleModelChat(
|
||||
if (resolved.error) return resolved.error;
|
||||
|
||||
const { provider, model, sourceFormat, targetFormat, extendedContext } = resolved;
|
||||
const forceLiveComboTest = runtimeOptions.forceLiveComboTest === true;
|
||||
|
||||
// 2. Pipeline gates (availability + circuit breaker)
|
||||
const gate = checkPipelineGates(provider, model);
|
||||
const gate = checkPipelineGates(provider, model, {
|
||||
ignoreCircuitBreaker: forceLiveComboTest,
|
||||
ignoreModelCooldown: forceLiveComboTest,
|
||||
});
|
||||
if (gate) return gate;
|
||||
|
||||
const breaker = getCircuitBreaker(provider, {
|
||||
@@ -403,7 +425,13 @@ async function handleSingleModelChat(
|
||||
provider,
|
||||
excludeConnectionId,
|
||||
apiKeyInfo?.allowedConnections ?? null,
|
||||
model
|
||||
model,
|
||||
forceLiveComboTest
|
||||
? {
|
||||
allowSuppressedConnections: true,
|
||||
bypassQuotaPolicy: true,
|
||||
}
|
||||
: undefined
|
||||
);
|
||||
|
||||
if (!credentials || credentials.allRateLimited) {
|
||||
@@ -437,6 +465,7 @@ async function handleSingleModelChat(
|
||||
// 4. Execute chat via core (with circuit breaker + optional TLS)
|
||||
if (telemetry) telemetry.startPhase("connect");
|
||||
const { result, tlsFingerprintUsed } = await executeChatWithBreaker({
|
||||
bypassCircuitBreaker: forceLiveComboTest,
|
||||
breaker,
|
||||
body,
|
||||
provider,
|
||||
@@ -612,8 +641,15 @@ async function resolveModelOrError(modelStr: string, body: any, endpointPath: st
|
||||
* Check pipeline gates: model availability + circuit breaker state.
|
||||
* Returns an error Response if blocked, or null if OK to proceed.
|
||||
*/
|
||||
function checkPipelineGates(provider: string, model: string) {
|
||||
if (!isModelAvailable(provider, model)) {
|
||||
function checkPipelineGates(
|
||||
provider: string,
|
||||
model: string,
|
||||
options: { ignoreCircuitBreaker?: boolean; ignoreModelCooldown?: boolean } = {}
|
||||
) {
|
||||
const modelAvailable = isModelAvailable(provider, model);
|
||||
if (!modelAvailable && options.ignoreModelCooldown) {
|
||||
log.info("AVAILABILITY", `${provider}/${model} cooldown bypassed for combo live test`);
|
||||
} else if (!modelAvailable) {
|
||||
log.warn("AVAILABILITY", `${provider}/${model} is in cooldown, rejecting request`);
|
||||
return (unavailableResponse as any)(
|
||||
HTTP_STATUS.SERVICE_UNAVAILABLE,
|
||||
@@ -628,7 +664,9 @@ function checkPipelineGates(provider: string, model: string) {
|
||||
onStateChange: (name: string, from: string, to: string) =>
|
||||
log.info("CIRCUIT", `${name}: ${from} → ${to}`),
|
||||
});
|
||||
if (!breaker.canExecute()) {
|
||||
if (options.ignoreCircuitBreaker && !breaker.canExecute()) {
|
||||
log.info("CIRCUIT", `Bypassing OPEN circuit breaker for combo live test: ${provider}`);
|
||||
} else if (!breaker.canExecute()) {
|
||||
log.warn("CIRCUIT", `Circuit breaker OPEN for ${provider}, rejecting request`);
|
||||
return (unavailableResponse as any)(
|
||||
HTTP_STATUS.SERVICE_UNAVAILABLE,
|
||||
@@ -646,6 +684,7 @@ function checkPipelineGates(provider: string, model: string) {
|
||||
* Execute chat core wrapped in circuit breaker + optional TLS tracking.
|
||||
*/
|
||||
async function executeChatWithBreaker({
|
||||
bypassCircuitBreaker,
|
||||
breaker,
|
||||
body,
|
||||
provider,
|
||||
@@ -693,6 +732,16 @@ async function executeChatWithBreaker({
|
||||
})
|
||||
);
|
||||
|
||||
if (bypassCircuitBreaker) {
|
||||
if (!proxyInfo?.proxy && isTlsFingerprintActive()) {
|
||||
const tracked = await runWithTlsTracking(chatFn);
|
||||
return { result: tracked.result, tlsFingerprintUsed: tracked.tlsFingerprintUsed };
|
||||
}
|
||||
|
||||
const result = await chatFn();
|
||||
return { result, tlsFingerprintUsed: false };
|
||||
}
|
||||
|
||||
if (!proxyInfo?.proxy && isTlsFingerprintActive()) {
|
||||
const tracked = await breaker.execute(async () => runWithTlsTracking(chatFn));
|
||||
return { result: tracked.result, tlsFingerprintUsed: tracked.tlsFingerprintUsed };
|
||||
|
||||
+66
-17
@@ -3,6 +3,7 @@ import {
|
||||
validateApiKey,
|
||||
updateProviderConnection,
|
||||
getSettings,
|
||||
getCachedSettings,
|
||||
} from "@/lib/localDb";
|
||||
import { getQuotaWindowStatus, isAccountQuotaExhausted } from "@/domain/quotaCache";
|
||||
import {
|
||||
@@ -54,6 +55,11 @@ interface RecoverableConnectionState {
|
||||
lastErrorSource?: string | null;
|
||||
}
|
||||
|
||||
/**
 * Optional switches for credential selection. Both default to off when
 * omitted; the selection code only treats a strict `true` as enabled.
 */
interface CredentialSelectionOptions {
  /**
   * When true, the quota limit policy is not evaluated, so no connection is
   * excluded for being blocked by that policy.
   */
  bypassQuotaPolicy?: boolean;
  /**
   * When true, connections that are rate limited, in a terminal status, or
   * Codex scope-limited are kept as candidates instead of being filtered out.
   * An explicitly excluded connection id is still skipped.
   */
  allowSuppressedConnections?: boolean;
}
|
||||
|
||||
const CODEX_QUOTA_THRESHOLD_PERCENT = 90;
|
||||
const MIN_QUOTA_THRESHOLD_PERCENT = 1;
|
||||
const MAX_QUOTA_THRESHOLD_PERCENT = 100;
|
||||
@@ -311,7 +317,8 @@ export async function getProviderCredentials(
|
||||
provider: string,
|
||||
excludeConnectionId: string | null = null,
|
||||
allowedConnections: string[] | null = null,
|
||||
requestedModel: string | null = null
|
||||
requestedModel: string | null = null,
|
||||
options: CredentialSelectionOptions = {}
|
||||
) {
|
||||
// Acquire mutex to prevent race conditions
|
||||
const currentMutex = selectionMutex;
|
||||
@@ -323,6 +330,9 @@ export async function getProviderCredentials(
|
||||
try {
|
||||
await currentMutex;
|
||||
|
||||
const allowSuppressedConnections = options.allowSuppressedConnections === true;
|
||||
const bypassQuotaPolicy = options.bypassQuotaPolicy === true;
|
||||
|
||||
const connectionsRaw = await getProviderConnections({ provider, isActive: true });
|
||||
let connections = (Array.isArray(connectionsRaw) ? connectionsRaw : [])
|
||||
.map(toProviderConnection)
|
||||
@@ -394,9 +404,11 @@ export async function getProviderCredentials(
|
||||
// Filter out unavailable accounts and excluded connection
|
||||
const availableConnections = connections.filter((c) => {
|
||||
if (excludeConnectionId && c.id === excludeConnectionId) return false;
|
||||
if (isAccountUnavailable(c.rateLimitedUntil)) return false;
|
||||
if (isTerminalConnectionStatus(c)) return false;
|
||||
if (provider === "codex" && isCodexScopeUnavailable(c, requestedModel)) return false;
|
||||
if (!allowSuppressedConnections) {
|
||||
if (isAccountUnavailable(c.rateLimitedUntil)) return false;
|
||||
if (isTerminalConnectionStatus(c)) return false;
|
||||
if (provider === "codex" && isCodexScopeUnavailable(c, requestedModel)) return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
|
||||
@@ -412,13 +424,23 @@ export async function getProviderCredentials(
|
||||
if (excluded || rateLimited) {
|
||||
log.debug(
|
||||
"AUTH",
|
||||
` → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}`
|
||||
` → ${c.id?.slice(0, 8)} | ${excluded ? "excluded" : ""} ${rateLimited ? `rateLimited until ${c.rateLimitedUntil}` : ""}${allowSuppressedConnections && rateLimited ? " (retained for combo live test)" : ""}`
|
||||
);
|
||||
} else if (terminalStatus) {
|
||||
log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | skipped terminal status=${c.testStatus}`);
|
||||
log.debug(
|
||||
"AUTH",
|
||||
allowSuppressedConnections
|
||||
? ` → ${c.id?.slice(0, 8)} | retained terminal status=${c.testStatus} for combo live test`
|
||||
: ` → ${c.id?.slice(0, 8)} | skipped terminal status=${c.testStatus}`
|
||||
);
|
||||
} else if (codexScopeLimited) {
|
||||
const scopeUntil = getCodexScopeRateLimitedUntil(c.providerSpecificData, requestedModel);
|
||||
log.debug("AUTH", ` → ${c.id?.slice(0, 8)} | codex scope-limited until ${scopeUntil}`);
|
||||
log.debug(
|
||||
"AUTH",
|
||||
allowSuppressedConnections
|
||||
? ` → ${c.id?.slice(0, 8)} | retained codex scope-limited account until ${scopeUntil} for combo live test`
|
||||
: ` → ${c.id?.slice(0, 8)} | codex scope-limited until ${scopeUntil}`
|
||||
);
|
||||
}
|
||||
});
|
||||
|
||||
@@ -461,17 +483,21 @@ export async function getProviderCredentials(
|
||||
resetAt: string | null;
|
||||
}> = [];
|
||||
|
||||
policyEligibleConnections = availableConnections.filter((connection) => {
|
||||
const evaluation = evaluateQuotaLimitPolicy(provider, connection);
|
||||
if (!evaluation.blocked) return true;
|
||||
if (!bypassQuotaPolicy) {
|
||||
policyEligibleConnections = availableConnections.filter((connection) => {
|
||||
const evaluation = evaluateQuotaLimitPolicy(provider, connection);
|
||||
if (!evaluation.blocked) return true;
|
||||
|
||||
blockedByPolicy.push({
|
||||
id: connection.id,
|
||||
reasons: evaluation.reasons,
|
||||
resetAt: evaluation.resetAt,
|
||||
blockedByPolicy.push({
|
||||
id: connection.id,
|
||||
reasons: evaluation.reasons,
|
||||
resetAt: evaluation.resetAt,
|
||||
});
|
||||
return false;
|
||||
});
|
||||
return false;
|
||||
});
|
||||
} else if (availableConnections.length > 0) {
|
||||
log.debug("AUTH", `${provider} | bypassing quota policy for combo live test`);
|
||||
}
|
||||
|
||||
if (blockedByPolicy.length > 0) {
|
||||
log.info(
|
||||
@@ -748,13 +774,14 @@ export async function markAccountUnavailable(
|
||||
}
|
||||
}
|
||||
|
||||
const { shouldFallback, cooldownMs, newBackoffLevel, reason } = checkFallbackError(
|
||||
const result = checkFallbackError(
|
||||
status,
|
||||
errorText,
|
||||
backoffLevel,
|
||||
model,
|
||||
provider // ← Now passes provider for profile-aware cooldowns
|
||||
);
|
||||
const { shouldFallback, cooldownMs, newBackoffLevel, reason } = result;
|
||||
if (!shouldFallback) return { shouldFallback: false, cooldownMs: 0 };
|
||||
|
||||
// ── Local provider 404: model-only lockout, connection stays active ──
|
||||
@@ -820,6 +847,28 @@ export async function markAccountUnavailable(
|
||||
backoffLevel: newBackoffLevel ?? backoffLevel,
|
||||
});
|
||||
|
||||
// T-AUTODISABLE: If auto-disable setting is enabled and error is permanent/terminal,
|
||||
// mark account as inactive so it is never retried again.
|
||||
// Uses getCachedSettings() to avoid DB overhead on hot error path.
|
||||
// NOTE: For permanent bans we disable immediately — no threshold needed,
|
||||
// because a permanent ban (403 "Verify your account" / ToS violation) will
|
||||
// NEVER recover, so retrying is pointless regardless of attempt count.
|
||||
if (result.permanent) {
|
||||
try {
|
||||
const settings = await getCachedSettings();
|
||||
const autoDisableEnabled = settings.autoDisableBannedAccounts ?? false;
|
||||
if (autoDisableEnabled) {
|
||||
await updateProviderConnection(connectionId, { isActive: false });
|
||||
log.info(
|
||||
"AUTH",
|
||||
`Auto-disabled ${connectionId.slice(0, 8)} — permanent ban detected (autoDisableBannedAccounts=true)`
|
||||
);
|
||||
}
|
||||
} catch (e) {
|
||||
log.info("AUTH", `Auto-disable check failed (non-fatal): ${e}`);
|
||||
}
|
||||
}
|
||||
|
||||
// Per-model lockout: lock the specific model if known
|
||||
if (provider && model && cooldownMs > 0) {
|
||||
lockModel(provider, connectionId, model, reason || "unknown", cooldownMs);
|
||||
|
||||
+207
@@ -0,0 +1,207 @@
|
||||
[CREDENTIALS] No external credentials file found, using defaults.
|
||||
[DB] SQLite database ready: /home/diegosouzapw/.omniroute/storage.sqlite
|
||||
[MODEL] Ambiguous model 'claude-haiku-4.5'. Use provider/model prefix (ex: gh/claude-haiku-4.5 or kr/claude-haiku-4.5). Candidates: gh, kr, anthropic
|
||||
TAP version 13
|
||||
# Subtest: getModelInfoCore resolves unique non-openai unprefixed model
|
||||
ok 1 - getModelInfoCore resolves unique non-openai unprefixed model
|
||||
---
|
||||
duration_ms: 3.403766
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: getModelInfoCore keeps openai fallback for gpt-4o
|
||||
ok 2 - getModelInfoCore keeps openai fallback for gpt-4o
|
||||
---
|
||||
duration_ms: 0.535726
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: getModelInfoCore resolves gpt-5.4 to codex
|
||||
ok 3 - getModelInfoCore resolves gpt-5.4 to codex
|
||||
---
|
||||
duration_ms: 0.321781
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: getModelInfoCore returns explicit ambiguity metadata for ambiguous unprefixed model
|
||||
ok 4 - getModelInfoCore returns explicit ambiguity metadata for ambiguous unprefixed model
|
||||
---
|
||||
duration_ms: 1.079896
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: getModelInfoCore canonicalizes github legacy alias with explicit provider prefix
|
||||
ok 5 - getModelInfoCore canonicalizes github legacy alias with explicit provider prefix
|
||||
---
|
||||
duration_ms: 0.370547
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: GithubExecutor routes codex-family model to /responses
|
||||
ok 6 - GithubExecutor routes codex-family model to /responses
|
||||
---
|
||||
duration_ms: 0.47113
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: GithubExecutor keeps non-codex model on /chat/completions
|
||||
ok 7 - GithubExecutor keeps non-codex model on /chat/completions
|
||||
---
|
||||
duration_ms: 0.38457
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: DefaultExecutor uses x-api-key for kimi-coding-apikey
|
||||
ok 8 - DefaultExecutor uses x-api-key for kimi-coding-apikey
|
||||
---
|
||||
duration_ms: 0.451443
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor forces stream=true for upstream compatibility
|
||||
ok 9 - CodexExecutor forces stream=true for upstream compatibility
|
||||
---
|
||||
duration_ms: 1.203259
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: Claude native messages can be round-tripped through OpenAI into Claude OAuth format
|
||||
ok 10 - Claude native messages can be round-tripped through OpenAI into Claude OAuth format
|
||||
---
|
||||
duration_ms: 7.232512
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor maps fast service tier to priority
|
||||
ok 11 - CodexExecutor maps fast service tier to priority
|
||||
---
|
||||
duration_ms: 0.489993
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: shouldUseNativeCodexPassthrough only enables responses-native Codex requests
|
||||
ok 12 - shouldUseNativeCodexPassthrough only enables responses-native Codex requests
|
||||
---
|
||||
duration_ms: 0.441911
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor can force fast service tier from settings
|
||||
ok 13 - CodexExecutor can force fast service tier from settings
|
||||
---
|
||||
duration_ms: 0.299575
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor always requests SSE accept header
|
||||
ok 14 - CodexExecutor always requests SSE accept header
|
||||
---
|
||||
duration_ms: 0.602914
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor does not request SSE accept header for compact requests
|
||||
ok 15 - CodexExecutor does not request SSE accept header for compact requests
|
||||
---
|
||||
duration_ms: 0.322611
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor preserves native responses payloads for Codex passthrough
|
||||
not ok 16 - CodexExecutor preserves native responses payloads for Codex passthrough
|
||||
---
|
||||
duration_ms: 1.856261
|
||||
type: 'test'
|
||||
location: '/home/diegosouzapw/dev/proxys/9router/tests/unit/plan3-p0.test.mjs:221:1'
|
||||
failureType: 'testCodeFailure'
|
||||
error: |-
|
||||
Expected values to be strictly equal:
|
||||
|
||||
false !== true
|
||||
|
||||
code: 'ERR_ASSERTION'
|
||||
name: 'AssertionError'
|
||||
expected: true
|
||||
actual: false
|
||||
operator: 'strictEqual'
|
||||
stack: |-
|
||||
TestContext.<anonymous> (file:///home/diegosouzapw/dev/proxys/9router/tests/unit/plan3-p0.test.mjs:242:10)
|
||||
Test.runInAsyncScope (node:async_hooks:214:14)
|
||||
Test.run (node:internal/test_runner/test:1047:25)
|
||||
Test.processPendingSubtests (node:internal/test_runner/test:744:18)
|
||||
Test.postRun (node:internal/test_runner/test:1173:19)
|
||||
Test.run (node:internal/test_runner/test:1101:12)
|
||||
async Test.processPendingSubtests (node:internal/test_runner/test:744:7)
|
||||
...
|
||||
# Subtest: CodexExecutor strips streaming fields for compact passthrough
|
||||
ok 17 - CodexExecutor strips streaming fields for compact passthrough
|
||||
---
|
||||
duration_ms: 0.296176
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: CodexExecutor routes responses subpaths to matching upstream paths
|
||||
ok 18 - CodexExecutor routes responses subpaths to matching upstream paths
|
||||
---
|
||||
duration_ms: 0.546657
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: translateNonStreamingResponse converts Responses API payload to OpenAI chat.completion
|
||||
ok 19 - translateNonStreamingResponse converts Responses API payload to OpenAI chat.completion
|
||||
---
|
||||
duration_ms: 1.483788
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: extractUsageFromResponse reads usage from Responses API payload
|
||||
ok 20 - extractUsageFromResponse reads usage from Responses API payload
|
||||
---
|
||||
duration_ms: 0.398039
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: detectFormat identifies OpenAI Responses when input is string
|
||||
ok 21 - detectFormat identifies OpenAI Responses when input is string
|
||||
---
|
||||
duration_ms: 0.359174
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: detectFormat identifies OpenAI Responses by max_output_tokens without input array
|
||||
ok 22 - detectFormat identifies OpenAI Responses by max_output_tokens without input array
|
||||
---
|
||||
duration_ms: 0.271215
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: detectFormatFromEndpoint forces OpenAI for /v1/chat/completions
|
||||
ok 23 - detectFormatFromEndpoint forces OpenAI for /v1/chat/completions
|
||||
---
|
||||
duration_ms: 0.52054
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: detectFormatFromEndpoint forces Claude for /v1/messages
|
||||
ok 24 - detectFormatFromEndpoint forces Claude for /v1/messages
|
||||
---
|
||||
duration_ms: 0.433035
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: translateRequest normalizes openai-responses input string into list payload
|
||||
ok 25 - translateRequest normalizes openai-responses input string into list payload
|
||||
---
|
||||
duration_ms: 0.358109
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: translateRequest preserves service_tier when converting openai to openai-responses
|
||||
ok 26 - translateRequest preserves service_tier when converting openai to openai-responses
|
||||
---
|
||||
duration_ms: 1.10454
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: parseSSEToResponsesOutput parses completed response from SSE payload
|
||||
ok 27 - parseSSEToResponsesOutput parses completed response from SSE payload
|
||||
---
|
||||
duration_ms: 0.575476
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: parseSSEToResponsesOutput returns null for invalid payload
|
||||
ok 28 - parseSSEToResponsesOutput returns null for invalid payload
|
||||
---
|
||||
duration_ms: 0.302714
|
||||
type: 'test'
|
||||
...
|
||||
# Subtest: parseSSEToOpenAIResponse merges split tool call chunks by id without duplication
|
||||
ok 29 - parseSSEToOpenAIResponse merges split tool call chunks by id without duplication
|
||||
---
|
||||
duration_ms: 0.916032
|
||||
type: 'test'
|
||||
...
|
||||
1..29
|
||||
# tests 29
|
||||
# suites 0
|
||||
# pass 28
|
||||
# fail 1
|
||||
# cancelled 0
|
||||
# skipped 0
|
||||
# todo 0
|
||||
# duration_ms 65.394285
|
||||
@@ -120,7 +120,11 @@ test("isAuthenticated accepts bearer API keys", async () => {
|
||||
assert.equal(result, true);
|
||||
});
|
||||
|
||||
test("isAuthenticated returns false without valid credentials", async () => {
|
||||
test("isAuthenticated returns false when auth is required without valid credentials", async () => {
|
||||
// Force requireLogin to be active
|
||||
process.env.INITIAL_PASSWORD = "bootstrap-password";
|
||||
await localDb.updateSettings({ requireLogin: true, password: "" });
|
||||
|
||||
const request = new Request("https://example.com/api/providers");
|
||||
|
||||
const result = await apiAuth.isAuthenticated(request);
|
||||
|
||||
@@ -62,6 +62,27 @@ test("getProviderCredentials returns null when all active connections are termin
|
||||
assert.equal(selected, null);
|
||||
});
|
||||
|
||||
test("getProviderCredentials can reuse a locally suppressed connection for combo live tests", async () => {
|
||||
await resetStorage();
|
||||
|
||||
const conn = await providersDb.createProviderConnection({
|
||||
provider: "openai",
|
||||
authType: "apikey",
|
||||
apiKey: "sk-live-test",
|
||||
isActive: true,
|
||||
testStatus: "credits_exhausted",
|
||||
rateLimitedUntil: new Date(Date.now() + 60_000).toISOString(),
|
||||
});
|
||||
|
||||
const selected = await auth.getProviderCredentials("openai", null, null, null, {
|
||||
allowSuppressedConnections: true,
|
||||
bypassQuotaPolicy: true,
|
||||
});
|
||||
|
||||
assert.ok(selected);
|
||||
assert.equal(selected.connectionId, conn.id);
|
||||
});
|
||||
|
||||
test("markAccountUnavailable does not overwrite terminal status", async () => {
|
||||
await resetStorage();
|
||||
|
||||
|
||||
@@ -97,7 +97,7 @@ describe("buildDockerComposeUpdateScript", () => {
|
||||
|
||||
assert.match(script, /git fetch --tags/);
|
||||
assert.match(script, /git config --global --add safe\.directory/);
|
||||
assert.match(script, /git checkout -B "autoupdate\/3\.2\.6" "v3\.2\.6"/);
|
||||
assert.match(script, /git checkout -B "autoupdate\/\$\{TARGET_TAG#v\}" "\$TARGET_TAG"/);
|
||||
assert.match(script, /git cherry-pick --keep-redundant-commits '1501a87' 'e569e1c'/);
|
||||
assert.match(script, /docker compose -f "\$COMPOSE_FILE" up -d --build "\$SERVICE"/);
|
||||
});
|
||||
|
||||
@@ -0,0 +1,128 @@
|
||||
import test from "node:test";
|
||||
import assert from "node:assert/strict";
|
||||
import fs from "node:fs";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
|
||||
const TEST_DATA_DIR = fs.mkdtempSync(path.join(os.tmpdir(), "omniroute-chat-combo-live-"));
|
||||
process.env.DATA_DIR = TEST_DATA_DIR;
|
||||
|
||||
const core = await import("../../src/lib/db/core.ts");
|
||||
const providersDb = await import("../../src/lib/db/providers.ts");
|
||||
const chatRoute = await import("../../src/app/api/v1/chat/completions/route.ts");
|
||||
const {
|
||||
clearModelUnavailability,
|
||||
resetAllAvailability,
|
||||
setModelUnavailable,
|
||||
} = await import("../../src/domain/modelAvailability.ts");
|
||||
const {
|
||||
getCircuitBreaker,
|
||||
resetAllCircuitBreakers,
|
||||
STATE,
|
||||
} = await import("../../src/shared/utils/circuitBreaker.ts");
|
||||
|
||||
const originalFetch = globalThis.fetch;
|
||||
|
||||
async function resetStorage() {
|
||||
core.resetDbInstance();
|
||||
fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
|
||||
fs.mkdirSync(TEST_DATA_DIR, { recursive: true });
|
||||
resetAllAvailability();
|
||||
resetAllCircuitBreakers();
|
||||
}
|
||||
|
||||
async function seedSuppressedConnection() {
|
||||
return providersDb.createProviderConnection({
|
||||
provider: "openai",
|
||||
authType: "apikey",
|
||||
name: "openai-live-test",
|
||||
apiKey: "sk-live-test",
|
||||
isActive: true,
|
||||
testStatus: "credits_exhausted",
|
||||
rateLimitedUntil: new Date(Date.now() + 60_000).toISOString(),
|
||||
});
|
||||
}
|
||||
|
||||
function makeRequest(extraHeaders = {}) {
|
||||
return new Request("http://localhost/v1/chat/completions", {
|
||||
method: "POST",
|
||||
headers: {
|
||||
"Content-Type": "application/json",
|
||||
...extraHeaders,
|
||||
},
|
||||
body: JSON.stringify({
|
||||
model: "openai/gpt-4o-mini",
|
||||
messages: [{ role: "user", content: "Reply with OK only." }],
|
||||
max_tokens: 16,
|
||||
stream: false,
|
||||
}),
|
||||
});
|
||||
}
|
||||
|
||||
test.beforeEach(async () => {
|
||||
globalThis.fetch = originalFetch;
|
||||
await resetStorage();
|
||||
});
|
||||
|
||||
test.afterEach(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
resetAllAvailability();
|
||||
resetAllCircuitBreakers();
|
||||
});
|
||||
|
||||
test.after(() => {
|
||||
globalThis.fetch = originalFetch;
|
||||
resetAllAvailability();
|
||||
resetAllCircuitBreakers();
|
||||
core.resetDbInstance();
|
||||
fs.rmSync(TEST_DATA_DIR, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
test("combo live test bypasses local cooldown and breaker state to perform a real upstream request", async () => {
|
||||
const created = await seedSuppressedConnection();
|
||||
|
||||
setModelUnavailable("openai", "gpt-4o-mini", 60_000, "test cooldown");
|
||||
const breaker = getCircuitBreaker("openai");
|
||||
breaker.state = STATE.OPEN;
|
||||
breaker.lastFailureTime = Date.now();
|
||||
|
||||
const fetchCalls = [];
|
||||
globalThis.fetch = async (url, init = {}) => {
|
||||
fetchCalls.push({ url: String(url), init });
|
||||
return Response.json({
|
||||
id: "chatcmpl-live-test",
|
||||
choices: [
|
||||
{
|
||||
message: {
|
||||
role: "assistant",
|
||||
content: "OK",
|
||||
},
|
||||
},
|
||||
],
|
||||
});
|
||||
};
|
||||
|
||||
const blockedByCooldown = await chatRoute.POST(makeRequest());
|
||||
assert.equal(blockedByCooldown.status, 503);
|
||||
assert.equal(fetchCalls.length, 0);
|
||||
|
||||
clearModelUnavailability("openai", "gpt-4o-mini");
|
||||
|
||||
const blockedByBreaker = await chatRoute.POST(makeRequest());
|
||||
assert.equal(blockedByBreaker.status, 503);
|
||||
assert.equal(fetchCalls.length, 0);
|
||||
|
||||
const liveResponse = await chatRoute.POST(
|
||||
makeRequest({ "X-Internal-Test": "combo-health-check" })
|
||||
);
|
||||
const liveBody = await liveResponse.json();
|
||||
|
||||
assert.equal(liveResponse.status, 200);
|
||||
assert.equal(fetchCalls.length, 1);
|
||||
assert.match(fetchCalls[0].url, /\/chat\/completions$/);
|
||||
assert.equal(fetchCalls[0].init.headers.Authorization, "Bearer sk-live-test");
|
||||
assert.equal(liveBody.choices[0].message.content, "OK");
|
||||
|
||||
const updated = await providersDb.getProviderConnectionById(created.id);
|
||||
assert.equal(updated.testStatus, "active");
|
||||
});
|
||||
@@ -26,7 +26,7 @@ function mockLog() {
|
||||
function mockHandler(statusSequence) {
|
||||
let callIndex = 0;
|
||||
return async (body, modelStr) => {
|
||||
const status = statusSequence[callIndex] ?? 200;
|
||||
const status = statusSequence[callIndex] ?? statusSequence[statusSequence.length - 1] ?? 200;
|
||||
callIndex++;
|
||||
if (status === 200) {
|
||||
return new Response(JSON.stringify({ ok: true }), { status: 200 });
|
||||
@@ -55,6 +55,7 @@ test("handleComboChat: circuit breaker opens after repeated 502 errors", async (
|
||||
name: "test-combo",
|
||||
models: [{ model: "groq/llama-3.3-70b", weight: 0 }],
|
||||
strategy: "priority",
|
||||
config: { maxRetries: 0 },
|
||||
};
|
||||
|
||||
const log = mockLog();
|
||||
@@ -74,6 +75,7 @@ test("handleComboChat: circuit breaker opens after repeated 502 errors", async (
|
||||
|
||||
// Breaker should now be OPEN
|
||||
const status = breaker.getStatus();
|
||||
console.log("=== BREAKER STATUS AFTER 3 CALLS ===", status);
|
||||
assert.equal(status.state, STATE.OPEN, "Breaker should be OPEN after 3 failures");
|
||||
assert.equal(status.failureCount, 3, "Failure count should be 3");
|
||||
});
|
||||
|
||||
@@ -239,7 +239,7 @@ test("CodexExecutor preserves native responses payloads for Codex passthrough",
|
||||
assert.equal(transformed.stream, true);
|
||||
assert.equal(transformed.service_tier, "priority");
|
||||
assert.equal(transformed.instructions, "custom system prompt");
|
||||
assert.equal(transformed.store, true);
|
||||
assert.equal(transformed.store, false);
|
||||
assert.deepEqual(transformed.metadata, { source: "codex-client" });
|
||||
assert.equal(transformed.reasoning_effort, "high");
|
||||
assert.ok(!("_nativeCodexPassthrough" in transformed));
|
||||
@@ -503,3 +503,29 @@ test("parseSSEToOpenAIResponse merges split tool call chunks by id without dupli
|
||||
assert.equal(parsed.choices[0].message.tool_calls[0].function.name, "sum");
|
||||
assert.equal(parsed.choices[0].message.tool_calls[0].function.arguments, '{"a":1}');
|
||||
});
|
||||
|
||||
test("parseSSEToOpenAIResponse normalizes delta.reasoning alias to reasoning_content", () => {
|
||||
const rawSSE = [
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl_2",
|
||||
object: "chat.completion.chunk",
|
||||
choices: [{ index: 0, delta: { reasoning: "Let me think..." } }],
|
||||
})}`,
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl_2",
|
||||
object: "chat.completion.chunk",
|
||||
choices: [{ index: 0, delta: { reasoning: " The answer is 4." } }],
|
||||
})}`,
|
||||
`data: ${JSON.stringify({
|
||||
id: "chatcmpl_2",
|
||||
object: "chat.completion.chunk",
|
||||
choices: [{ index: 0, delta: { content: "2+2=4" }, finish_reason: "stop" }],
|
||||
})}`,
|
||||
"data: [DONE]",
|
||||
].join("\n");
|
||||
|
||||
const parsed = parseSSEToOpenAIResponse(rawSSE, "moonshotai/kimi-k2.5");
|
||||
assert.ok(parsed);
|
||||
assert.equal(parsed.choices[0].message.reasoning_content, "Let me think... The answer is 4.");
|
||||
assert.equal(parsed.choices[0].message.content, "2+2=4");
|
||||
});
|
||||
|
||||
@@ -155,3 +155,51 @@ test("builds compact Claude stream summary for detailed logs", () => {
|
||||
assert.equal(compact.usage.output_tokens, 7);
|
||||
assert.equal(compact._omniroute_stream.eventCount, 4);
|
||||
});
|
||||
|
||||
test("builds compact OpenAI summary with reasoning alias (delta.reasoning)", () => {
|
||||
const collector = createStructuredSSECollector({ stage: "provider_response" });
|
||||
|
||||
collector.push({
|
||||
id: "chatcmpl_r1",
|
||||
object: "chat.completion.chunk",
|
||||
created: 100,
|
||||
model: "moonshotai/kimi-k2.5",
|
||||
choices: [{ index: 0, delta: { role: "assistant" } }],
|
||||
});
|
||||
collector.push({
|
||||
id: "chatcmpl_r1",
|
||||
object: "chat.completion.chunk",
|
||||
created: 100,
|
||||
model: "moonshotai/kimi-k2.5",
|
||||
choices: [{ index: 0, delta: { reasoning: "Let me think..." } }],
|
||||
});
|
||||
collector.push({
|
||||
id: "chatcmpl_r1",
|
||||
object: "chat.completion.chunk",
|
||||
created: 100,
|
||||
model: "moonshotai/kimi-k2.5",
|
||||
choices: [{ index: 0, delta: { content: "The answer is 4." } }],
|
||||
});
|
||||
collector.push({
|
||||
id: "chatcmpl_r1",
|
||||
object: "chat.completion.chunk",
|
||||
created: 100,
|
||||
model: "moonshotai/kimi-k2.5",
|
||||
choices: [{ index: 0, delta: {}, finish_reason: "stop" }],
|
||||
usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
|
||||
});
|
||||
|
||||
const summary = buildStreamSummaryFromEvents(
|
||||
collector.getEvents(),
|
||||
FORMATS.OPENAI,
|
||||
"moonshotai/kimi-k2.5"
|
||||
);
|
||||
const compact = compactStructuredStreamPayload(
|
||||
collector.build(summary, { includeEvents: false })
|
||||
);
|
||||
|
||||
assert.equal(compact.object, "chat.completion");
|
||||
assert.equal(compact.choices[0].message.content, "The answer is 4.");
|
||||
assert.equal(compact.choices[0].message.reasoning_content, "Let me think...");
|
||||
assert.equal(compact.choices[0].finish_reason, "stop");
|
||||
});
|
||||
|
||||
Reference in New Issue
Block a user