Compare commits

...

9 Commits

Author SHA1 Message Date
Diego Rodrigues de Sa e Souza f171b7de96 Merge pull request #730 from diegosouzapw/release/v3.2.3
Build Electron Desktop App / Validate version (push) Failing after 40s
Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped
Build Electron Desktop App / Build Electron (linux) (push) Has been skipped
Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped
Build Electron Desktop App / Build Electron (windows) (push) Has been skipped
Build Electron Desktop App / Create Release (push) Has been skipped
Build Electron Desktop App / Publish to npm (push) Has been skipped
chore(release): v3.2.3 — Enhancements and Bugfixes
2026-03-28 23:21:55 -03:00
diegosouzapw c0cbf00199 chore(release): v3.2.3 — Enhancements and Bugfixes 2026-03-28 23:19:01 -03:00
diegosouzapw 0cd6e59fb9 Merge cache-control fix and resolve changelog conflict 2026-03-28 23:13:03 -03:00
diegosouzapw 11a8adc71c Merge branch 'feat/issue-659-mobile-ui' 2026-03-28 23:12:26 -03:00
diegosouzapw b9c7fd879f fix(core): resolve routing schemas, CLI streaming leaks, and thinking tag extraction 2026-03-28 23:11:22 -03:00
Diego Rodrigues de Sa e Souza 2fc4c7ea33 Merge pull request #728 from rdself/codex/normalize-provider-limits-labels
normalize provider limits labels
2026-03-28 23:06:16 -03:00
R.D. 538028c150 normalize provider limits quota labels 2026-03-28 21:17:07 -04:00
diegosouzapw 94a00cb6d6 feat: improve dashboard layout for smaller screens (#659) 2026-03-28 21:53:07 -03:00
tombii b84c915b23 fix(sse): preserve cache_control in Claude passthrough mode
When Claude Code routes through OmniRoute (Claude → OmniRoute → Claude),
OmniRoute was stripping all cache_control markers and replacing them with
its own generic caching strategy. This broke Claude Code's carefully
placed cache breakpoints for plans and other features.

Changes:
- Add preserveCacheControl parameter to prepareClaudeRequest()
- Detect Claude passthrough mode (sourceFormat === targetFormat === CLAUDE)
- Skip cache_control normalization when preserveCacheControl=true
- Preserve client's cache_control markers in system, messages, and tools

This ensures Claude Code's prompt caching optimization works correctly
while maintaining OmniRoute's caching strategy for translation scenarios.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 16:30:41 +01:00
16 changed files with 341 additions and 54 deletions
+9
View File
@@ -47,3 +47,12 @@ AGENTS.md
# Build artifacts (pre-built goes inside app/)
.next/
node_modules/
# Ignore large binary files and other build directories
*.tgz
*.AppImage
*.deb
*.rpm
electron/
app/electron/
app/vscode-extension/
+14
View File
@@ -4,6 +4,20 @@
---
## [3.2.3] — 2026-03-29
### ✨ Enhancements & Refactoring
- **Provider Limits Quota UI (#728)** — Normalized quota limit logic and data labeling inside the Limits interface.
### 🐛 Bug Fixes
- **Core Routing Schemas & Leaks** — Expanded `comboStrategySchema` to natively support `fill-first` and `p2c` strategies to unblock complex combo editing natively.
- **Thinking Tags Extraction (CLI)** — Restructured CLI token responses sanitizer RegEx capturing model reasoning structures inside streams avoiding broken `<thinking>` extractions breaking response text output format.
- **Strict Format Enforcements** — Hardened pipeline sanitization execution making it universally apply to translation mode targets.
---
## [3.2.2] — 2026-03-29
### ✨ New Features
+1 -1
View File
@@ -1,7 +1,7 @@
openapi: 3.1.0
info:
title: OmniRoute API
version: 3.2.2
version: 3.2.3
description: |
OmniRoute is a local-first AI API proxy router. It provides an OpenAI-compatible
endpoint that routes requests to multiple AI providers with load balancing,
+5 -4
View File
@@ -531,10 +531,10 @@ export async function handleChatCore({
connectionId,
duration: Date.now() - startTime,
tokens: tokens || {},
requestBody: attachLogMeta(body, {
requestBody: attachLogMeta((body as Record<string, unknown>) ?? undefined, {
claudePromptCache: claudeCacheMeta,
}),
responseBody: attachLogMeta(responseBody ?? undefined, {
responseBody: attachLogMeta((responseBody as Record<string, unknown>) ?? undefined, {
claudePromptCache: claudeCacheMeta
? {
applied: claudeCacheMeta.applied,
@@ -1464,8 +1464,9 @@ export async function handleChatCore({
// Sanitize response for OpenAI SDK compatibility
// Strips non-standard fields (x_groq, usage_breakdown, service_tier, etc.)
// Extracts <think> tags into reasoning_content
if (sourceFormat === FORMATS.OPENAI) {
// Extracts <think> and <thinking> tags into reasoning_content
// Target format determines output shape. If we are outputting OpenAI shape or pseudo-OpenAI shape, sanitize.
if (targetFormat === FORMATS.OPENAI || targetFormat === FORMATS.OPENAI_RESPONSES) {
translatedResponse = sanitizeOpenAIResponse(translatedResponse);
}
+4 -5
View File
@@ -32,13 +32,12 @@ function toNumber(value: unknown): number | undefined {
return typeof value === "number" && Number.isFinite(value) ? value : undefined;
}
// ── Think tag regex ────────────────────────────────────────────────────────
// Matches <think>...</think> blocks (greedy, dotAll)
const THINK_TAG_REGEX = /<think>([\s\S]*?)<\/think>/gi;
// Matches <think>...</think> blocks and <thinking>...</thinking> (greedy, dotAll)
const THINK_TAG_REGEX = /<(?:think|thinking)>([\s\S]*?)<\/(?:think|thinking)>/gi;
// #638: Collapse runs of 3+ consecutive newlines into \n\n
// #638, #727: Collapse runs of 2+ consecutive newlines into \n\n
// Tool call responses from thinking models often accumulate excessive newlines
const EXCESSIVE_NEWLINES = /\n{3,}/g;
const EXCESSIVE_NEWLINES = /\n{2,}/g;
/**
 * Rewrites every run of newlines matched by EXCESSIVE_NEWLINES as a single
 * "\n\n" separator and returns the resulting string.
 */
function collapseExcessiveNewlines(text: string): string {
  const paragraphBreak = "\n\n";
  const collapsed = text.replace(EXCESSIVE_NEWLINES, paragraphBreak);
  return collapsed;
}
+23 -7
View File
@@ -528,31 +528,47 @@ export function createSSEStream(options: StreamOptions = {}) {
// Content for call log is accumulated only from parsed (above) to avoid double-counting;
// do not add again from item here.
// #723, #727: Sanitize intermediate stream chunks if target is OpenAI format loop
let itemSanitized: Record<string, unknown> = item;
if (targetFormat === FORMATS.OPENAI || targetFormat === FORMATS.OPENAI_RESPONSES) {
itemSanitized = sanitizeStreamingChunk(itemSanitized) as Record<string, unknown>;
// Extract reasoning tags from content if translation generated them
const delta = itemSanitized?.choices?.[0]?.delta;
if (delta?.content && typeof delta.content === "string") {
const { content, thinking } = extractThinkingFromContent(delta.content);
delta.content = content;
if (thinking && !delta.reasoning_content) {
delta.reasoning_content = thinking;
}
}
}
// Filter empty chunks
if (!hasValuableContent(item, sourceFormat)) {
if (!hasValuableContent(itemSanitized, sourceFormat)) {
continue; // Skip this empty chunk
}
// Inject estimated usage if finish chunk has no valid usage
const isFinishChunk =
item.type === "message_delta" || item.choices?.[0]?.finish_reason;
itemSanitized.type === "message_delta" || itemSanitized.choices?.[0]?.finish_reason;
if (
state.finishReason &&
isFinishChunk &&
!hasValidUsage(item.usage) &&
!hasValidUsage(itemSanitized.usage) &&
totalContentLength > 0
) {
const estimated = estimateUsage(body, totalContentLength, sourceFormat);
item.usage = filterUsageForFormat(estimated, sourceFormat); // Filter + already has buffer
itemSanitized.usage = filterUsageForFormat(estimated, sourceFormat); // Filter + already has buffer
state.usage = estimated;
} else if (state.finishReason && isFinishChunk && state.usage) {
// Add buffer and filter usage for client (but keep original in state.usage for logging)
const buffered = addBufferToUsage(state.usage);
item.usage = filterUsageForFormat(buffered, sourceFormat);
itemSanitized.usage = filterUsageForFormat(buffered, sourceFormat);
}
const output = formatSSE(item, sourceFormat);
clientPayloadCollector.push(item);
const output = formatSSE(itemSanitized, sourceFormat);
clientPayloadCollector.push(itemSanitized);
reqLogger?.appendConvertedChunk?.(output);
controller.enqueue(encoder.encode(output));
}
+2 -2
View File
@@ -1,12 +1,12 @@
{
"name": "omniroute",
"version": "3.2.2",
"version": "3.2.3",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "omniroute",
"version": "3.2.2",
"version": "3.2.3",
"hasInstallScript": true,
"license": "MIT",
"workspaces": [
+1 -1
View File
@@ -1,6 +1,6 @@
{
"name": "omniroute",
"version": "3.2.2",
"version": "3.2.3",
"description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
"type": "module",
"bin": {
@@ -4,7 +4,13 @@ import { useTranslations } from "next-intl";
import { useState, useEffect, useCallback, useMemo, useRef } from "react";
import Image from "next/image";
import { parseQuotaData, calculatePercentage, normalizePlanTier, resolvePlanValue } from "./utils";
import {
parseQuotaData,
calculatePercentage,
formatQuotaLabel,
normalizePlanTier,
resolvePlanValue,
} from "./utils";
import Card from "@/shared/components/Card";
import Badge from "@/shared/components/Badge";
import { CardSkeleton } from "@/shared/components/Loading";
@@ -26,7 +32,7 @@ const PROVIDER_CONFIG = {
kiro: { label: "Kiro AI", color: "#FF6B35" },
codex: { label: "OpenAI Codex", color: "#10A37F" },
claude: { label: "Claude Code", color: "#D97757" },
glm: { label: "GLM Coding", color: "#4A90D9" },
glm: { label: "GLM (Z.AI)", color: "#4A90D9" },
"kimi-coding": { label: "Kimi Coding", color: "#1E3A8A" },
};
@@ -42,29 +48,6 @@ const TIER_FILTERS = [
{ key: "unknown", labelKey: "tierUnknown" },
];
// Short model display names for quota bars
/**
 * Returns the compact display label for a quota/model name.
 * Names without an entry in the table (or with a falsy entry) are returned unchanged.
 */
function getShortModelName(name) {
  const shortLabels = {
    "gemini-3-pro-high": "G3 Pro",
    "gemini-3-pro-low": "G3 Pro Low",
    "gemini-3-flash": "G3 Flash",
    "gemini-2.5-flash": "G2.5 Flash",
    "claude-opus-4-6-thinking": "Opus 4.6 Tk",
    "claude-opus-4-5-thinking": "Opus 4.5 Tk",
    "claude-opus-4-5": "Opus 4.5",
    "claude-sonnet-4-5-thinking": "Sonnet 4.5 Tk",
    "claude-sonnet-4-5": "Sonnet 4.5",
    chat: "Chat",
    completions: "Completions",
    premium_interactions: "Premium",
    session: "Session",
    weekly: "Weekly",
    agentic_request: "Agentic",
    agentic_request_freetrial: "Agentic (Trial)",
  };

  const label = shortLabels[name];
  if (label) {
    return label;
  }
  return name;
}
// Get bar color based on remaining percentage
function getBarColor(remainingPercentage) {
if (remainingPercentage > QUOTA_BAR_GREEN_THRESHOLD) {
@@ -624,7 +607,7 @@ export default function ProviderLimits() {
const remainingPercentage = calculatePercentage(q.used, q.total);
const colors = getBarColor(remainingPercentage);
const cd = formatCountdown(q.resetAt);
const shortName = getShortModelName(q.name);
const shortName = formatQuotaLabel(q.name);
const staleAfterReset = q.staleAfterReset === true;
return (
@@ -10,6 +10,26 @@ const PROVIDER_PLAN_FALLBACKS = new Set([
"github copilot",
]);
// Short display labels keyed by the exact (case-sensitive) raw quota name.
// formatQuotaLabel consults this table first, before applying its
// session/weekly pattern rules.
const QUOTA_LABEL_MAP: Record<string, string> = {
// Model-specific quota names
"gemini-3-pro-high": "G3 Pro",
"gemini-3-pro-low": "G3 Pro Low",
"gemini-3-flash": "G3 Flash",
"gemini-2.5-flash": "G2.5 Flash",
"claude-opus-4-6-thinking": "Opus 4.6 Tk",
"claude-opus-4-5-thinking": "Opus 4.5 Tk",
"claude-opus-4-5": "Opus 4.5",
"claude-sonnet-4-5-thinking": "Sonnet 4.5 Tk",
"claude-sonnet-4-5": "Sonnet 4.5",
// Generic quota bucket / window names
chat: "Chat",
completions: "Completions",
premium_interactions: "Premium",
session: "Session",
weekly: "Weekly",
code_review: "Code Review",
agentic_request: "Agentic",
agentic_request_freetrial: "Agentic (Trial)",
};
function toRecord(value: unknown): Record<string, unknown> {
return value && typeof value === "object" && !Array.isArray(value)
? (value as Record<string, unknown>)
@@ -25,6 +45,37 @@ function normalizePlanCandidate(value: unknown) {
return trimmed;
}
/**
 * Breaks `value` apart on runs of whitespace, underscores, or hyphens and
 * upper-cases the first character of every non-empty piece. The remainder of
 * each piece is left as-is, and the pieces are joined with single spaces.
 */
function toTitleCaseWords(value: string) {
  const capitalized: string[] = [];
  for (const word of value.split(/[\s_-]+/)) {
    if (!word) continue; // leading/trailing separators yield empty pieces
    capitalized.push(word.slice(0, 1).toUpperCase() + word.slice(1));
  }
  return capitalized.join(" ");
}
/**
 * Converts a raw quota name into a short display label.
 *
 * Resolution order:
 *  1. Exact (case-sensitive) match in QUOTA_LABEL_MAP.
 *  2. "session (<n>h|m)"       -> "Session"
 *  3. "weekly (<n>d)"          -> "Weekly"
 *  4. "weekly <model> (<n>d)"  -> "Weekly <Model>" (model title-cased)
 *  5. Otherwise the trimmed name itself.
 *
 * @param name Raw quota name; non-string or blank input yields "".
 * @returns The label to display; always a string.
 */
export function formatQuotaLabel(name: string): string {
  const trimmed = typeof name === "string" ? name.trim() : "";
  if (!trimmed) return "";

  // Only honour the table's own keys. A bare `QUOTA_LABEL_MAP[trimmed]` would
  // resolve names like "constructor" or "toString" to functions inherited from
  // Object.prototype and return them instead of a string.
  const mapped = Object.prototype.hasOwnProperty.call(QUOTA_LABEL_MAP, trimmed)
    ? QUOTA_LABEL_MAP[trimmed]
    : undefined;
  if (mapped) return mapped;

  if (/^session\s*\(\d+[hm]\)$/i.test(trimmed)) {
    return "Session";
  }

  if (/^weekly\s*\(\d+d\)$/i.test(trimmed)) {
    return "Weekly";
  }

  // e.g. "weekly sonnet (7d)": keep the model name, drop the window suffix.
  const weeklyModelMatch = trimmed.match(/^weekly\s+(.+?)\s*\(\d+d\)$/i);
  if (weeklyModelMatch) {
    return `Weekly ${toTitleCaseWords(weeklyModelMatch[1])}`;
  }

  return trimmed;
}
/**
* Format ISO date string to countdown format (inspired by vscode-antigravity-cockpit)
* @param {string|Date} date - ISO date string or Date object
@@ -204,6 +255,7 @@ export function parseQuotaData(provider, data) {
break;
default:
// Generic fallback for unknown providers
if (data.quotas) {
Object.entries(data.quotas).forEach(([name, quota]: [string, any]) => {
normalizedQuotas.push(normalizeQuotaEntry(name, quota));
+13
View File
@@ -337,3 +337,16 @@ button .material-symbols-outlined,
.traffic-light.green {
background: var(--color-traffic-green);
}
/* ── Mobile Layout Fixes (Issue #659) ── */
/* Applies only at viewport widths of 768px and below. */
@media (max-width: 768px) {
/* Cap the wrapper at the viewport width and let its content scroll horizontally. */
.ant-table-wrapper {
overflow-x: auto;
-webkit-overflow-scrolling: touch;
max-width: 100vw;
}
/* The table never shrinks below 600px, so on narrower screens the wrapper above scrolls. */
.ant-table {
min-width: 600px; /* Prevent columns from crushing together */
}
}
+7 -5
View File
@@ -157,13 +157,15 @@ async function getAntigravityUsage(accessToken) {
}
/**
* Claude Usage
* Claude Usage (legacy fallback)
* Real Claude OAuth quota windows are fetched in @omniroute/open-sse/services/usage.ts.
*/
async function getClaudeUsage(accessToken) {
async function getClaudeUsage() {
try {
// Claude OAuth doesn't expose usage API directly
// Could potentially check via inference endpoint
return { message: "Claude connected. Usage tracked per request." };
return {
message:
"Claude connected. Detailed quota windows are handled by the open-sse usage service.",
};
} catch (error) {
return { message: "Unable to fetch Claude usage." };
}
@@ -57,8 +57,8 @@ export default function DashboardLayout({ children }) {
>
<Header onMenuClick={() => setSidebarOpen(true)} />
<MaintenanceBanner />
<div className="flex-1 overflow-y-auto custom-scrollbar p-6 lg:p-10">
<div className="max-w-7xl mx-auto">
<div className="flex-1 overflow-y-auto overflow-x-hidden custom-scrollbar p-4 sm:p-6 lg:p-10">
<div className="max-w-7xl mx-auto w-full">
<Breadcrumbs />
{children}
</div>
+6 -1
View File
@@ -78,6 +78,9 @@ const comboStrategySchema = z.enum([
"cost-optimized",
"strict-random",
"auto",
"fill-first",
// #729 schema fixes for combo edit/save
"p2c",
]);
const comboRuntimeConfigSchema = z
@@ -884,6 +887,7 @@ export const updateComboSchema = z
system_message: z.string().max(50000).optional(),
tool_filter_regex: z.string().max(1000).optional(),
context_cache_protection: z.boolean().optional(),
context_length: z.number().int().min(1000).max(2000000).optional(),
})
.superRefine((value, ctx) => {
if (
@@ -895,7 +899,8 @@ export const updateComboSchema = z
value.allowedProviders === undefined &&
value.system_message === undefined &&
value.tool_filter_regex === undefined &&
value.context_cache_protection === undefined
value.context_cache_protection === undefined &&
value.context_length === undefined
) {
ctx.addIssue({
code: z.ZodIssueCode.custom,
@@ -0,0 +1,184 @@
import { describe, test } from "node:test";
import assert from "node:assert/strict";
import { prepareClaudeRequest } from "../../open-sse/translator/helpers/claudeHelper.ts";
describe("Claude cache_control passthrough", () => {
  // Fixture factories: each call builds a brand-new object graph, so no test
  // ever sees a request that was already handed to prepareClaudeRequest.

  /** Two system text blocks; only the second carries a client marker (ttl "5m"). */
  const buildSystemRequest = () => ({
    system: [
      { type: "text", text: "System prompt 1" },
      { type: "text", text: "System prompt 2", cache_control: { type: "ephemeral", ttl: "5m" } },
    ],
    messages: [],
  });

  /** One user turn (second block marked) followed by one marked assistant turn. */
  const buildConversationRequest = () => ({
    messages: [
      {
        role: "user",
        content: [
          { type: "text", text: "User message 1" },
          { type: "text", text: "User message 2", cache_control: { type: "ephemeral" } },
        ],
      },
      {
        role: "assistant",
        content: [
          {
            type: "text",
            text: "Assistant response",
            cache_control: { type: "ephemeral", ttl: "10m" },
          },
        ],
      },
    ],
  });

  /** Two tool definitions; only the second carries a client marker (ttl "30m"). */
  const buildToolsRequest = () => ({
    messages: [],
    tools: [
      { name: "tool1", description: "Tool 1", input_schema: { type: "object" } },
      {
        name: "tool2",
        description: "Tool 2",
        input_schema: { type: "object" },
        cache_control: { type: "ephemeral", ttl: "30m" },
      },
    ],
  });

  test("preserveCacheControl=true preserves cache_control in system blocks", () => {
    const prepared = prepareClaudeRequest(buildSystemRequest(), "claude", true);

    assert.equal(prepared.system.length, 2);
    assert.equal(prepared.system[0].cache_control, undefined);
    assert.deepEqual(prepared.system[1].cache_control, { type: "ephemeral", ttl: "5m" });
  });

  test("preserveCacheControl=false replaces cache_control in system blocks", () => {
    const prepared = prepareClaudeRequest(buildSystemRequest(), "claude", false);

    assert.equal(prepared.system.length, 2);
    assert.equal(prepared.system[0].cache_control, undefined);
    assert.deepEqual(prepared.system[1].cache_control, { type: "ephemeral", ttl: "1h" });
  });

  test("preserveCacheControl=true preserves cache_control in message content blocks", () => {
    const prepared = prepareClaudeRequest(buildConversationRequest(), "claude", true);

    assert.equal(prepared.messages.length, 2);
    assert.equal(prepared.messages[0].content[0].cache_control, undefined);
    assert.deepEqual(prepared.messages[0].content[1].cache_control, { type: "ephemeral" });
    assert.deepEqual(prepared.messages[1].content[0].cache_control, {
      type: "ephemeral",
      ttl: "10m",
    });
  });

  test("preserveCacheControl=false strips and re-adds cache_control in messages", () => {
    const prepared = prepareClaudeRequest(buildConversationRequest(), "claude", false);

    // The client's markers are expected to be removed and OmniRoute's own strategy applied.
    assert.equal(prepared.messages.length, 2);
    // No marker on the user turn (only the second-to-last user turn would receive one).
    assert.equal(prepared.messages[0].content[0].cache_control, undefined);
    assert.equal(prepared.messages[0].content[1].cache_control, undefined);
    // The last assistant turn receives the marker added by OmniRoute.
    assert.deepEqual(prepared.messages[1].content[0].cache_control, { type: "ephemeral" });
  });

  test("preserveCacheControl=true preserves cache_control in tools", () => {
    const prepared = prepareClaudeRequest(buildToolsRequest(), "claude", true);

    assert.equal(prepared.tools.length, 2);
    assert.equal(prepared.tools[0].cache_control, undefined);
    assert.deepEqual(prepared.tools[1].cache_control, { type: "ephemeral", ttl: "30m" });
  });

  test("preserveCacheControl=false replaces cache_control in tools", () => {
    const prepared = prepareClaudeRequest(buildToolsRequest(), "claude", false);

    assert.equal(prepared.tools.length, 2);
    assert.equal(prepared.tools[0].cache_control, undefined);
    assert.deepEqual(prepared.tools[1].cache_control, { type: "ephemeral", ttl: "1h" });
  });

  test("preserveCacheControl=true with Claude Code-style caching", () => {
    // Markers on the system prompt, the first user turn and the tool definition.
    const claudeCodeRequest = {
      system: [{ type: "text", text: "System", cache_control: { type: "ephemeral", ttl: "5m" } }],
      messages: [
        {
          role: "user",
          content: [{ type: "text", text: "Turn 1", cache_control: { type: "ephemeral" } }],
        },
        {
          role: "assistant",
          content: [{ type: "text", text: "Response 1" }],
        },
        {
          role: "user",
          content: [{ type: "text", text: "Turn 2" }],
        },
      ],
      tools: [
        {
          name: "bash",
          description: "Execute bash",
          input_schema: { type: "object" },
          cache_control: { type: "ephemeral", ttl: "5m" },
        },
      ],
    };

    const prepared = prepareClaudeRequest(claudeCodeRequest, "claude", true);

    // Every marker must come back exactly where the client put it, and nowhere else.
    assert.deepEqual(prepared.system[0].cache_control, { type: "ephemeral", ttl: "5m" });
    assert.deepEqual(prepared.messages[0].content[0].cache_control, { type: "ephemeral" });
    assert.equal(prepared.messages[1].content[0].cache_control, undefined);
    assert.equal(prepared.messages[2].content[0].cache_control, undefined);
    assert.deepEqual(prepared.tools[0].cache_control, { type: "ephemeral", ttl: "5m" });
  });
});
+9
View File
@@ -44,3 +44,12 @@ test("remaining percentage helpers reflect remaining quota and stale resets refi
assert.equal(parsed.length, 1);
assert.equal(providerLimitUtils.calculatePercentage(parsed[0].used, parsed[0].total), 100);
});
test("quota labels normalize session and weekly windows while preserving readable titles", () => {
  // [raw quota name, expected display label], checked in order.
  const expectations = [
    ["session", "Session"],
    ["session (5h)", "Session"],
    ["weekly", "Weekly"],
    ["weekly (7d)", "Weekly"],
    ["weekly sonnet (7d)", "Weekly Sonnet"],
    ["code_review", "Code Review"],
  ];

  for (const [rawName, expectedLabel] of expectations) {
    assert.equal(providerLimitUtils.formatQuotaLabel(rawName), expectedLabel);
  }
});