Merge pull request #730 from diegosouzapw/release/v3.2.3

chore(release): v3.2.3 — Enhancements and Bugfixes
2026-03-28 23:21:55 -03:00 · 2026-03-28 23:19:01 -03:00 · 2026-03-28 23:13:03 -03:00 · 2026-03-28 23:12:26 -03:00 · 2026-03-28 23:11:22 -03:00 · 2026-03-28 23:06:16 -03:00
16 changed files with 341 additions and 54 deletions
@@ -47,3 +47,12 @@ AGENTS.md
 # Build artifacts (pre-built goes inside app/)
 .next/
 node_modules/
+
+# Ignore large binary files and other build directories
+*.tgz
+*.AppImage
+*.deb
+*.rpm
+electron/
+app/electron/
+app/vscode-extension/
@@ -4,6 +4,20 @@

 ---

+## [3.2.3] — 2026-03-29
+
+### ✨ Enhancements & Refactoring
+
+- **Provider Limits Quota UI (#728)** — Normalized quota limit logic and data labeling inside the Limits interface.
+
+### 🐛 Bug Fixes
+
+- **Core Routing Schemas & Leaks** — Expanded `comboStrategySchema` to natively support `fill-first` and `p2c` strategies to unblock complex combo editing natively.
+- **Thinking Tags Extraction (CLI)** — Restructured CLI token responses sanitizer RegEx capturing model reasoning structures inside streams avoiding broken `<thinking>` extractions breaking response text output format.
+- **Strict Format Enforcements** — Hardened pipeline sanitization execution making it universally apply to translation mode targets.
+
+---
+
 ## [3.2.2] — 2026-03-29

 ### ✨ New Features
@@ -1,7 +1,7 @@
 openapi: 3.1.0
 info:
  title: OmniRoute API
-  version: 3.2.2
+  version: 3.2.3
  description: |
    OmniRoute is a local-first AI API proxy router. It provides an OpenAI-compatible
    endpoint that routes requests to multiple AI providers with load balancing,
@@ -531,10 +531,10 @@ export async function handleChatCore({
      connectionId,
      duration: Date.now() - startTime,
      tokens: tokens || {},
-      requestBody: attachLogMeta(body, {
+      requestBody: attachLogMeta((body as Record<string, unknown>) ?? undefined, {
        claudePromptCache: claudeCacheMeta,
      }),
-      responseBody: attachLogMeta(responseBody ?? undefined, {
+      responseBody: attachLogMeta((responseBody as Record<string, unknown>) ?? undefined, {
        claudePromptCache: claudeCacheMeta
          ? {
              applied: claudeCacheMeta.applied,
@@ -1464,8 +1464,9 @@ export async function handleChatCore({

    // Sanitize response for OpenAI SDK compatibility
    // Strips non-standard fields (x_groq, usage_breakdown, service_tier, etc.)
-    // Extracts <think> tags into reasoning_content
-    if (sourceFormat === FORMATS.OPENAI) {
+    // Extracts <think> and <thinking> tags into reasoning_content
+    // Target format determines output shape. If we are outputting OpenAI shape or pseudo-OpenAI shape, sanitize.
+    if (targetFormat === FORMATS.OPENAI || targetFormat === FORMATS.OPENAI_RESPONSES) {
      translatedResponse = sanitizeOpenAIResponse(translatedResponse);
    }

@@ -32,13 +32,12 @@ function toNumber(value: unknown): number | undefined {
  return typeof value === "number" && Number.isFinite(value) ? value : undefined;
 }

-// ── Think tag regex ────────────────────────────────────────────────────────
-// Matches <think>...</think> blocks (greedy, dotAll)
-const THINK_TAG_REGEX = /<think>([\s\S]*?)<\/think>/gi;
+// Matches <think>...</think> blocks and <thinking>...</thinking> (greedy, dotAll)
+const THINK_TAG_REGEX = /<(?:think|thinking)>([\s\S]*?)<\/(?:think|thinking)>/gi;

-// #638: Collapse runs of 3+ consecutive newlines into \n\n
+// #638, #727: Collapse runs of 2+ consecutive newlines into \n\n
 // Tool call responses from thinking models often accumulate excessive newlines
-const EXCESSIVE_NEWLINES = /\n{3,}/g;
+const EXCESSIVE_NEWLINES = /\n{2,}/g;
 function collapseExcessiveNewlines(text: string): string {
  return text.replace(EXCESSIVE_NEWLINES, "\n\n");
 }
@@ -528,31 +528,47 @@ export function createSSEStream(options: StreamOptions = {}) {
              // Content for call log is accumulated only from parsed (above) to avoid double-counting;
              // do not add again from item here.

+              // #723, #727: Sanitize intermediate stream chunks if target is OpenAI format loop
+              let itemSanitized: Record<string, unknown> = item;
+              if (targetFormat === FORMATS.OPENAI || targetFormat === FORMATS.OPENAI_RESPONSES) {
+                itemSanitized = sanitizeStreamingChunk(itemSanitized) as Record<string, unknown>;
+
+                // Extract reasoning tags from content if translation generated them
+                const delta = itemSanitized?.choices?.[0]?.delta;
+                if (delta?.content && typeof delta.content === "string") {
+                  const { content, thinking } = extractThinkingFromContent(delta.content);
+                  delta.content = content;
+                  if (thinking && !delta.reasoning_content) {
+                    delta.reasoning_content = thinking;
+                  }
+                }
+              }
+
              // Filter empty chunks
-              if (!hasValuableContent(item, sourceFormat)) {
+              if (!hasValuableContent(itemSanitized, sourceFormat)) {
                continue; // Skip this empty chunk
              }

              // Inject estimated usage if finish chunk has no valid usage
              const isFinishChunk =
-                item.type === "message_delta" || item.choices?.[0]?.finish_reason;
+                itemSanitized.type === "message_delta" || itemSanitized.choices?.[0]?.finish_reason;
              if (
                state.finishReason &&
                isFinishChunk &&
-                !hasValidUsage(item.usage) &&
+                !hasValidUsage(itemSanitized.usage) &&
                totalContentLength > 0
              ) {
                const estimated = estimateUsage(body, totalContentLength, sourceFormat);
-                item.usage = filterUsageForFormat(estimated, sourceFormat); // Filter + already has buffer
+                itemSanitized.usage = filterUsageForFormat(estimated, sourceFormat); // Filter + already has buffer
                state.usage = estimated;
              } else if (state.finishReason && isFinishChunk && state.usage) {
                // Add buffer and filter usage for client (but keep original in state.usage for logging)
                const buffered = addBufferToUsage(state.usage);
-                item.usage = filterUsageForFormat(buffered, sourceFormat);
+                itemSanitized.usage = filterUsageForFormat(buffered, sourceFormat);
              }

-              const output = formatSSE(item, sourceFormat);
-              clientPayloadCollector.push(item);
+              const output = formatSSE(itemSanitized, sourceFormat);
+              clientPayloadCollector.push(itemSanitized);
              reqLogger?.appendConvertedChunk?.(output);
              controller.enqueue(encoder.encode(output));
            }
@@ -1,12 +1,12 @@
 {
  "name": "omniroute",
-  "version": "3.2.2",
+  "version": "3.2.3",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "omniroute",
-      "version": "3.2.2",
+      "version": "3.2.3",
      "hasInstallScript": true,
      "license": "MIT",
      "workspaces": [
@@ -1,6 +1,6 @@
 {
  "name": "omniroute",
-  "version": "3.2.2",
+  "version": "3.2.3",
  "description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
  "type": "module",
  "bin": {
@@ -4,7 +4,13 @@ import { useTranslations } from "next-intl";

 import { useState, useEffect, useCallback, useMemo, useRef } from "react";
 import Image from "next/image";
-import { parseQuotaData, calculatePercentage, normalizePlanTier, resolvePlanValue } from "./utils";
+import {
+  parseQuotaData,
+  calculatePercentage,
+  formatQuotaLabel,
+  normalizePlanTier,
+  resolvePlanValue,
+} from "./utils";
 import Card from "@/shared/components/Card";
 import Badge from "@/shared/components/Badge";
 import { CardSkeleton } from "@/shared/components/Loading";
@@ -26,7 +32,7 @@ const PROVIDER_CONFIG = {
  kiro: { label: "Kiro AI", color: "#FF6B35" },
  codex: { label: "OpenAI Codex", color: "#10A37F" },
  claude: { label: "Claude Code", color: "#D97757" },
-  glm: { label: "GLM Coding", color: "#4A90D9" },
+  glm: { label: "GLM (Z.AI)", color: "#4A90D9" },
  "kimi-coding": { label: "Kimi Coding", color: "#1E3A8A" },
 };

@@ -42,29 +48,6 @@ const TIER_FILTERS = [
  { key: "unknown", labelKey: "tierUnknown" },
 ];

-// Short model display names for quota bars
-function getShortModelName(name) {
-  const map = {
-    "gemini-3-pro-high": "G3 Pro",
-    "gemini-3-pro-low": "G3 Pro Low",
-    "gemini-3-flash": "G3 Flash",
-    "gemini-2.5-flash": "G2.5 Flash",
-    "claude-opus-4-6-thinking": "Opus 4.6 Tk",
-    "claude-opus-4-5-thinking": "Opus 4.5 Tk",
-    "claude-opus-4-5": "Opus 4.5",
-    "claude-sonnet-4-5-thinking": "Sonnet 4.5 Tk",
-    "claude-sonnet-4-5": "Sonnet 4.5",
-    chat: "Chat",
-    completions: "Completions",
-    premium_interactions: "Premium",
-    session: "Session",
-    weekly: "Weekly",
-    agentic_request: "Agentic",
-    agentic_request_freetrial: "Agentic (Trial)",
-  };
-  return map[name] || name;
-}
-
 // Get bar color based on remaining percentage
 function getBarColor(remainingPercentage) {
  if (remainingPercentage > QUOTA_BAR_GREEN_THRESHOLD) {
@@ -624,7 +607,7 @@ export default function ProviderLimits() {
                      const remainingPercentage = calculatePercentage(q.used, q.total);
                      const colors = getBarColor(remainingPercentage);
                      const cd = formatCountdown(q.resetAt);
-                      const shortName = getShortModelName(q.name);
+                      const shortName = formatQuotaLabel(q.name);
                      const staleAfterReset = q.staleAfterReset === true;

                      return (
@@ -10,6 +10,26 @@ const PROVIDER_PLAN_FALLBACKS = new Set([
  "github copilot",
 ]);

+const QUOTA_LABEL_MAP: Record<string, string> = {
+  "gemini-3-pro-high": "G3 Pro",
+  "gemini-3-pro-low": "G3 Pro Low",
+  "gemini-3-flash": "G3 Flash",
+  "gemini-2.5-flash": "G2.5 Flash",
+  "claude-opus-4-6-thinking": "Opus 4.6 Tk",
+  "claude-opus-4-5-thinking": "Opus 4.5 Tk",
+  "claude-opus-4-5": "Opus 4.5",
+  "claude-sonnet-4-5-thinking": "Sonnet 4.5 Tk",
+  "claude-sonnet-4-5": "Sonnet 4.5",
+  chat: "Chat",
+  completions: "Completions",
+  premium_interactions: "Premium",
+  session: "Session",
+  weekly: "Weekly",
+  code_review: "Code Review",
+  agentic_request: "Agentic",
+  agentic_request_freetrial: "Agentic (Trial)",
+};
+
 function toRecord(value: unknown): Record<string, unknown> {
  return value && typeof value === "object" && !Array.isArray(value)
    ? (value as Record<string, unknown>)
@@ -25,6 +45,37 @@ function normalizePlanCandidate(value: unknown) {
  return trimmed;
 }

+function toTitleCaseWords(value: string) {
+  return value
+    .split(/[\s_-]+/)
+    .filter(Boolean)
+    .map((part) => part.charAt(0).toUpperCase() + part.slice(1))
+    .join(" ");
+}
+
+export function formatQuotaLabel(name: string) {
+  const trimmed = typeof name === "string" ? name.trim() : "";
+  if (!trimmed) return "";
+
+  const mapped = QUOTA_LABEL_MAP[trimmed];
+  if (mapped) return mapped;
+
+  if (/^session\s*\(\d+[hm]\)$/i.test(trimmed)) {
+    return "Session";
+  }
+
+  if (/^weekly\s*\(\d+d\)$/i.test(trimmed)) {
+    return "Weekly";
+  }
+
+  const weeklyModelMatch = trimmed.match(/^weekly\s+(.+?)\s*\(\d+d\)$/i);
+  if (weeklyModelMatch) {
+    return `Weekly ${toTitleCaseWords(weeklyModelMatch[1])}`;
+  }
+
+  return trimmed;
+}
+
 /**
 * Format ISO date string to countdown format (inspired by vscode-antigravity-cockpit)
 * @param {string|Date} date - ISO date string or Date object
@@ -204,6 +255,7 @@ export function parseQuotaData(provider, data) {
        break;

      default:
+        // Generic fallback for unknown providers
        if (data.quotas) {
          Object.entries(data.quotas).forEach(([name, quota]: [string, any]) => {
            normalizedQuotas.push(normalizeQuotaEntry(name, quota));
@@ -337,3 +337,16 @@ button .material-symbols-outlined,
 .traffic-light.green {
  background: var(--color-traffic-green);
 }
+
+/* ── Mobile Layout Fixes (Issue #659) ── */
+@media (max-width: 768px) {
+  .ant-table-wrapper {
+    overflow-x: auto;
+    -webkit-overflow-scrolling: touch;
+    max-width: 100vw;
+  }
+
+  .ant-table {
+    min-width: 600px; /* Prevent columns from crushing together */
+  }
+}
@@ -157,13 +157,15 @@ async function getAntigravityUsage(accessToken) {
 }

 /**
- * Claude Usage
+ * Claude Usage (legacy fallback)
+ * Real Claude OAuth quota windows are fetched in @omniroute/open-sse/services/usage.ts.
 */
-async function getClaudeUsage(accessToken) {
+async function getClaudeUsage() {
  try {
-    // Claude OAuth doesn't expose usage API directly
-    // Could potentially check via inference endpoint
-    return { message: "Claude connected. Usage tracked per request." };
+    return {
+      message:
+        "Claude connected. Detailed quota windows are handled by the open-sse usage service.",
+    };
  } catch (error) {
    return { message: "Unable to fetch Claude usage." };
  }
@@ -57,8 +57,8 @@ export default function DashboardLayout({ children }) {
      >
        <Header onMenuClick={() => setSidebarOpen(true)} />
        <MaintenanceBanner />
-        <div className="flex-1 overflow-y-auto custom-scrollbar p-6 lg:p-10">
-          <div className="max-w-7xl mx-auto">
+        <div className="flex-1 overflow-y-auto overflow-x-hidden custom-scrollbar p-4 sm:p-6 lg:p-10">
+          <div className="max-w-7xl mx-auto w-full">
            <Breadcrumbs />
            {children}
          </div>
@@ -78,6 +78,9 @@ const comboStrategySchema = z.enum([
  "cost-optimized",
  "strict-random",
  "auto",
+  "fill-first",
+  // #729 schema fixes for combo edit/save
+  "p2c",
 ]);

 const comboRuntimeConfigSchema = z
@@ -884,6 +887,7 @@ export const updateComboSchema = z
    system_message: z.string().max(50000).optional(),
    tool_filter_regex: z.string().max(1000).optional(),
    context_cache_protection: z.boolean().optional(),
+    context_length: z.number().int().min(1000).max(2000000).optional(),
  })
  .superRefine((value, ctx) => {
    if (
@@ -895,7 +899,8 @@ export const updateComboSchema = z
      value.allowedProviders === undefined &&
      value.system_message === undefined &&
      value.tool_filter_regex === undefined &&
-      value.context_cache_protection === undefined
+      value.context_cache_protection === undefined &&
+      value.context_length === undefined
    ) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
@@ -0,0 +1,184 @@
+import { describe, test } from "node:test";
+import assert from "node:assert/strict";
+import { prepareClaudeRequest } from "../../open-sse/translator/helpers/claudeHelper.ts";
+
+describe("Claude cache_control passthrough", () => {
+  test("preserveCacheControl=true preserves cache_control in system blocks", () => {
+    const body = {
+      system: [
+        { type: "text", text: "System prompt 1" },
+        { type: "text", text: "System prompt 2", cache_control: { type: "ephemeral", ttl: "5m" } },
+      ],
+      messages: [],
+    };
+
+    const result = prepareClaudeRequest(body, "claude", true);
+
+    assert.equal(result.system.length, 2);
+    assert.equal(result.system[0].cache_control, undefined);
+    assert.deepEqual(result.system[1].cache_control, { type: "ephemeral", ttl: "5m" });
+  });
+
+  test("preserveCacheControl=false replaces cache_control in system blocks", () => {
+    const body = {
+      system: [
+        { type: "text", text: "System prompt 1" },
+        { type: "text", text: "System prompt 2", cache_control: { type: "ephemeral", ttl: "5m" } },
+      ],
+      messages: [],
+    };
+
+    const result = prepareClaudeRequest(body, "claude", false);
+
+    assert.equal(result.system.length, 2);
+    assert.equal(result.system[0].cache_control, undefined);
+    assert.deepEqual(result.system[1].cache_control, { type: "ephemeral", ttl: "1h" });
+  });
+
+  test("preserveCacheControl=true preserves cache_control in message content blocks", () => {
+    const body = {
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "User message 1" },
+            { type: "text", text: "User message 2", cache_control: { type: "ephemeral" } },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "text",
+              text: "Assistant response",
+              cache_control: { type: "ephemeral", ttl: "10m" },
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = prepareClaudeRequest(body, "claude", true);
+
+    assert.equal(result.messages.length, 2);
+    assert.equal(result.messages[0].content[0].cache_control, undefined);
+    assert.deepEqual(result.messages[0].content[1].cache_control, { type: "ephemeral" });
+    assert.deepEqual(result.messages[1].content[0].cache_control, {
+      type: "ephemeral",
+      ttl: "10m",
+    });
+  });
+
+  test("preserveCacheControl=false strips and re-adds cache_control in messages", () => {
+    const body = {
+      messages: [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "User message 1" },
+            { type: "text", text: "User message 2", cache_control: { type: "ephemeral" } },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            {
+              type: "text",
+              text: "Assistant response",
+              cache_control: { type: "ephemeral", ttl: "10m" },
+            },
+          ],
+        },
+      ],
+    };
+
+    const result = prepareClaudeRequest(body, "claude", false);
+
+    // Original cache_control should be stripped and OmniRoute's strategy applied
+    assert.equal(result.messages.length, 2);
+    // User message should not have cache_control (only second-to-last user gets it)
+    assert.equal(result.messages[0].content[0].cache_control, undefined);
+    assert.equal(result.messages[0].content[1].cache_control, undefined);
+    // Last assistant should have cache_control added by OmniRoute
+    assert.deepEqual(result.messages[1].content[0].cache_control, { type: "ephemeral" });
+  });
+
+  test("preserveCacheControl=true preserves cache_control in tools", () => {
+    const body = {
+      messages: [],
+      tools: [
+        { name: "tool1", description: "Tool 1", input_schema: { type: "object" } },
+        {
+          name: "tool2",
+          description: "Tool 2",
+          input_schema: { type: "object" },
+          cache_control: { type: "ephemeral", ttl: "30m" },
+        },
+      ],
+    };
+
+    const result = prepareClaudeRequest(body, "claude", true);
+
+    assert.equal(result.tools.length, 2);
+    assert.equal(result.tools[0].cache_control, undefined);
+    assert.deepEqual(result.tools[1].cache_control, { type: "ephemeral", ttl: "30m" });
+  });
+
+  test("preserveCacheControl=false replaces cache_control in tools", () => {
+    const body = {
+      messages: [],
+      tools: [
+        { name: "tool1", description: "Tool 1", input_schema: { type: "object" } },
+        {
+          name: "tool2",
+          description: "Tool 2",
+          input_schema: { type: "object" },
+          cache_control: { type: "ephemeral", ttl: "30m" },
+        },
+      ],
+    };
+
+    const result = prepareClaudeRequest(body, "claude", false);
+
+    assert.equal(result.tools.length, 2);
+    assert.equal(result.tools[0].cache_control, undefined);
+    assert.deepEqual(result.tools[1].cache_control, { type: "ephemeral", ttl: "1h" });
+  });
+
+  test("preserveCacheControl=true with Claude Code-style caching", () => {
+    const body = {
+      system: [{ type: "text", text: "System", cache_control: { type: "ephemeral", ttl: "5m" } }],
+      messages: [
+        {
+          role: "user",
+          content: [{ type: "text", text: "Turn 1", cache_control: { type: "ephemeral" } }],
+        },
+        {
+          role: "assistant",
+          content: [{ type: "text", text: "Response 1" }],
+        },
+        {
+          role: "user",
+          content: [{ type: "text", text: "Turn 2" }],
+        },
+      ],
+      tools: [
+        {
+          name: "bash",
+          description: "Execute bash",
+          input_schema: { type: "object" },
+          cache_control: { type: "ephemeral", ttl: "5m" },
+        },
+      ],
+    };
+
+    const result = prepareClaudeRequest(body, "claude", true);
+
+    // All original cache_control should be preserved
+    assert.deepEqual(result.system[0].cache_control, { type: "ephemeral", ttl: "5m" });
+    assert.deepEqual(result.messages[0].content[0].cache_control, { type: "ephemeral" });
+    assert.equal(result.messages[1].content[0].cache_control, undefined);
+    assert.equal(result.messages[2].content[0].cache_control, undefined);
+    assert.deepEqual(result.tools[0].cache_control, { type: "ephemeral", ttl: "5m" });
+  });
+});
@@ -44,3 +44,12 @@ test("remaining percentage helpers reflect remaining quota and stale resets refi
  assert.equal(parsed.length, 1);
  assert.equal(providerLimitUtils.calculatePercentage(parsed[0].used, parsed[0].total), 100);
 });
+
+test("quota labels normalize session and weekly windows while preserving readable titles", () => {
+  assert.equal(providerLimitUtils.formatQuotaLabel("session"), "Session");
+  assert.equal(providerLimitUtils.formatQuotaLabel("session (5h)"), "Session");
+  assert.equal(providerLimitUtils.formatQuotaLabel("weekly"), "Weekly");
+  assert.equal(providerLimitUtils.formatQuotaLabel("weekly (7d)"), "Weekly");
+  assert.equal(providerLimitUtils.formatQuotaLabel("weekly sonnet (7d)"), "Weekly Sonnet");
+  assert.equal(providerLimitUtils.formatQuotaLabel("code_review"), "Code Review");
+});
Author	SHA1	Message	Date
Diego Rodrigues de Sa e Souza	f171b7de96	Merge pull request #730 from diegosouzapw/release/v3.2.3 Build Electron Desktop App / Validate version (push) Failing after 40s Details Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped Details Build Electron Desktop App / Build Electron (linux) (push) Has been skipped Details Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped Details Build Electron Desktop App / Build Electron (windows) (push) Has been skipped Details Build Electron Desktop App / Create Release (push) Has been skipped Details Build Electron Desktop App / Publish to npm (push) Has been skipped Details chore(release): v3.2.3 — Enhancements and Bugfixes	2026-03-28 23:21:55 -03:00
diegosouzapw	c0cbf00199	chore(release): v3.2.3 — Enhancements and Bugfixes	2026-03-28 23:19:01 -03:00
diegosouzapw	0cd6e59fb9	Merge cache-control fix and resolve changelog conflict	2026-03-28 23:13:03 -03:00
diegosouzapw	11a8adc71c	Merge branch 'feat/issue-659-mobile-ui'	2026-03-28 23:12:26 -03:00
diegosouzapw	b9c7fd879f	fix(core): resolve routing schemas, CLI streaming leaks, and thinking tag extraction	2026-03-28 23:11:22 -03:00
Diego Rodrigues de Sa e Souza	2fc4c7ea33	Merge pull request #728 from rdself/codex/normalize-provider-limits-labels normalize provider limits labels	2026-03-28 23:06:16 -03:00
R.D.	538028c150	normalize provider limits quota labels	2026-03-28 21:17:07 -04:00
diegosouzapw	94a00cb6d6	feat: improve dashboard layout for smaller screens (#659 )	2026-03-28 21:53:07 -03:00
tombii	b84c915b23	fix(sse): preserve cache_control in Claude passthrough mode When Claude Code routes through OmniRoute (Claude → OmniRoute → Claude), OmniRoute was stripping all cache_control markers and replacing them with its own generic caching strategy. This broke Claude Code's carefully placed cache breakpoints for plans and other features. Changes: - Add preserveCacheControl parameter to prepareClaudeRequest() - Detect Claude passthrough mode (sourceFormat === targetFormat === CLAUDE) - Skip cache_control normalization when preserveCacheControl=true - Preserve client's cache_control markers in system, messages, and tools This ensures Claude Code's prompt caching optimization works correctly while maintaining OmniRoute's caching strategy for translation scenarios. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-28 16:30:41 +01:00