chore(release): v3.3.7 — OpenCode config fix, i18n keys fix

chore(release): bump version to v3.3.7
fix: resolve opencode json structure to use record mapping instead of array (#816)
2026-03-30 19:30:18 -03:00 · 2026-03-30 19:28:20 -03:00 · 2026-03-30 19:23:25 -03:00 · 2026-03-30 19:23:14 -03:00 · 2026-03-30 19:16:50 -03:00 · 2026-03-30 18:24:15 -03:00
81 changed files with 2370 additions and 495 deletions
@@ -96,7 +96,18 @@ Keep an empty `## [Unreleased]` section above it.
 // turbo

 ```bash
-VERSION=$(node -p "require('./package.json').version") && sed -i "s/  version: .*/  version: $VERSION/" docs/openapi.yaml && echo "✓ openapi.yaml → $VERSION"
+VERSION=$(node -p "require('./package.json').version")
+sed -i "s/  version: .*/  version: $VERSION/" docs/openapi.yaml
+echo "✓ openapi.yaml → $VERSION"
+
+for dir in electron open-sse; do
+  if [ -d "$dir" ] && [ -f "$dir/package.json" ]; then
+    (cd "$dir" && npm version "$VERSION" --no-git-tag-version --allow-same-version > /dev/null)
+    echo "✓ $dir/package.json → $VERSION"
+  fi
+done
+# Re-run install so the workspace lockfile picks up the bumped versions
+npm install
 ```

 ### 6. Update README.md and i18n docs
@@ -57,17 +57,18 @@ jobs:
      - name: Resolve version and dist-tag
        id: resolve
        run: |
-          case "${{ github.event_name }}" in
-            workflow_dispatch|workflow_call)
-              VERSION="${{ inputs.version }}"
-              TAG="${{ inputs.tag }}"
-              ;;
-            release)
+          VERSION="${{ inputs.version }}"
+          TAG="${{ inputs.tag }}"
+
+          if [ -z "$VERSION" ]; then
+            if [ "${{ github.event_name }}" = "release" ]; then
              VERSION="${GITHUB_REF_NAME}"
-              ;;
-          esac
+            fi
+          fi
+
          # Strip v prefix if present
          VERSION="${VERSION#v}"
+
          # Default dist-tag logic
          if [ -z "$TAG" ]; then
            if [[ "$VERSION" == *-* ]]; then
@@ -3,6 +3,40 @@
 ## [Unreleased]

 ---
+
+## [3.3.7] - 2026-03-30
+
+### 🐛 Bug Fixes
+
+- **OpenCode Config:** Restructured generated `opencode.json` to use the `@ai-sdk/openai-compatible` record-based schema with `options` and `models` as object maps instead of flat arrays, fixing config validation failures (#816)
+- **i18n Missing Keys:** Added missing `cloudflaredUrlNotice` translation key across all 30 language files to prevent `MISSING_MESSAGE` console errors in the Endpoint page (#823)
+
+---
+
+## [3.3.6] - 2026-03-30
+
+### 🐛 Bug Fixes
+
+- **Token Accounting:** Included prompt cache tokens in historical input-usage calculations so that quota deductions are correct (PR #822)
+- **Combo Test Probes:** Fixed false negatives in combo testing by correctly parsing reasoning-only responses, and enabled massive parallelization of probes via `Promise.all` (PR #828)
+- **Docker Quick Tunnels:** Embedded the required `ca-certificates` package in the base runtime container to resolve Cloudflared TLS startup failures, and surfaced network errors from stdout instead of generic exit codes (PR #829)
+
+---
+
+## [3.3.5] - 2026-03-30
+
+### ✨ New Features
+
+- **Gemini Quota Tracking:** Added real-time Gemini CLI quota tracking via the `retrieveUserQuota` API (PR #825)
+- **Cache Dashboard:** Enhanced the Cache Dashboard to display prompt cache metrics, 24h trends, and estimated cost savings (PR #824)
+
+### 🐛 Bug Fixes
+
+- **User Experience:** Removed intrusive auto-opening OAuth modal loops on empty provider detail pages (PR #820)
+- **Dependency Updates:** Bumped and locked down dependencies for development and production trees including Next.js 16.2.1, Recharts, and TailwindCSS 4.2.2 (PR #826, #827)
+
+---
+
 ## [3.3.4] - 2026-03-30

 ### ✨ New Features
@@ -2,7 +2,7 @@ FROM node:22-bookworm-slim AS builder
 WORKDIR /app

 RUN apt-get update \
-  && apt-get install -y --no-install-recommends libsecret-1-0 \
+  && apt-get install -y --no-install-recommends libsecret-1-0 ca-certificates \
  && rm -rf /var/lib/apt/lists/*

 COPY package*.json ./
@@ -30,7 +30,7 @@ ENV NODE_OPTIONS="--max-old-space-size=256"
 # Data directory inside Docker — must match the volume mount in docker-compose.yml
 ENV DATA_DIR=/app/data
 RUN apt-get update \
-  && apt-get install -y --no-install-recommends libsecret-1-0 \
+  && apt-get install -y --no-install-recommends libsecret-1-0 ca-certificates \
  && rm -rf /var/lib/apt/lists/*
 RUN mkdir -p /app/data

@@ -882,6 +882,7 @@ Notes:

 - Quick Tunnel URLs are temporary and change after every restart.
 - Managed install currently supports Linux, macOS, and Windows on `x64` / `arm64`.
+- Docker images bundle system CA roots and pass them to managed `cloudflared`, which avoids TLS trust failures when the tunnel bootstraps inside the container.
 - Set `CLOUDFLARED_BIN=/absolute/path/to/cloudflared` if you want OmniRoute to use an existing binary instead of downloading one.

 **Using Docker Compose with Caddy (HTTPS Auto-TLS):**
@@ -1,7 +1,7 @@
 openapi: 3.1.0
 info:
  title: OmniRoute API
-  version: 3.3.4
+  version: 3.3.7
  description: |
    OmniRoute is a local-first AI API proxy router. It provides an OpenAI-compatible
    endpoint that routes requests to multiple AI providers with load balancing,
@@ -1,6 +1,6 @@
 {
  "name": "omniroute-desktop",
-  "version": "2.3.13",
+  "version": "3.3.7",
  "description": "OmniRoute Desktop Application",
  "main": "main.js",
  "author": {
@@ -226,23 +226,18 @@ export const REGISTRY: Record<string, RegistryEntry> = {
    oauth: {
      clientIdEnv: "GEMINI_CLI_OAUTH_CLIENT_ID",
      clientIdDefault: "681255809395-oo8ft2oprdrnp9e3aqf6av3hmdib135j.apps.googleusercontent.com",
-      clientSecretEnv: "GEMINI_CLI_OAUTH_CLIENT_SECRET",
+      clientSecretEnv: "GEMINI_OAUTH_CLIENT_SECRET",
      clientSecretDefault: "",
    },
    models: [
-      { id: "gemini-3.1-pro-high", name: "Gemini 3.1 Pro High" },
-      { id: "gemini-3.1-pro-low", name: "Gemini 3.1 Pro Low" },
-      { id: "gemini-3.1-pro", name: "Gemini 3.1 Pro" },
-      { id: "gemini-3-1-pro", name: "Gemini 3.1 Pro (Alt ID)" },
+      { id: "gemini-3-pro-preview", name: "Gemini 3 Pro Preview" },
      { id: "gemini-3.1-pro-preview", name: "Gemini 3.1 Pro Preview" },
-      { id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
+      { id: "gemini-3.1-pro-preview-customtools", name: "Gemini 3.1 Pro Preview Custom Tools" },
      { id: "gemini-3-flash-preview", name: "Gemini 3 Flash Preview" },
+      { id: "gemini-3.1-flash-lite-preview", name: "Gemini 3.1 Flash Lite Preview" },
      { id: "gemini-2.5-pro", name: "Gemini 2.5 Pro" },
      { id: "gemini-2.5-flash", name: "Gemini 2.5 Flash" },
      { id: "gemini-2.5-flash-lite", name: "Gemini 2.5 Flash Lite" },
-      { id: "gemini-2.0-flash", name: "Gemini 2.0 Flash" },
-      { id: "gemini-1.5-pro", name: "Gemini 1.5 Pro" },
-      { id: "gemini-1.5-flash", name: "Gemini 1.5 Flash" },
    ],
  },

@@ -32,7 +32,11 @@ import {
  appendRequestLog,
  saveCallLog,
 } from "@/lib/usageDb";
-import { getLoggedInputTokens, getLoggedOutputTokens } from "@/lib/usage/tokenAccounting";
+import {
+  getLoggedInputTokens,
+  getLoggedOutputTokens,
+  formatUsageLog,
+} from "@/lib/usage/tokenAccounting";
 import { recordCost } from "@/domain/costRules";
 import { calculateCost } from "@/lib/usage/costCalculator";
 import { CLAUDE_OAUTH_TOOL_PREFIX } from "../translator/request/openai-to-claude.ts";
@@ -1432,7 +1436,7 @@ export async function handleChatCore({
    // Save structured call log with full payloads
    const cacheUsageLogMeta = buildCacheUsageLogMeta(usage);
    if (usage && typeof usage === "object") {
-      const msg = `[${new Date().toLocaleTimeString("en-US", { hour12: false, hour: "2-digit", minute: "2-digit" })}] 📊 [USAGE] ${provider.toUpperCase()} | in=${getLoggedInputTokens(usage)} | out=${getLoggedOutputTokens(usage)}${connectionId ? ` | account=${connectionId.slice(0, 8)}...` : ""}`;
+      const msg = `[${new Date().toLocaleTimeString("en-US", { hour12: false, hour: "2-digit", minute: "2-digit" })}] 📊 [USAGE] ${provider.toUpperCase()} | ${formatUsageLog(usage)}${connectionId ? ` | account=${connectionId.slice(0, 8)}...` : ""}`;
      console.log(`${COLORS.green}${msg}${COLORS.reset}`);

      // Track cache token metrics
@@ -408,7 +408,7 @@ function convertOpenAINonStreamingToClaude(openaiResponse: JsonRecord): JsonReco
  const choiceObj = toRecord(choice);
  const messageObj = toRecord(choiceObj.message);

-  const content = [];
+  const content: JsonRecord[] = [];

  let hasTextOrReasoning = false;

@@ -46,11 +46,18 @@ export function extractUsageFromResponse(responseBody, provider) {
    (responseBody.usage.input_tokens !== undefined ||
      responseBody.usage.output_tokens !== undefined)
  ) {
+    const inputTokens = responseBody.usage.input_tokens || 0;
+    const cacheRead = responseBody.usage.cache_read_input_tokens || 0;
+    const cacheCreation = responseBody.usage.cache_creation_input_tokens || 0;
+
+    // Total prompt tokens = input + cache_read + cache_creation (per Claude API docs)
+    const promptTokens = inputTokens + cacheRead + cacheCreation;
+
    return {
-      prompt_tokens: responseBody.usage.input_tokens || 0,
+      prompt_tokens: promptTokens,
      completion_tokens: responseBody.usage.output_tokens || 0,
-      cache_read_input_tokens: responseBody.usage.cache_read_input_tokens,
-      cache_creation_input_tokens: responseBody.usage.cache_creation_input_tokens,
+      cache_read_input_tokens: cacheRead,
+      cache_creation_input_tokens: cacheCreation,
    };
  }

@@ -60,6 +60,12 @@ export {
  getSessionSnapshotInput,
  getSessionSnapshotOutput,
  getSessionSnapshotTool,
+  cacheStatsInput,
+  cacheStatsOutput,
+  cacheStatsTool,
+  cacheFlushInput,
+  cacheFlushOutput,
+  cacheFlushTool,
 } from "./tools.ts";

 // A2A schemas
@@ -806,11 +806,73 @@ export const syncPricingTool: McpToolDefinition<typeof syncPricingInput, typeof
    sourceEndpoints: ["/api/pricing/sync"],
  };

+// ============ Cache Tools ============
+
+export const cacheStatsInput = z.object({}).describe("No parameters required");
+
+export const cacheStatsOutput = z.object({
+  semanticCache: z.object({
+    memoryEntries: z.number(),
+    dbEntries: z.number(),
+    hits: z.number(),
+    misses: z.number(),
+    hitRate: z.string(),
+    tokensSaved: z.number(),
+  }),
+  promptCache: z
+    .object({
+      totalRequests: z.number(),
+      requestsWithCacheControl: z.number(),
+      totalCachedTokens: z.number(),
+      totalCacheCreationTokens: z.number(),
+      estimatedCostSaved: z.number(),
+    })
+    .nullable(),
+  idempotency: z.object({
+    activeKeys: z.number(),
+    windowMs: z.number(),
+  }),
+});
+
+export const cacheStatsTool: McpToolDefinition<typeof cacheStatsInput, typeof cacheStatsOutput> = {
+  name: "omniroute_cache_stats",
+  description:
+    "Returns cache statistics including semantic cache hit rate, prompt cache metrics by provider, and idempotency layer stats.",
+  inputSchema: cacheStatsInput,
+  outputSchema: cacheStatsOutput,
+  scopes: ["read:cache"],
+  auditLevel: "basic",
+  phase: 2,
+  sourceEndpoints: ["/api/cache"],
+};
+
+export const cacheFlushInput = z.object({
+  signature: z.string().optional().describe("Specific cache signature to invalidate"),
+  model: z.string().optional().describe("Invalidate all entries for a specific model"),
+});
+
+export const cacheFlushOutput = z.object({
+  ok: z.boolean(),
+  invalidated: z.number().optional(),
+  scope: z.string().optional(),
+});
+
+export const cacheFlushTool: McpToolDefinition<typeof cacheFlushInput, typeof cacheFlushOutput> = {
+  name: "omniroute_cache_flush",
+  description:
+    "Flush cache entries. Provide signature to invalidate a single entry, model to invalidate all entries for a model, or omit both to clear all.",
+  inputSchema: cacheFlushInput,
+  outputSchema: cacheFlushOutput,
+  scopes: ["write:cache"],
+  auditLevel: "full",
+  phase: 2,
+  sourceEndpoints: ["/api/cache"],
+};
+
 // ============ Tool Registry ============

 /** All MCP tool definitions, ordered by phase then name */
 export const MCP_TOOLS = [
-  // Phase 1: Essential
  getHealthTool,
  listCombosTool,
  getComboMetricsTool,
@@ -819,7 +881,6 @@ export const MCP_TOOLS = [
  routeRequestTool,
  costReportTool,
  listModelsCatalogTool,
-  // Phase 2: Advanced
  simulateRouteTool,
  setBudgetGuardTool,
  setRoutingStrategyTool,
@@ -830,6 +891,8 @@ export const MCP_TOOLS = [
  explainRouteTool,
  getSessionSnapshotTool,
  syncPricingTool,
+  cacheStatsTool,
+  cacheFlushTool,
 ] as const;

 /** Essential tools only (Phase 1) */
@@ -1,6 +1,6 @@
 {
  "name": "@omniroute/open-sse",
-  "version": "0.0.1",
+  "version": "3.3.7",
  "description": "Express SSE sidecar for OmniRoute — handles streaming, protocol translation, and provider orchestration",
  "type": "module",
  "main": "index.js",
@@ -159,13 +159,13 @@ async function getGlmUsage(apiKey: string, providerSpecificData?: Record<string,
 * @returns {Promise<unknown>} Usage data with quotas
 */
 export async function getUsageForProvider(connection) {
-  const { provider, accessToken, apiKey, providerSpecificData } = connection;
+  const { provider, accessToken, apiKey, providerSpecificData, projectId } = connection;

  switch (provider) {
    case "github":
      return await getGitHubUsage(accessToken, providerSpecificData);
    case "gemini-cli":
-      return await getGeminiUsage(accessToken);
+      return await getGeminiUsage(accessToken, providerSpecificData, projectId);
    case "antigravity":
      return await getAntigravityUsage(accessToken, undefined);
    case "claude":
@@ -195,24 +195,22 @@ function parseResetTime(resetValue) {
  if (!resetValue) return null;

  try {
-    // If it's already a Date object
+    let date;
    if (resetValue instanceof Date) {
-      return resetValue.toISOString();
+      date = resetValue;
+    } else if (typeof resetValue === "number") {
+      date = new Date(resetValue);
+    } else if (typeof resetValue === "string") {
+      date = new Date(resetValue);
+    } else {
+      return null;
    }

-    // If it's a number (Unix timestamp in milliseconds)
-    if (typeof resetValue === "number") {
-      return new Date(resetValue).toISOString();
-    }
+    // Epoch-zero (1970-01-01) means no scheduled reset — treat as null
+    if (date.getTime() <= 0) return null;

-    // If it's a string (ISO date or parseable date string)
-    if (typeof resetValue === "string") {
-      return new Date(resetValue).toISOString();
-    }
-
-    return null;
+    return date.toISOString();
  } catch (error) {
-    console.warn(`Failed to parse reset time: ${resetValue}`, error);
    return null;
  }
 }
@@ -417,36 +415,180 @@ function inferGitHubPlanName(data: JsonRecord, premiumQuota: UsageQuota | null):
  return "GitHub Copilot";
 }

+// ── Gemini CLI subscription info cache ──────────────────────────────────────
+// Prevents duplicate loadCodeAssist calls within the same quota cycle.
+// Key: accessToken → { data, fetchedAt }
+const _geminiCliSubCache = new Map();
+const GEMINI_CLI_CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
+
 /**
- * Gemini CLI Usage (Google Cloud)
+ * Gemini CLI Usage — fetch per-model quota from Cloud Code Assist API.
+ * Gemini CLI and Antigravity share the same upstream (cloudcode-pa.googleapis.com),
+ * so this follows the same pattern as getAntigravityUsage().
 */
-async function getGeminiUsage(accessToken) {
+async function getGeminiUsage(accessToken, providerSpecificData?, connectionProjectId?) {
+  if (!accessToken) {
+    return { plan: "Free", message: "Gemini CLI access token not available." };
+  }
+
  try {
-    // Gemini CLI uses Google Cloud quotas
-    // Try to get quota info from Cloud Resource Manager
+    const subscriptionInfo = await getGeminiCliSubscriptionInfoCached(accessToken);
+    const projectId =
+      connectionProjectId ||
+      providerSpecificData?.projectId ||
+      subscriptionInfo?.cloudaicompanionProject ||
+      null;
+
+    const plan = getGeminiCliPlanLabel(subscriptionInfo);
+
+    if (!projectId) {
+      return { plan, message: "Gemini CLI project ID not available." };
+    }
+
+    // Use retrieveUserQuota (same endpoint as Gemini CLI /stats command).
+    // Returns per-model buckets with remainingFraction and resetTime.
    const response = await fetch(
-      "https://cloudresourcemanager.googleapis.com/v1/projects?filter=lifecycleState:ACTIVE",
+      "https://cloudcode-pa.googleapis.com/v1internal:retrieveUserQuota",
      {
+        method: "POST",
        headers: {
          Authorization: `Bearer ${accessToken}`,
-          Accept: "application/json",
+          "Content-Type": "application/json",
        },
+        body: JSON.stringify({ project: projectId }),
+        signal: AbortSignal.timeout(10000),
      }
    );

    if (!response.ok) {
-      // Quota API may not be accessible, return generic message
-      return {
-        message: "Gemini CLI uses Google Cloud quotas. Check Google Cloud Console for details.",
-      };
+      return { plan, message: `Gemini CLI quota error (${response.status}).` };
    }

-    return { message: "Gemini CLI connected. Usage tracked via Google Cloud Console." };
+    const data = await response.json();
+    const quotas: Record<string, UsageQuota> = {};
+
+    if (Array.isArray(data.buckets)) {
+      for (const bucket of data.buckets) {
+        if (!bucket.modelId || bucket.remainingFraction == null) continue;
+
+        const remainingFraction = toNumber(bucket.remainingFraction, 0);
+        const remainingPercentage = remainingFraction * 100;
+        const QUOTA_NORMALIZED_BASE = 1000;
+        const total = QUOTA_NORMALIZED_BASE;
+        const remaining = Math.round(total * remainingFraction);
+        const used = Math.max(0, total - remaining);
+
+        quotas[bucket.modelId] = {
+          used,
+          total,
+          resetAt: parseResetTime(bucket.resetTime),
+          remainingPercentage,
+          unlimited: false,
+        };
+      }
+    }
+
+    return { plan, quotas };
  } catch (error) {
-    return { message: "Unable to fetch Gemini usage. Check Google Cloud Console." };
+    return { message: `Gemini CLI error: ${(error as Error).message}` };
  }
 }

+/**
+ * Get Gemini CLI subscription info (cached, 5 min TTL).
+ *
+ * Looks up the in-memory `_geminiCliSubCache` using the full access token as
+ * the key. An entry younger than GEMINI_CLI_CACHE_TTL_MS is returned as-is;
+ * otherwise getGeminiCliSubscriptionInfo() is called and its result is stored
+ * together with the fetch timestamp before being returned.
+ *
+ * NOTE(review): a `null` result (failed lookup) is cached like any other value,
+ * and entries for old tokens are overwritten only when the same token is seen
+ * again, never evicted. Confirm both behaviours are intended.
+ *
+ * @param accessToken - OAuth access token; doubles as the cache key.
+ * @returns The cached or freshly fetched subscription payload, or `null` when
+ *   the underlying lookup failed.
+ */
+async function getGeminiCliSubscriptionInfoCached(accessToken) {
+  const cacheKey = accessToken;
+  const cached = _geminiCliSubCache.get(cacheKey);
+
+  if (cached && Date.now() - cached.fetchedAt < GEMINI_CLI_CACHE_TTL_MS) {
+    return cached.data;
+  }
+
+  const data = await getGeminiCliSubscriptionInfo(accessToken);
+  _geminiCliSubCache.set(cacheKey, { data, fetchedAt: Date.now() });
+  return data;
+}
+
+/**
+ * Fetch Gemini CLI subscription info from the Cloud Code Assist
+ * `loadCodeAssist` endpoint.
+ *
+ * This is a best-effort lookup: a non-2xx response, a network error, or a
+ * timeout all resolve to `null` instead of throwing, so callers can fall back
+ * to default plan/project values.
+ *
+ * @param accessToken - OAuth bearer token sent in the Authorization header.
+ * @returns The parsed JSON response body, or `null` when the lookup fails.
+ */
+async function getGeminiCliSubscriptionInfo(accessToken) {
+  try {
+    const response = await fetch("https://cloudcode-pa.googleapis.com/v1internal:loadCodeAssist", {
+      method: "POST",
+      headers: {
+        Authorization: `Bearer ${accessToken}`,
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify({
+        metadata: {
+          ideType: "IDE_UNSPECIFIED",
+          platform: "PLATFORM_UNSPECIFIED",
+          pluginType: "GEMINI",
+        },
+      }),
+      // Bound the request with the same 10 s limit used for retrieveUserQuota,
+      // so a stalled connection cannot hang the whole usage lookup.
+      signal: AbortSignal.timeout(10000),
+    });
+
+    if (!response.ok) return null;
+
+    return await response.json();
+  } catch {
+    // Network failure, timeout (AbortSignal) or invalid JSON: treat as "no info".
+    return null;
+  }
+}
+
+/**
+ * Map Gemini CLI subscription tier to display label (same tiers as Antigravity).
+ *
+ * Resolution order:
+ *  1. Tier id: the id of the first `allowedTiers` entry flagged `isDefault`,
+ *     falling back to `currentTier.id`.
+ *  2. Tier name: the first non-empty of `currentTier.name`,
+ *     `currentTier.displayName`, `subscriptionType`, `tier`.
+ *
+ * Each candidate is upper-cased and matched by substring, and the order of the
+ * checks is significant: ULTRA, then PRO, then ENTERPRISE, then
+ * BUSINESS or STANDARD, then the "Free" family. The id match also treats
+ * LEGACY as "Free"; the name match does not.
+ *
+ * When nothing matches: a `currentTier.upgradeSubscriptionType` value yields
+ * "Free"; otherwise a non-empty tier name is returned with only its first
+ * letter capitalised; otherwise "Free".
+ *
+ * @param subscriptionInfo - Subscription payload (callers pass the result of
+ *   the loadCodeAssist lookup); may be null/undefined or an empty object.
+ * @returns A display label such as "Ultra", "Pro", "Enterprise", "Business",
+ *   "Free", or the capitalised raw tier name.
+ */
+function getGeminiCliPlanLabel(subscriptionInfo) {
+  if (!subscriptionInfo || Object.keys(subscriptionInfo).length === 0) return "Free";
+
+  let tierId = "";
+  if (Array.isArray(subscriptionInfo.allowedTiers)) {
+    for (const tier of subscriptionInfo.allowedTiers) {
+      if (tier.isDefault && tier.id) {
+        tierId = tier.id.trim().toUpperCase();
+        break;
+      }
+    }
+  }
+
+  if (!tierId) {
+    tierId = (subscriptionInfo.currentTier?.id || "").toUpperCase();
+  }
+
+  if (tierId) {
+    if (tierId.includes("ULTRA")) return "Ultra";
+    if (tierId.includes("PRO")) return "Pro";
+    if (tierId.includes("ENTERPRISE")) return "Enterprise";
+    if (tierId.includes("BUSINESS") || tierId.includes("STANDARD")) return "Business";
+    if (tierId.includes("FREE") || tierId.includes("INDIVIDUAL") || tierId.includes("LEGACY"))
+      return "Free";
+  }
+
+  const tierName =
+    subscriptionInfo.currentTier?.name ||
+    subscriptionInfo.currentTier?.displayName ||
+    subscriptionInfo.subscriptionType ||
+    subscriptionInfo.tier ||
+    "";
+  const upper = tierName.toUpperCase();
+
+  if (upper.includes("ULTRA")) return "Ultra";
+  if (upper.includes("PRO")) return "Pro";
+  if (upper.includes("ENTERPRISE")) return "Enterprise";
+  if (upper.includes("STANDARD") || upper.includes("BUSINESS")) return "Business";
+  if (upper.includes("INDIVIDUAL") || upper.includes("FREE")) return "Free";
+
+  if (subscriptionInfo.currentTier?.upgradeSubscriptionType) return "Free";
+  if (tierName) {
+    return tierName.charAt(0).toUpperCase() + tierName.slice(1).toLowerCase();
+  }
+
+  return "Free";
+}
+
 // ── Antigravity subscription info cache ──────────────────────────────────────
 // Prevents duplicate loadCodeAssist calls within the same quota cycle.
 // Key: truncated accessToken → { data, fetchedAt }
@@ -72,12 +72,7 @@ const DETERMINISTIC_STRATEGIES: Set<RoutingStrategyValue> = new Set(["priority",
 /**
 * Providers that support prompt caching
 */
-const CACHING_PROVIDERS = new Set([
-  "claude",
-  "anthropic",
-  "zai",
-  "qwen", // Alibaba Qwen Coding Plan International
-]);
+const CACHING_PROVIDERS = new Set(["claude", "anthropic", "zai", "qwen", "deepseek"]);

 /**
 * Detect if the client is Claude Code or another caching-aware client
@@ -1,12 +1,12 @@
 {
  "name": "omniroute",
-  "version": "3.3.4",
+  "version": "3.3.7",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "omniroute",
-      "version": "3.3.4",
+      "version": "3.3.7",
      "hasInstallScript": true,
      "license": "MIT",
      "workspaces": [
@@ -20324,7 +20324,7 @@
    },
    "open-sse": {
      "name": "@omniroute/open-sse",
-      "version": "0.0.1"
+      "version": "3.3.7"
    }
  }
 }
@@ -1,6 +1,6 @@
 {
  "name": "omniroute",
-  "version": "3.3.4",
+  "version": "3.3.7",
  "description": "Smart AI Router with auto fallback — route to FREE & cheap models, zero downtime. Works with Cursor, Cline, Claude Desktop, Codex, and any OpenAI-compatible tool.",
  "type": "module",
  "bin": {
@@ -0,0 +1,136 @@
+"use client";
+
+import { useEffect, useState } from "react";
+import { Card } from "@/shared/components";
+import { useTranslations } from "next-intl";
+
+/**
+ * Dashboard card showing the provider diversity score returned by
+ * `/api/analytics/diversity`, together with the share of the top providers.
+ *
+ * Render states:
+ *  - loading: spinner while the request is in flight;
+ *  - unavailable: the request failed or returned a non-2xx status;
+ *  - loaded: score gauge, risk label and up to four provider share bars.
+ *
+ * The score is read from `data.score` (treated as a 0..1 fraction) and mapped
+ * to a risk band: below 40% is red, below 70% is amber, otherwise green.
+ */
+export default function DiversityScoreCard() {
+  const [data, setData] = useState<any>(null);
+  const [loading, setLoading] = useState(true);
+  // TODO: the visible strings below are still hard-coded English; `t` is not used yet.
+  const t = useTranslations("analytics");
+
+  useEffect(() => {
+    // Avoid touching state if the card unmounts before the request settles.
+    let cancelled = false;
+
+    fetch("/api/analytics/diversity")
+      .then((res) => {
+        // Without this check an error body would be rendered as a 0% score.
+        if (!res.ok) throw new Error(`Diversity request failed (${res.status})`);
+        return res.json();
+      })
+      .then((json) => {
+        if (!cancelled) setData(json);
+      })
+      .catch((err) => {
+        console.error(err);
+      })
+      .finally(() => {
+        if (!cancelled) setLoading(false);
+      });
+
+    return () => {
+      cancelled = true;
+    };
+  }, []);
+
+  if (loading) {
+    return (
+      <Card className="p-5 flex flex-col justify-center items-center h-full min-h-[200px]">
+        <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-primary"></div>
+      </Card>
+    );
+  }
+
+  // The request finished but produced no data: show a terminal state instead
+  // of spinning forever.
+  if (!data) {
+    return (
+      <Card className="p-5 flex flex-col justify-center items-center h-full min-h-[200px]">
+        <span className="text-sm text-[var(--text-muted,#aaaaaa)]">
+          Diversity data unavailable.
+        </span>
+      </Card>
+    );
+  }
+
+  const scorePercentage = Math.round((data.score || 0) * 100);
+
+  // Class names are spelled out in full so Tailwind's source scanner can see
+  // them; strings assembled at runtime are not picked up.
+  let riskColor = "text-green-500";
+  let gaugeColor = "bg-green-500";
+  let badgeColor = "border-green-500/20 bg-green-500/10";
+  let riskLabel = "Healthy Distribution";
+
+  if (scorePercentage < 40) {
+    riskColor = "text-red-500";
+    gaugeColor = "bg-red-500";
+    badgeColor = "border-red-500/20 bg-red-500/10";
+    riskLabel = "High Vendor Lock-in Risk";
+  } else if (scorePercentage < 70) {
+    riskColor = "text-amber-500";
+    gaugeColor = "bg-amber-500";
+    badgeColor = "border-amber-500/20 bg-amber-500/10";
+    riskLabel = "Moderate Distribution";
+  }
+
+  return (
+    <Card className="p-5 flex flex-col h-full bg-[var(--card-bg,#1e1e2e)] relative overflow-hidden group">
+      <div className="flex items-center gap-2 mb-4">
+        <span className="material-symbols-outlined text-[20px] text-cyan-400">pie_chart</span>
+        <h3 className="font-semibold text-[var(--text-primary,#fff)] flex-1">
+          Provider Diversity Score
+        </h3>
+        <span className={`text-xs px-2 py-0.5 rounded-md border ${badgeColor} ${riskColor}`}>
+          Shannon Entropy
+        </span>
+      </div>
+
+      <div className="flex items-center justify-between mt-2 mb-6">
+        <div className="flex flex-col">
+          <span className={`text-4xl font-bold tabular-nums tracking-tight ${riskColor}`}>
+            {scorePercentage}%
+          </span>
+          <span className="text-sm text-[var(--text-muted,#aaaaaa)] mt-1">{riskLabel}</span>
+        </div>
+
+        {/* Simple SVG donut: the second path's dash length encodes the score */}
+        <div className="relative w-20 h-20 flex-shrink-0">
+          <svg className="w-full h-full transform -rotate-90" viewBox="0 0 36 36">
+            <path
+              className="text-[var(--border,#333)]"
+              strokeWidth="4"
+              stroke="currentColor"
+              fill="none"
+              d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
+            />
+            <path
+              className={riskColor}
+              strokeWidth="4"
+              strokeDasharray={`${scorePercentage}, 100`}
+              stroke="currentColor"
+              fill="none"
+              strokeLinecap="round"
+              d="M18 2.0845 a 15.9155 15.9155 0 0 1 0 31.831 a 15.9155 15.9155 0 0 1 0 -31.831"
+            />
+          </svg>
+        </div>
+      </div>
+
+      <div className="space-y-4 flex-1">
+        <p className="text-xs uppercase tracking-wider font-semibold text-[var(--text-muted,#888)]">
+          Provider Share
+        </p>
+
+        {Object.keys(data.providers || {}).length === 0 ? (
+          <div className="text-sm text-[var(--text-secondary,#666)] py-2">
+            No recent usage data available.
+          </div>
+        ) : (
+          <div className="space-y-3">
+            {Object.entries(data.providers)
+              .sort(([, a]: any, [, b]: any) => b.share - a.share)
+              .slice(0, 4) // Top 4 providers
+              .map(([provider, stat]: [string, any]) => (
+                <div key={provider} className="flex flex-col gap-1.5">
+                  <div className="flex items-center justify-between text-sm">
+                    <span className="font-medium text-[var(--text-primary,#ddd)] capitalize">
+                      {provider}
+                    </span>
+                    <span className="font-mono text-[var(--text-muted,#aaa)]">
+                      {Math.round(stat.share * 100)}%
+                    </span>
+                  </div>
+                  <div className="w-full h-1.5 bg-[var(--surface,#333)] rounded-full overflow-hidden">
+                    <div
+                      className={`h-full ${gaugeColor} rounded-full`}
+                      style={{ width: `${Math.round(stat.share * 100)}%` }}
+                    />
+                  </div>
+                </div>
+              ))}
+          </div>
+        )}
+      </div>
+
+      <div className="mt-4 pt-4 border-t border-[var(--border,#333)] flex justify-between text-[11px] text-[var(--text-muted,#777)]">
+        <span>Window: {data.windowSize ?? 0} reqs</span>
+        {/* Default to 0 so a missing ttlMs does not render "NaN mins" */}
+        <span>Based on Last {Math.round((data.ttlMs ?? 0) / 60000)} mins</span>
+      </div>
+    </Card>
+  );
+}
@@ -4,6 +4,7 @@ import { useState, Suspense } from "react";
 import { UsageAnalytics, CardSkeleton, SegmentedControl } from "@/shared/components";
 import EvalsTab from "../usage/components/EvalsTab";
 import SearchAnalyticsTab from "./SearchAnalyticsTab";
+import DiversityScoreCard from "./components/DiversityScoreCard";
 import { useTranslations } from "next-intl";

 export default function AnalyticsPage() {
@@ -38,9 +39,14 @@ export default function AnalyticsPage() {
      />

      {activeTab === "overview" && (
-        <Suspense fallback={<CardSkeleton />}>
-          <UsageAnalytics />
-        </Suspense>
+        <div className="flex flex-col gap-6">
+          <div className="grid grid-cols-1 md:grid-cols-3 gap-6">
+            <DiversityScoreCard />
+          </div>
+          <Suspense fallback={<CardSkeleton />}>
+            <UsageAnalytics />
+          </Suspense>
+        </div>
      )}
      {activeTab === "evals" && <EvalsTab />}
      {activeTab === "search" && <SearchAnalyticsTab />}
@@ -0,0 +1,174 @@
+"use client";
+
+import { useState, useEffect, useCallback } from "react";
+import { Button } from "@/shared/components";
+import { useTranslations } from "next-intl";
+
+interface CacheEntry {
+  id: string;
+  signature: string;
+  model: string;
+  hit_count: number;
+  tokens_saved: number;
+  created_at: string;
+  expires_at: string;
+}
+
+interface Pagination {
+  page: number;
+  limit: number;
+  total: number;
+  totalPages: number;
+}
+
+export default function CacheEntriesTab() {
+  const t = useTranslations("cache");
+  const [entries, setEntries] = useState<CacheEntry[]>([]);
+  const [pagination, setPagination] = useState<Pagination>({
+    page: 1,
+    limit: 20,
+    total: 0,
+    totalPages: 0,
+  });
+  const [loading, setLoading] = useState(true);
+  const [search, setSearch] = useState("");
+  const [deleting, setDeleting] = useState<string | null>(null);
+
+  const fetchEntries = useCallback(
+    async (page = 1) => {
+      setLoading(true);
+      try {
+        const params = new URLSearchParams({ page: String(page), limit: String(pagination.limit) });
+        if (search) params.set("search", search);
+
+        const res = await fetch(`/api/cache/entries?${params}`);
+        if (res.ok) {
+          const data = await res.json();
+          setEntries(data.entries);
+          setPagination(data.pagination);
+        }
+      } catch {
+        // ignore
+      } finally {
+        setLoading(false);
+      }
+    },
+    [search, pagination.limit]
+  );
+
+  useEffect(() => {
+    fetchEntries();
+  }, [fetchEntries]);
+
+  /**
+   * Delete a single cache entry by signature, then reload the current page.
+   *
+   * Failures are logged instead of escaping as an unhandled promise rejection
+   * from the click handler. The list is refreshed even when the server rejects
+   * the delete, so the table always reflects the server's current state.
+   */
+  const handleDelete = async (signature: string) => {
+    setDeleting(signature);
+    try {
+      const res = await fetch(`/api/cache/entries?signature=${encodeURIComponent(signature)}`, {
+        method: "DELETE",
+      });
+      if (!res.ok) {
+        console.error(`Failed to delete cache entry (${res.status})`);
+      }
+      await fetchEntries(pagination.page);
+    } catch (err) {
+      // Network-level failure of the DELETE request itself.
+      console.error("Failed to delete cache entry", err);
+    } finally {
+      setDeleting(null);
+    }
+  };
+
+  const formatDate = (dateStr: string) => {
+    return new Date(dateStr).toLocaleString();
+  };
+
+  return (
+    <div className="flex flex-col gap-4">
+      <div className="flex items-center gap-3">
+        <input
+          type="text"
+          placeholder={t("searchEntries")}
+          value={search}
+          onChange={(e) => setSearch(e.target.value)}
+          onKeyDown={(e) => e.key === "Enter" && fetchEntries()}
+          className="flex-1 px-3 py-2 text-sm rounded-lg border border-border bg-surface text-text-main placeholder:text-text-muted"
+        />
+        <Button variant="secondary" size="sm" onClick={() => fetchEntries()}>
+          {t("search")}
+        </Button>
+      </div>
+
+      {loading ? (
+        <div className="text-sm text-text-muted">{t("loading")}</div>
+      ) : entries.length === 0 ? (
+        <div className="text-sm text-text-muted text-center py-8">{t("noEntries")}</div>
+      ) : (
+        <>
+          <div className="overflow-x-auto">
+            <table className="w-full text-sm">
+              <thead>
+                <tr className="text-left text-xs text-text-muted border-b border-border/30">
+                  <th className="pb-2 pr-4">{t("signature")}</th>
+                  <th className="pb-2 pr-4">{t("model")}</th>
+                  <th className="pb-2 pr-4">{t("hits")}</th>
+                  <th className="pb-2 pr-4">{t("tokensSaved")}</th>
+                  <th className="pb-2 pr-4">{t("created")}</th>
+                  <th className="pb-2 pr-4">{t("expires")}</th>
+                  <th className="pb-2">{t("actions")}</th>
+                </tr>
+              </thead>
+              <tbody>
+                {entries.map((entry) => (
+                  <tr key={entry.id} className="border-b border-border/20">
+                    <td className="py-2 pr-4 font-mono text-xs">
+                      {entry.signature.slice(0, 12)}...
+                    </td>
+                    <td className="py-2 pr-4">{entry.model}</td>
+                    <td className="py-2 pr-4 tabular-nums">{entry.hit_count}</td>
+                    <td className="py-2 pr-4 tabular-nums text-green-500">
+                      {entry.tokens_saved.toLocaleString()}
+                    </td>
+                    <td className="py-2 pr-4 text-xs text-text-muted">
+                      {formatDate(entry.created_at)}
+                    </td>
+                    <td className="py-2 pr-4 text-xs text-text-muted">
+                      {formatDate(entry.expires_at)}
+                    </td>
+                    <td className="py-2">
+                      <button
+                        onClick={() => handleDelete(entry.signature)}
+                        disabled={deleting === entry.signature}
+                        className="text-xs text-red-400 hover:text-red-300 disabled:opacity-50"
+                      >
+                        {deleting === entry.signature ? "..." : "🗑️"}
+                      </button>
+                    </td>
+                  </tr>
+                ))}
+              </tbody>
+            </table>
+          </div>
+
+          {/* Pagination */}
+          {pagination.totalPages > 1 && (
+            <div className="flex items-center justify-center gap-2 pt-2">
+              <Button
+                variant="secondary"
+                size="sm"
+                onClick={() => fetchEntries(pagination.page - 1)}
+                disabled={pagination.page <= 1}
+              >
+                ←
+              </Button>
+              <span className="text-sm text-text-muted">
+                {pagination.page} / {pagination.totalPages}
+              </span>
+              <Button
+                variant="secondary"
+                size="sm"
+                onClick={() => fetchEntries(pagination.page + 1)}
+                disabled={pagination.page >= pagination.totalPages}
+              >
+                →
+              </Button>
+            </div>
+          )}
+        </>
+      )}
+    </div>
+  );
+}
@@ -4,6 +4,7 @@ import { useState, useEffect, useCallback } from "react";
 import { Card, Button, EmptyState } from "@/shared/components";
 import { useNotificationStore } from "@/store/notificationStore";
 import { useTranslations } from "next-intl";
+import CacheEntriesTab from "./components/CacheEntriesTab";

 // ─── Types ───────────────────────────────────────────────────────────────────

@@ -16,13 +17,44 @@ interface SemanticCacheStats {
  tokensSaved: number;
 }

+interface PromptCacheProviderStats { // Counter bucket; value type of both byProvider and byStrategy in PromptCacheMetrics
+  requests: number; // rendered in the "requests" column of the per-provider table
+  inputTokens: number; // rendered in the "inputTokens" column
+  cachedTokens: number; // rendered in the "cachedTokensCol" column
+  cacheCreationTokens: number; // rendered in the "cacheCreation" column
+}
+
+interface PromptCacheMetrics { // Aggregate prompt-cache counters shown in the "promptCache" card
+  totalRequests: number; // denominator of the percentage shown under "cacheHitRate"
+  requestsWithCacheControl: number; // numerator of that percentage; also shown as "cachedRequests"
+  totalInputTokens: number;
+  totalCachedTokens: number; // shown as "cachedTokens"
+  totalCacheCreationTokens: number; // shown as "cacheCreationTokens"
+  tokensSaved: number;
+  estimatedCostSaved: number; // NOTE(review): currency/unit is not visible here — confirm against the API
+  byProvider: Record<string, PromptCacheProviderStats>; // one table row per key; the key is displayed as the provider name
+  byStrategy: Record<string, PromptCacheProviderStats>; // presumably keyed by cache strategy name — TODO confirm
+  lastUpdated: string; // presumably an ISO timestamp — TODO confirm
+}
+
 interface IdempotencyStats {
  activeKeys: number;
  windowMs: number;
 }

+interface CacheTrendPoint { // One bar of the "trend24h" chart
+  timestamp: string; // parsed with `new Date(...)` for the bar label and used as the React key, so it should be unique per point
+  requests: number; // bar height is scaled against the largest `requests` value in the series
+  cachedRequests: number; // share of the bar drawn in green; expected to be <= requests
+  inputTokens: number;
+  cachedTokens: number;
+  cacheCreationTokens: number;
+}
+
 interface CacheStats { // Shape of the page's `stats` state
   semanticCache: SemanticCacheStats;
+  promptCache: PromptCacheMetrics | null; // when null, the prompt-cache card is not rendered
+  trend: CacheTrendPoint[]; // one chart bar per element; the chart is hidden when empty
   idempotency: IdempotencyStats;
 }

@@ -107,6 +139,7 @@ export default function CachePage() {
  const [stats, setStats] = useState<CacheStats | null>(null);
  const [loading, setLoading] = useState(true);
  const [clearing, setClearing] = useState(false);
+  const [activeTab, setActiveTab] = useState<"overview" | "entries">("overview");
  const notify = useNotificationStore();

  const fetchStats = useCallback(async () => {
@@ -136,27 +169,32 @@ export default function CachePage() {
      const res = await fetch("/api/cache", { method: "DELETE" });
      if (res.ok) {
        const data = await res.json();
-        notify.add({
-          type: "success",
-          message: t("clearSuccess", { count: data.expiredRemoved ?? 0 }),
-        });
+        notify.success(t("clearSuccess", { count: data.expiredRemoved ?? 0 }));
        await fetchStats();
      } else {
-        notify.add({ type: "error", message: t("clearError") });
+        notify.error(t("clearError"));
      }
    } catch (error) {
      console.error("[CachePage] Failed to clear cache:", error);
-      notify.add({ type: "error", message: t("clearError") });
+      notify.error(t("clearError"));
    } finally {
      setClearing(false);
    }
  };

  const sc = stats?.semanticCache;
+  const pc = stats?.promptCache;
+  const trend = stats?.trend ?? [];
  const idp = stats?.idempotency;
  const hitRate = sc ? parseFloat(sc.hitRate) : 0;
  const totalRequests = sc ? sc.hits + sc.misses : 0;

+  const promptCacheHitRate =
+    pc && pc.totalRequests > 0 ? (pc.requestsWithCacheControl / pc.totalRequests) * 100 : 0;
+  const providerEntries = pc ? Object.entries(pc.byProvider) : [];
+
+  const maxTrendRequests = Math.max(1, ...trend.map((p) => p.requests));
+
  return (
    <div className="flex flex-col gap-6">
      {/* Header */}
@@ -190,149 +228,334 @@ export default function CachePage() {
        </div>
      </div>

-      {/* Loading skeleton */}
-      {loading && (
-        <div
-          className="grid grid-cols-2 md:grid-cols-4 gap-4"
-          aria-busy="true"
-          aria-label="Loading cache statistics"
+      {/* Tab navigation */}
+      <div className="flex gap-1 p-1 rounded-lg bg-black/5 dark:bg-white/5 w-fit">
+        <button
+          onClick={() => setActiveTab("overview")}
+          className={`px-4 py-2 rounded-md text-sm font-medium transition-all ${
+            activeTab === "overview"
+              ? "bg-white dark:bg-white/10 text-text-main shadow-sm"
+              : "text-text-muted hover:text-text-main"
+          }`}
        >
-          {Array.from({ length: 4 }).map((_, i) => (
-            <div key={i} className="h-24 rounded-xl bg-surface-raised animate-pulse" />
-          ))}
-        </div>
-      )}
+          {t("overview")}
+        </button>
+        <button
+          onClick={() => setActiveTab("entries")}
+          className={`px-4 py-2 rounded-md text-sm font-medium transition-all ${
+            activeTab === "entries"
+              ? "bg-white dark:bg-white/10 text-text-main shadow-sm"
+              : "text-text-muted hover:text-text-main"
+          }`}
+        >
+          {t("entries")}
+        </button>
+      </div>

-      {/* Error / empty state */}
-      {!loading && !stats && (
-        <EmptyState
-          icon="cached"
-          title={t("unavailable")}
-          description={t("unavailableDesc")}
-          actionLabel={t("refresh")}
-          onAction={() => void fetchStats()}
-        />
-      )}
+      {/* Entries tab */}
+      {activeTab === "entries" && <CacheEntriesTab />}

-      {/* Main content */}
-      {!loading && stats && (
+      {/* Overview tab content */}
+      {activeTab === "overview" && (
        <>
-          {/* Stats grid */}
-          <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
-            <StatCard
-              icon="memory"
-              label={t("memoryEntries")}
-              value={sc?.memoryEntries ?? 0}
-              sub={t("memoryEntriesSub")}
-            />
-            <StatCard
-              icon="storage"
-              label={t("dbEntries")}
-              value={sc?.dbEntries ?? 0}
-              sub={t("dbEntriesSub")}
-            />
-            <StatCard
-              icon="trending_up"
-              label={t("cacheHits")}
-              value={sc?.hits ?? 0}
-              sub={t("cacheHitsSub", { total: totalRequests })}
-              valueClass="text-green-500"
-            />
-            <StatCard
-              icon="token"
-              label={t("tokensSaved")}
-              value={(sc?.tokensSaved ?? 0).toLocaleString()}
-              sub={t("tokensSavedSub")}
-              valueClass="text-blue-400"
-            />
-          </div>
-
-          {/* Hit rate + breakdown */}
-          <Card>
-            <div className="p-5 flex flex-col gap-4">
-              <div className="flex items-center justify-between">
-                <h2 className="font-medium text-sm">{t("performance")}</h2>
-                <span className="text-xs text-text-muted">
-                  {t("autoRefresh", { seconds: REFRESH_INTERVAL_SECONDS })}
-                </span>
-              </div>
-              <HitRateBar hitRate={hitRate} label={t("hitRate")} />
-              <div className="grid grid-cols-3 gap-4 pt-3 border-t border-border/30 text-center">
-                <div>
-                  <div className="text-lg font-semibold tabular-nums text-green-500">
-                    {sc?.hits ?? 0}
-                  </div>
-                  <div className="text-xs text-text-muted mt-0.5">{t("hits")}</div>
-                </div>
-                <div>
-                  <div className="text-lg font-semibold tabular-nums text-red-400">
-                    {sc?.misses ?? 0}
-                  </div>
-                  <div className="text-xs text-text-muted mt-0.5">{t("misses")}</div>
-                </div>
-                <div>
-                  <div className="text-lg font-semibold tabular-nums">{totalRequests}</div>
-                  <div className="text-xs text-text-muted mt-0.5">{t("total")}</div>
-                </div>
-              </div>
+          {/* Loading skeleton */}
+          {loading && (
+            <div
+              className="grid grid-cols-2 md:grid-cols-4 gap-4"
+              aria-busy="true"
+              aria-label="Loading cache statistics"
+            >
+              {Array.from({ length: 4 }).map((_, i) => (
+                <div key={i} className="h-24 rounded-xl bg-surface-raised animate-pulse" />
+              ))}
            </div>
-          </Card>
+          )}

-          {/* Cache behavior */}
-          <Card>
-            <div className="p-5 flex flex-col gap-3">
-              <h2 className="font-medium text-sm">{t("behavior")}</h2>
-              <div className="grid grid-cols-1 md:grid-cols-2 gap-3">
-                <InfoRow icon="info">{t("behaviorDeterministic")}</InfoRow>
-                <InfoRow icon="info">
-                  {t.rich("behaviorBypass", {
-                    header: () => (
-                      <code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
-                        X-OmniRoute-No-Cache: true
-                      </code>
-                    ),
-                  })}
-                </InfoRow>
-                <InfoRow icon="info">{t("behaviorTwoTier")}</InfoRow>
-                <InfoRow icon="info">
-                  {t.rich("behaviorTtl", {
-                    envVar: () => (
-                      <code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
-                        SEMANTIC_CACHE_TTL_MS
-                      </code>
-                    ),
-                  })}
-                </InfoRow>
-              </div>
-            </div>
-          </Card>
+          {/* Error / empty state */}
+          {!loading && !stats && (
+            <EmptyState
+              icon="cached"
+              title={t("unavailable")}
+              description={t("unavailableDesc")}
+              actionLabel={t("refresh")}
+              onAction={() => void fetchStats()}
+            />
+          )}

-          {/* Idempotency */}
-          <Card>
-            <div className="p-5 flex flex-col gap-3">
-              <div className="flex items-center gap-2">
-                <span
-                  className="material-symbols-outlined text-base text-text-muted"
-                  aria-hidden="true"
-                >
-                  fingerprint
-                </span>
-                <h2 className="font-medium text-sm">{t("idempotency")}</h2>
+          {/* Main content */}
+          {!loading && stats && (
+            <>
+              {/* Stats grid */}
+              <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
+                <StatCard
+                  icon="memory"
+                  label={t("memoryEntries")}
+                  value={sc?.memoryEntries ?? 0}
+                  sub={t("memoryEntriesSub")}
+                />
+                <StatCard
+                  icon="storage"
+                  label={t("dbEntries")}
+                  value={sc?.dbEntries ?? 0}
+                  sub={t("dbEntriesSub")}
+                />
+                <StatCard
+                  icon="trending_up"
+                  label={t("cacheHits")}
+                  value={sc?.hits ?? 0}
+                  sub={t("cacheHitsSub", { total: totalRequests })}
+                  valueClass="text-green-500"
+                />
+                <StatCard
+                  icon="token"
+                  label={t("tokensSaved")}
+                  value={(sc?.tokensSaved ?? 0).toLocaleString()}
+                  sub={t("tokensSavedSub")}
+                  valueClass="text-blue-400"
+                />
              </div>
-              <div className="grid grid-cols-2 gap-4">
-                <div className="p-3 rounded-lg bg-surface/50">
-                  <div className="text-lg font-semibold tabular-nums">{idp?.activeKeys ?? 0}</div>
-                  <div className="text-xs text-text-muted mt-0.5">{t("activeDedupKeys")}</div>
-                </div>
-                <div className="p-3 rounded-lg bg-surface/50">
-                  <div className="text-lg font-semibold tabular-nums">
-                    {idp ? `${(idp.windowMs / 1000).toFixed(0)}s` : "—"}
+
+              {/* Hit rate + breakdown */}
+              <Card>
+                <div className="p-5 flex flex-col gap-4">
+                  <div className="flex items-center justify-between">
+                    <h2 className="font-medium text-sm">{t("performance")}</h2>
+                    <span className="text-xs text-text-muted">
+                      {t("autoRefresh", { seconds: REFRESH_INTERVAL_SECONDS })}
+                    </span>
+                  </div>
+                  <HitRateBar hitRate={hitRate} label={t("hitRate")} />
+                  <div className="grid grid-cols-3 gap-4 pt-3 border-t border-border/30 text-center">
+                    <div>
+                      <div className="text-lg font-semibold tabular-nums text-green-500">
+                        {sc?.hits ?? 0}
+                      </div>
+                      <div className="text-xs text-text-muted mt-0.5">{t("hits")}</div>
+                    </div>
+                    <div>
+                      <div className="text-lg font-semibold tabular-nums text-red-400">
+                        {sc?.misses ?? 0}
+                      </div>
+                      <div className="text-xs text-text-muted mt-0.5">{t("misses")}</div>
+                    </div>
+                    <div>
+                      <div className="text-lg font-semibold tabular-nums">{totalRequests}</div>
+                      <div className="text-xs text-text-muted mt-0.5">{t("total")}</div>
+                    </div>
                  </div>
-                  <div className="text-xs text-text-muted mt-0.5">{t("dedupWindow")}</div>
                </div>
-              </div>
-            </div>
-          </Card>
+              </Card>
+
+              {/* Prompt Cache Stats */}
+              {pc && (
+                <Card>
+                  <div className="p-5 flex flex-col gap-4">
+                    <div className="flex items-center gap-2">
+                      <span
+                        className="material-symbols-outlined text-base text-text-muted"
+                        aria-hidden="true"
+                      >
+                        bolt
+                      </span>
+                      <h2 className="font-medium text-sm">{t("promptCache")}</h2>
+                    </div>
+
+                    <div className="grid grid-cols-2 md:grid-cols-4 gap-4">
+                      <div className="p-3 rounded-lg bg-surface/50">
+                        <div className="text-lg font-semibold tabular-nums">
+                          {pc.requestsWithCacheControl.toLocaleString()}
+                        </div>
+                        <div className="text-xs text-text-muted mt-0.5">{t("cachedRequests")}</div>
+                      </div>
+                      <div className="p-3 rounded-lg bg-surface/50">
+                        <div className="text-lg font-semibold tabular-nums text-green-500">
+                          {promptCacheHitRate.toFixed(1)}%
+                        </div>
+                        <div className="text-xs text-text-muted mt-0.5">{t("cacheHitRate")}</div>
+                      </div>
+                      <div className="p-3 rounded-lg bg-surface/50">
+                        <div className="text-lg font-semibold tabular-nums text-blue-400">
+                          {pc.totalCachedTokens.toLocaleString()}
+                        </div>
+                        <div className="text-xs text-text-muted mt-0.5">{t("cachedTokens")}</div>
+                      </div>
+                      <div className="p-3 rounded-lg bg-surface/50">
+                        <div className="text-lg font-semibold tabular-nums text-purple-400">
+                          {pc.totalCacheCreationTokens.toLocaleString()}
+                        </div>
+                        <div className="text-xs text-text-muted mt-0.5">
+                          {t("cacheCreationTokens")}
+                        </div>
+                      </div>
+                    </div>
+
+                    {providerEntries.length > 0 && (
+                      <div className="pt-3 border-t border-border/30">
+                        <h3 className="text-xs font-medium text-text-muted mb-3">
+                          {t("byProvider")}
+                        </h3>
+                        <div className="overflow-x-auto">
+                          <table className="w-full text-sm">
+                            <thead>
+                              <tr className="text-left text-xs text-text-muted border-b border-border/30">
+                                <th className="pb-2 pr-4">{t("provider")}</th>
+                                <th className="pb-2 pr-4">{t("requests")}</th>
+                                <th className="pb-2 pr-4">{t("inputTokens")}</th>
+                                <th className="pb-2 pr-4">{t("cachedTokensCol")}</th>
+                                <th className="pb-2">{t("cacheCreation")}</th>
+                              </tr>
+                            </thead>
+                            <tbody>
+                              {providerEntries.map(([provider, data]) => (
+                                <tr key={provider} className="border-b border-border/20">
+                                  <td className="py-2 pr-4 font-medium">{provider}</td>
+                                  <td className="py-2 pr-4 tabular-nums">
+                                    {data.requests.toLocaleString()}
+                                  </td>
+                                  <td className="py-2 pr-4 tabular-nums">
+                                    {data.inputTokens.toLocaleString()}
+                                  </td>
+                                  <td className="py-2 pr-4 tabular-nums text-green-500">
+                                    {data.cachedTokens.toLocaleString()}
+                                  </td>
+                                  <td className="py-2 tabular-nums text-purple-400">
+                                    {data.cacheCreationTokens.toLocaleString()}
+                                  </td>
+                                </tr>
+                              ))}
+                            </tbody>
+                          </table>
+                        </div>
+                      </div>
+                    )}
+                  </div>
+                </Card>
+              )}
+
+              {/* Cache Trend (24h) */}
+              {trend.length > 0 && (
+                <Card>
+                  <div className="p-5 flex flex-col gap-4">
+                    <div className="flex items-center gap-2">
+                      <span
+                        className="material-symbols-outlined text-base text-text-muted"
+                        aria-hidden="true"
+                      >
+                        timeline
+                      </span>
+                      <h2 className="font-medium text-sm">{t("trend24h")}</h2>
+                    </div>
+                    <div className="flex items-end gap-1 h-32">
+                      {trend.map((point) => {
+                        const height = Math.max(4, (point.requests / maxTrendRequests) * 100);
+                        const cachedHeight =
+                          point.requests > 0
+                            ? Math.max(2, (point.cachedRequests / point.requests) * height)
+                            : 0;
+                        const hour = new Date(point.timestamp).toLocaleTimeString([], {
+                          hour: "2-digit",
+                          minute: "2-digit",
+                          hour12: false,
+                        });
+                        return (
+                          <div
+                            key={point.timestamp}
+                            className="flex-1 flex flex-col items-center gap-1 group relative"
+                          >
+                            <div className="absolute bottom-full mb-1 hidden group-hover:block bg-surface-raised border border-border rounded px-2 py-1 text-xs whitespace-nowrap z-10">
+                              {hour}: {point.requests} {t("requests").toLowerCase()},{" "}
+                              {point.cachedRequests} {t("cached").toLowerCase()}
+                            </div>
+                            <div className="w-full flex flex-col justify-end h-full gap-px">
+                              <div
+                                className="w-full bg-green-500/30 rounded-t"
+                                style={{ height: `${cachedHeight}%` }}
+                              />
+                              <div
+                                className="w-full bg-text-muted/20 rounded-t"
+                                style={{ height: `${height - cachedHeight}%` }}
+                              />
+                            </div>
+                            <span className="text-[10px] text-text-muted truncate w-full text-center">
+                              {hour.split(":")[0]}
+                            </span>
+                          </div>
+                        );
+                      })}
+                    </div>
+                    <div className="flex items-center gap-4 text-xs text-text-muted">
+                      <div className="flex items-center gap-1.5">
+                        <div className="w-3 h-3 rounded bg-text-muted/20" />
+                        <span>{t("total")}</span>
+                      </div>
+                      <div className="flex items-center gap-1.5">
+                        <div className="w-3 h-3 rounded bg-green-500/30" />
+                        <span>{t("cached")}</span>
+                      </div>
+                    </div>
+                  </div>
+                </Card>
+              )}
+
+              {/* Cache behavior */}
+              <Card>
+                <div className="p-5 flex flex-col gap-3">
+                  <h2 className="font-medium text-sm">{t("behavior")}</h2>
+                  <div className="grid grid-cols-1 md:grid-cols-2 gap-3">
+                    <InfoRow icon="info">{t("behaviorDeterministic")}</InfoRow>
+                    <InfoRow icon="info">
+                      {t.rich("behaviorBypass", {
+                        header: () => (
+                          <code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
+                            X-OmniRoute-No-Cache: true
+                          </code>
+                        ),
+                      })}
+                    </InfoRow>
+                    <InfoRow icon="info">{t("behaviorTwoTier")}</InfoRow>
+                    <InfoRow icon="info">
+                      {t.rich("behaviorTtl", {
+                        envVar: () => (
+                          <code className="bg-surface px-1 py-0.5 rounded text-xs font-mono">
+                            SEMANTIC_CACHE_TTL_MS
+                          </code>
+                        ),
+                      })}
+                    </InfoRow>
+                  </div>
+                </div>
+              </Card>
+
+              {/* Idempotency */}
+              <Card>
+                <div className="p-5 flex flex-col gap-3">
+                  <div className="flex items-center gap-2">
+                    <span
+                      className="material-symbols-outlined text-base text-text-muted"
+                      aria-hidden="true"
+                    >
+                      fingerprint
+                    </span>
+                    <h2 className="font-medium text-sm">{t("idempotency")}</h2>
+                  </div>
+                  <div className="grid grid-cols-2 gap-4">
+                    <div className="p-3 rounded-lg bg-surface/50">
+                      <div className="text-lg font-semibold tabular-nums">
+                        {idp?.activeKeys ?? 0}
+                      </div>
+                      <div className="text-xs text-text-muted mt-0.5">{t("activeDedupKeys")}</div>
+                    </div>
+                    <div className="p-3 rounded-lg bg-surface/50">
+                      <div className="text-lg font-semibold tabular-nums">
+                        {idp ? `${(idp.windowMs / 1000).toFixed(0)}s` : "—"}
+                      </div>
+                      <div className="text-xs text-text-muted mt-0.5">{t("dedupWindow")}</div>
+                    </div>
+                  </div>
+                </div>
+              </Card>
+            </>
+          )}
        </>
      )}
    </div>
@@ -802,8 +802,6 @@ export default function ProviderDetailPage() {
  const { copied, copy } = useCopyToClipboard();
  const t = useTranslations("providers");
  const notify = useNotificationStore();
-  const hasAutoOpened = useRef(false);
-  const userDismissed = useRef(false);
  const [proxyTarget, setProxyTarget] = useState(null);
  const [proxyConfig, setProxyConfig] = useState(null);
  const [connProxyMap, setConnProxyMap] = useState<
@@ -989,26 +987,6 @@ export default function ProviderDetailPage() {
    }
  }, [loading, connections, loadConnProxies]);

-  // Auto-open Add Connection modal when no connections exist (better UX)
-  // Only fires once on initial load, not on HMR remounts or after user dismissal
-  useEffect(() => {
-    if (
-      !loading &&
-      connections.length === 0 &&
-      providerInfo &&
-      !isCompatible &&
-      !hasAutoOpened.current &&
-      !userDismissed.current
-    ) {
-      hasAutoOpened.current = true;
-      if (isOAuth) {
-        setShowOAuthModal(true);
-      } else {
-        setShowAddApiKeyModal(true);
-      }
-    }
-  }, [loading]); // eslint-disable-line react-hooks/exhaustive-deps
-
  const handleSetAlias = async (modelId, alias, providerAliasOverride = providerAlias) => {
    const fullModel = `${providerAliasOverride}/${modelId}`;
    try {
@@ -1490,7 +1468,10 @@ export default function ProviderDetailPage() {
        logs: [
          t("foundModelsStartingImport", { count: newModels.length }),
          ...(newModels.length < fetchedModels.length
-            ? [t("skippingExistingModels", { count: fetchedModels.length - newModels.length }) || `Skipping ${fetchedModels.length - newModels.length} existing models`]
+            ? [
+                t("skippingExistingModels", { count: fetchedModels.length - newModels.length }) ||
+                  `Skipping ${fetchedModels.length - newModels.length} existing models`,
+              ]
            : []),
        ],
      }));
@@ -2428,7 +2409,6 @@ export default function ProviderDetailPage() {
          providerInfo={providerInfo}
          onSuccess={handleOAuthSuccess}
          onClose={() => {
-            userDismissed.current = true;
            setShowOAuthModal(false);
          }}
        />
@@ -2437,7 +2417,6 @@ export default function ProviderDetailPage() {
          isOpen={showOAuthModal}
          onSuccess={handleOAuthSuccess}
          onClose={() => {
-            userDismissed.current = true;
            setShowOAuthModal(false);
          }}
        />
@@ -2448,7 +2427,6 @@ export default function ProviderDetailPage() {
          providerInfo={providerInfo}
          onSuccess={handleOAuthSuccess}
          onClose={() => {
-            userDismissed.current = true;
            setShowOAuthModal(false);
          }}
        />
@@ -0,0 +1,191 @@
+"use client";
+
+import { useState, useEffect } from "react";
+import { Card, Button } from "@/shared/components";
+import { useTranslations } from "next-intl";
+
+interface CacheConfig { // Payload read from and written to /api/settings/cache-config
+  semanticCacheEnabled: boolean;
+  semanticCacheMaxSize: number; // edited through a number input declared with min 1 / max 1000
+  semanticCacheTTL: number; // milliseconds; the settings form shows and edits it in minutes
+  promptCacheEnabled: boolean;
+  promptCacheStrategy: "auto" | "system-only" | "manual";
+  alwaysPreserveClientCache: "auto" | "always" | "never";
+}
+
+/**
+ * Parses the raw value of a number input.
+ *
+ * Returns `fallback` when the value is empty, non-numeric or zero (the same
+ * cases the previous `parseInt(...) || fallback` expression covered) and
+ * otherwise clamps the parsed integer to the inclusive range [min, max].
+ */
+function parseBoundedInt(raw: string, fallback: number, min: number, max: number): number {
+  const parsed = Number.parseInt(raw, 10);
+  if (Number.isNaN(parsed) || parsed === 0) return fallback;
+  return Math.min(max, Math.max(min, parsed));
+}
+
+/**
+ * Settings card for the semantic-cache and prompt-cache options.
+ *
+ * Loads the current configuration from `/api/settings/cache-config` on mount
+ * (keeping the built-in defaults when the request fails) and sends the edited
+ * configuration back with a PUT when the save button is pressed.
+ */
+export default function CacheSettingsTab() {
+  const t = useTranslations("settings");
+  const [config, setConfig] = useState<CacheConfig>({
+    semanticCacheEnabled: true,
+    semanticCacheMaxSize: 100,
+    semanticCacheTTL: 1800000,
+    promptCacheEnabled: true,
+    promptCacheStrategy: "auto",
+    alwaysPreserveClientCache: "auto",
+  });
+  const [saving, setSaving] = useState(false);
+  const [loading, setLoading] = useState(true);
+
+  useEffect(() => {
+    // Prevents state updates once the component has unmounted.
+    let cancelled = false;
+
+    fetch("/api/settings/cache-config")
+      .then((r) => (r.ok ? r.json() : null))
+      .then((data) => {
+        if (cancelled || !data || typeof data !== "object") return;
+        // Merge over the defaults so a partial payload cannot leave a field
+        // undefined (which would turn the controlled inputs uncontrolled).
+        setConfig((c) => ({ ...c, ...data }));
+      })
+      .catch((error) => {
+        // Best effort: the defaults stay in place, but the failure is logged.
+        console.error("[CacheSettingsTab] Failed to load cache config:", error);
+      })
+      .finally(() => {
+        if (!cancelled) setLoading(false);
+      });
+
+    return () => {
+      cancelled = true;
+    };
+  }, []);
+
+  const handleSave = async () => {
+    setSaving(true);
+    try {
+      const res = await fetch("/api/settings/cache-config", {
+        method: "PUT",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify(config),
+      });
+      // fetch only rejects on network errors, so HTTP failures need an explicit check.
+      if (!res.ok) {
+        console.error("[CacheSettingsTab] Failed to save cache config: HTTP", res.status);
+      }
+    } catch (error) {
+      // Without this catch the rejection would escape the click handler unhandled.
+      console.error("[CacheSettingsTab] Failed to save cache config:", error);
+    } finally {
+      setSaving(false);
+    }
+  };
+
+  if (loading) {
+    return (
+      <Card className="p-6">
+        <p className="text-sm text-text-muted">{t("loading")}</p>
+      </Card>
+    );
+  }
+
+  return (
+    <Card className="p-6">
+      <h3 className="text-lg font-semibold text-text-main flex items-center gap-2 mb-4">
+        <span className="material-symbols-outlined text-[20px]" aria-hidden="true">
+          cached
+        </span>
+        {t("cacheSettings")}
+      </h3>
+
+      <div className="space-y-6">
+        {/* Semantic Cache */}
+        <div className="space-y-3">
+          <h4 className="text-sm font-medium text-text-main">{t("semanticCache")}</h4>
+
+          <label className="flex items-center justify-between">
+            <span className="text-sm text-text-muted">{t("enabled")}</span>
+            <button
+              type="button"
+              role="switch"
+              aria-checked={config.semanticCacheEnabled}
+              onClick={() =>
+                setConfig((c) => ({ ...c, semanticCacheEnabled: !c.semanticCacheEnabled }))
+              }
+              className={`relative w-10 h-5 rounded-full transition-colors ${
+                config.semanticCacheEnabled ? "bg-green-500" : "bg-border"
+              }`}
+            >
+              <span
+                className={`absolute top-0.5 w-4 h-4 rounded-full bg-white transition-transform ${
+                  config.semanticCacheEnabled ? "left-5" : "left-0.5"
+                }`}
+              />
+            </button>
+          </label>
+
+          <label className="flex items-center justify-between">
+            <span className="text-sm text-text-muted">{t("maxEntries")}</span>
+            <input
+              type="number"
+              min={1}
+              max={1000}
+              value={config.semanticCacheMaxSize}
+              onChange={(e) =>
+                setConfig((c) => ({
+                  ...c,
+                  semanticCacheMaxSize: parseBoundedInt(e.target.value, 100, 1, 1000),
+                }))
+              }
+              className="w-24 px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
+            />
+          </label>
+
+          <label className="flex items-center justify-between">
+            <span className="text-sm text-text-muted">{t("ttlMinutes")}</span>
+            <input
+              type="number"
+              min={1}
+              max={1440}
+              value={Math.round(config.semanticCacheTTL / 60000)}
+              onChange={(e) =>
+                setConfig((c) => ({
+                  ...c,
+                  // The form works in minutes; the config stores milliseconds.
+                  semanticCacheTTL: parseBoundedInt(e.target.value, 30, 1, 1440) * 60000,
+                }))
+              }
+              className="w-24 px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
+            />
+          </label>
+        </div>
+
+        {/* Prompt Cache */}
+        <div className="space-y-3 pt-4 border-t border-border/30">
+          <h4 className="text-sm font-medium text-text-main">{t("promptCache")}</h4>
+
+          <label className="flex items-center justify-between">
+            <span className="text-sm text-text-muted">{t("enabled")}</span>
+            <button
+              type="button"
+              role="switch"
+              aria-checked={config.promptCacheEnabled}
+              onClick={() =>
+                setConfig((c) => ({ ...c, promptCacheEnabled: !c.promptCacheEnabled }))
+              }
+              className={`relative w-10 h-5 rounded-full transition-colors ${
+                config.promptCacheEnabled ? "bg-green-500" : "bg-border"
+              }`}
+            >
+              <span
+                className={`absolute top-0.5 w-4 h-4 rounded-full bg-white transition-transform ${
+                  config.promptCacheEnabled ? "left-5" : "left-0.5"
+                }`}
+              />
+            </button>
+          </label>
+
+          <label className="flex items-center justify-between">
+            <span className="text-sm text-text-muted">{t("strategy")}</span>
+            <select
+              value={config.promptCacheStrategy}
+              onChange={(e) =>
+                setConfig((c) => ({
+                  ...c,
+                  promptCacheStrategy: e.target.value as CacheConfig["promptCacheStrategy"],
+                }))
+              }
+              className="px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
+            >
+              <option value="auto">Auto</option>
+              <option value="system-only">System Only</option>
+              <option value="manual">Manual</option>
+            </select>
+          </label>
+
+          <label className="flex items-center justify-between">
+            <span className="text-sm text-text-muted">{t("preserveClientCache")}</span>
+            <select
+              value={config.alwaysPreserveClientCache}
+              onChange={(e) =>
+                setConfig((c) => ({
+                  ...c,
+                  alwaysPreserveClientCache: e.target
+                    .value as CacheConfig["alwaysPreserveClientCache"],
+                }))
+              }
+              className="px-2 py-1 text-sm rounded border border-border bg-surface text-text-main"
+            >
+              <option value="auto">Auto</option>
+              <option value="always">Always</option>
+              <option value="never">Never</option>
+            </select>
+          </label>
+        </div>
+
+        {/* Save */}
+        <div className="pt-4 border-t border-border/30">
+          <Button onClick={handleSave} disabled={saving} size="sm">
+            {saving ? t("saving") : t("save")}
+          </Button>
+        </div>
+      </div>
+    </Card>
+  );
+}
@@ -16,8 +16,7 @@ import CodexServiceTierTab from "./components/CodexServiceTierTab";
 import SystemPromptTab from "./components/SystemPromptTab";
 import ModelAliasesTab from "./components/ModelAliasesTab";
 import BackgroundDegradationTab from "./components/BackgroundDegradationTab";
-
-import CacheStatsCard from "./components/CacheStatsCard";
+import CacheSettingsTab from "./components/CacheSettingsTab";
 import ResilienceTab from "./components/ResilienceTab";

 const tabs = [
@@ -88,7 +87,7 @@ export default function SettingsPage() {
              <ThinkingBudgetTab />
              <CodexServiceTierTab />
              <SystemPromptTab />
-              <CacheStatsCard />
+              <CacheSettingsTab />
            </div>
          )}

@@ -28,6 +28,7 @@ const QUOTA_BAR_YELLOW_THRESHOLD = 20;
 // Provider display config
 const PROVIDER_CONFIG = {
  antigravity: { label: "Antigravity", color: "#F59E0B" },
+  "gemini-cli": { label: "Gemini CLI", color: "#4285F4" },
  github: { label: "GitHub Copilot", color: "#333" },
  kiro: { label: "Kiro AI", color: "#FF6B35" },
  codex: { label: "OpenAI Codex", color: "#10A37F" },
@@ -279,12 +280,13 @@ export default function ProviderLimits() {
  const sortedConnections = useMemo(() => {
    const priority = {
      antigravity: 1,
-      github: 2,
-      codex: 3,
-      claude: 4,
-      kiro: 5,
-      glm: 6,
-      "kimi-coding": 7,
+      "gemini-cli": 2,
+      github: 3,
+      codex: 4,
+      claude: 5,
+      kiro: 6,
+      glm: 7,
+      "kimi-coding": 8,
    };
    return [...filteredConnections].sort(
      (a, b) => (priority[a.provider] || 9) - (priority[b.provider] || 9)
@@ -624,6 +626,7 @@ export default function ProviderLimits() {
                        >
                          {/* Model label */}
                          <span
+                            title={q.modelKey || q.name}
                            className="text-[11px] font-semibold py-0.5 px-2 rounded whitespace-nowrap min-w-[60px] text-center"
                            style={{ background: colors.bg, color: colors.text }}
                          >
@@ -11,15 +11,6 @@ const PROVIDER_PLAN_FALLBACKS = new Set([
 ]);

 const QUOTA_LABEL_MAP: Record<string, string> = {
-  "gemini-3-pro-high": "G3 Pro",
-  "gemini-3-pro-low": "G3 Pro Low",
-  "gemini-3-flash": "G3 Flash",
-  "gemini-2.5-flash": "G2.5 Flash",
-  "claude-opus-4-6-thinking": "Opus 4.6 Tk",
-  "claude-opus-4-5-thinking": "Opus 4.5 Tk",
-  "claude-opus-4-5": "Opus 4.5",
-  "claude-sonnet-4-5-thinking": "Sonnet 4.5 Tk",
-  "claude-sonnet-4-5": "Sonnet 4.5",
  chat: "Chat",
  completions: "Completions",
  premium_interactions: "Premium",
@@ -254,6 +245,14 @@ export function parseQuotaData(provider, data) {
        }
        break;

+      case "gemini-cli":
+        if (data.quotas) {
+          Object.entries(data.quotas).forEach(([modelKey, quota]: [string, any]) => {
+            normalizedQuotas.push(normalizeQuotaEntry(modelKey, quota, { modelKey }));
+          });
+        }
+        break;
+
      default:
        // Generic fallback for unknown providers
        if (data.quotas) {
@@ -0,0 +1,13 @@
+import { NextResponse } from "next/server";
+import { getDiversityReport } from "../../../../../open-sse/services/autoCombo/providerDiversity";
+
+export const dynamic = "force-dynamic";
+
+/**
+ * GET handler: returns the result of `getDiversityReport()` as JSON.
+ *
+ * Responds with HTTP 500 and an `error` string if building the report throws.
+ */
+export async function GET() {
+  try {
+    const report = getDiversityReport();
+    return NextResponse.json(report);
+  } catch (error: unknown) {
+    // Thrown values are not guaranteed to be Error instances; narrow before
+    // reading `.message` so the response always carries a string.
+    const message = error instanceof Error ? error.message : String(error);
+    return NextResponse.json({ error: message }, { status: 500 });
+  }
+}
@@ -0,0 +1,95 @@
+import { NextRequest, NextResponse } from "next/server";
+import { getDbInstance } from "@/lib/db/core";
+
+/**
+ * Row shape for the `semantic_cache` listing query in this route's GET handler;
+ * the fields mirror the columns named in that query's SELECT list.
+ *
+ * NOTE(review): rows are cast to this type, not validated at runtime, so the
+ * field types here are assumptions about the table schema — confirm.
+ */
+interface CacheEntry {
+  id: string;
+  signature: string;
+  model: string;
+  hit_count: number;
+  tokens_saved: number;
+  created_at: string;
+  expires_at: string;
+}
+
+/**
+ * GET handler: lists `semantic_cache` rows with pagination, filtering and sorting.
+ *
+ * Query parameters:
+ *   page      — 1-based page number (default 1; non-numeric values fall back to 1)
+ *   limit     — page size clamped to 1..100 (default 20; non-numeric values fall back to 20)
+ *   search    — substring matched against `signature` or `model` via LIKE
+ *   model     — exact match on `model`
+ *   sortBy    — created_at | expires_at | hit_count | tokens_saved | model (default created_at)
+ *   sortOrder — "asc" sorts ascending; any other value sorts descending
+ *
+ * Responds with `{ entries, pagination }`, or HTTP 500 with an `error` string
+ * if anything throws.
+ */
+export async function GET(req: NextRequest) {
+  try {
+    const { searchParams } = new URL(req.url);
+
+    // parseInt returns NaN for non-numeric input and Math.max/Math.min
+    // propagate NaN, so fall back to the defaults explicitly instead of
+    // binding NaN to LIMIT/OFFSET and echoing it in the pagination payload.
+    const parsedPage = Number.parseInt(searchParams.get("page") || "1", 10);
+    const parsedLimit = Number.parseInt(searchParams.get("limit") || "20", 10);
+    const page = Number.isNaN(parsedPage) ? 1 : Math.max(1, parsedPage);
+    const limit = Number.isNaN(parsedLimit) ? 20 : Math.min(100, Math.max(1, parsedLimit));
+
+    const search = searchParams.get("search") || "";
+    const model = searchParams.get("model") || "";
+    const sortBy = searchParams.get("sortBy") || "created_at";
+    const sortOrder = searchParams.get("sortOrder") || "desc";
+
+    const db = getDbInstance();
+    const offset = (page - 1) * limit;
+
+    // User-supplied values are only ever passed as bound parameters.
+    const conditions: string[] = [];
+    const params: unknown[] = [];
+
+    if (search) {
+      conditions.push("(signature LIKE ? OR model LIKE ?)");
+      params.push(`%${search}%`, `%${search}%`);
+    }
+
+    if (model) {
+      conditions.push("model = ?");
+      params.push(model);
+    }
+
+    const whereClause = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
+
+    // Column and direction are interpolated into the SQL text, so both are
+    // restricted to fixed values rather than taken from the request verbatim.
+    const validSortColumns = ["created_at", "expires_at", "hit_count", "tokens_saved", "model"];
+    const orderBy = validSortColumns.includes(sortBy) ? sortBy : "created_at";
+    const order = sortOrder === "asc" ? "ASC" : "DESC";
+
+    const countRow = db
+      .prepare(`SELECT COUNT(*) as total FROM semantic_cache ${whereClause}`)
+      .get(...params) as { total: number };
+
+    const entries = db
+      .prepare(
+        `SELECT id, signature, model, hit_count, tokens_saved, created_at, expires_at
+         FROM semantic_cache ${whereClause}
+         ORDER BY ${orderBy} ${order}
+         LIMIT ? OFFSET ?`
+      )
+      .all(...params, limit, offset) as CacheEntry[];
+
+    const total = countRow?.total ?? 0;
+
+    return NextResponse.json({
+      entries,
+      pagination: {
+        page,
+        limit,
+        total,
+        totalPages: Math.ceil(total / limit),
+      },
+    });
+  } catch (error) {
+    return NextResponse.json({ error: String(error) }, { status: 500 });
+  }
+}
+
+/**
+ * DELETE handler: removes rows from `semantic_cache`.
+ *
+ * Query parameters (the first one present wins):
+ *   signature — delete rows whose `signature` equals this value
+ *   model     — delete all rows for this `model`
+ *
+ * Responds with `{ ok: true, deleted }` where `deleted` is the number of rows
+ * actually removed, HTTP 400 when neither parameter is given, or HTTP 500 with
+ * an `error` string if anything throws.
+ */
+export async function DELETE(req: NextRequest) {
+  try {
+    const { searchParams } = new URL(req.url);
+    const signature = searchParams.get("signature");
+    const model = searchParams.get("model");
+
+    const db = getDbInstance();
+
+    if (signature) {
+      // Report the real affected-row count (0 when nothing matched) instead of
+      // a hard-coded 1, matching what the model branch already does.
+      const result = db.prepare("DELETE FROM semantic_cache WHERE signature = ?").run(signature);
+      return NextResponse.json({ ok: true, deleted: result.changes });
+    }
+
+    if (model) {
+      const result = db.prepare("DELETE FROM semantic_cache WHERE model = ?").run(model);
+      return NextResponse.json({ ok: true, deleted: result.changes });
+    }
+
+    return NextResponse.json({ error: "Provide signature or model parameter" }, { status: 400 });
+  } catch (error) {
+    return NextResponse.json({ error: String(error) }, { status: 500 });
+  }
+}
@@ -8,21 +8,26 @@ import {
  invalidateStale,
 } from "@/lib/semanticCache";
 import { getIdempotencyStats } from "@/lib/idempotencyLayer";
+import { getCacheMetrics, getCacheTrend } from "@/lib/db/settings";

 function errorMessage(error: unknown): string {
  return error instanceof Error ? error.message : String(error);
 }

-/**
- * GET /api/cache — Cache statistics
- */
-export async function GET() {
+export async function GET(req: NextRequest) {
  try {
+    const { searchParams } = new URL(req.url);
+    const trendHours = parseInt(searchParams.get("trendHours") || "24", 10);
+
    const cacheStats = getCacheStats();
    const idempotencyStats = getIdempotencyStats();
+    const promptCacheMetrics = await getCacheMetrics();
+    const trend = await getCacheTrend(trendHours);

    return NextResponse.json({
      semanticCache: cacheStats,
+      promptCache: promptCacheMetrics,
+      trend,
      idempotency: idempotencyStats,
    });
  } catch (error) {
@@ -30,17 +35,6 @@ export async function GET() {
  }
 }

-/**
- * DELETE /api/cache — Clear all caches or targeted invalidation.
- *
- * Exactly one optional query parameter may be provided:
- *   ?model=<name>      — invalidate all entries for a specific model
- *   ?signature=<hex>   — invalidate a single entry by its SHA-256 signature
- *   ?staleMs=<number>  — invalidate entries older than N milliseconds
- *   (no params)        — clear all cache entries
- *
- * Providing more than one parameter returns 400 Bad Request.
- */
 export async function DELETE(req: NextRequest) {
  try {
    const { searchParams } = new URL(req.url);
@@ -5,6 +5,87 @@ import { getComboByName } from "@/lib/localDb";
 import { testComboSchema } from "@/shared/validation/schemas";
 import { isValidationFailure, validateBody } from "@/shared/validation/helpers";

+/**
+ * Runs one combo health-check request for a single model.
+ *
+ * POSTs the body built by `buildComboTestRequestBody(modelStr)` to
+ * `internalUrl`, aborting after 20 seconds. It never throws: every outcome is
+ * reported as a result object.
+ *
+ * @param modelStr - Model identifier under test; echoed back as `model`.
+ * @param internalUrl - URL the test request is sent to.
+ * @returns `{ model, status: "ok", latencyMs, responseText }` on success, or
+ *   `{ model, status: "error", error, latencyMs, statusCode? }` on failure,
+ *   where `error` is always a string. `latencyMs` is measured up to the point
+ *   `fetch` resolves (before the body is read) or the failure is caught.
+ */
+async function testComboModel(modelStr, internalUrl) {
+  const startTime = Date.now();
+  try {
+    // Send a minimal but real chat request through the same internal
+    // endpoint an external OpenAI-compatible client would use.
+    const testBody = buildComboTestRequestBody(modelStr);
+
+    const controller = new AbortController();
+    const timeout = setTimeout(() => controller.abort(), 20000);
+
+    let res;
+    try {
+      res = await fetch(internalUrl, {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          // Internal dashboard tests still use the normal /v1 pipeline but
+          // bypass REQUIRE_API_KEY so admins can test with local session auth.
+          "X-Internal-Test": "combo-health-check",
+          // Force a fresh execution path so combo tests cannot be satisfied by
+          // OmniRoute's semantic cache or other request reuse layers.
+          "X-OmniRoute-No-Cache": "true",
+          "X-Request-Id": `combo-test-${randomUUID()}`,
+        },
+        body: JSON.stringify(testBody),
+        signal: controller.signal,
+      });
+    } finally {
+      // Always release the timer, whether the request succeeded or threw.
+      clearTimeout(timeout);
+    }
+
+    const latencyMs = Date.now() - startTime;
+
+    if (res.ok) {
+      let responseBody = null;
+      try {
+        responseBody = await res.json();
+      } catch {
+        // A non-JSON success body is treated the same as "no text content".
+        responseBody = null;
+      }
+
+      const responseText = extractComboTestResponseText(responseBody);
+      if (!responseText) {
+        return {
+          model: modelStr,
+          status: "error",
+          statusCode: res.status,
+          error: "Provider returned HTTP 200 but no text content.",
+          latencyMs,
+        };
+      }
+
+      return { model: modelStr, status: "ok", latencyMs, responseText };
+    }
+
+    // statusText can be empty, so keep a fallback that still names the status.
+    let errorMsg = res.statusText || `HTTP ${res.status}`;
+    try {
+      const errBody = await res.json();
+      const candidate = errBody?.error?.message || errBody?.error;
+      if (typeof candidate === "string" && candidate) {
+        errorMsg = candidate;
+      } else if (candidate && typeof candidate === "object") {
+        // `error` may be a structured object without `message`; serialise it so
+        // callers always receive a string.
+        errorMsg = JSON.stringify(candidate);
+      }
+    } catch {
+      // Error body was not JSON; keep the status-based fallback.
+    }
+
+    return {
+      model: modelStr,
+      status: "error",
+      statusCode: res.status,
+      error: errorMsg,
+      latencyMs,
+    };
+  } catch (error) {
+    const latencyMs = Date.now() - startTime;
+    const isAbort =
+      typeof error === "object" &&
+      error !== null &&
+      (error as { name?: unknown }).name === "AbortError";
+    // Thrown values are not guaranteed to be Error instances.
+    const fallback = error instanceof Error ? error.message : String(error);
+    return {
+      model: modelStr,
+      status: "error",
+      error: isAbort ? "Timeout (20s)" : fallback,
+      latencyMs,
+    };
+  }
+}
+
 /**
 * POST /api/combos/test - Quick test a combo
 * Sends a real chat completion request through each model in the combo
@@ -44,93 +125,11 @@ export async function POST(request) {
      return NextResponse.json({ error: "Combo has no models" }, { status: 400 });
    }

-    const results = [];
-    let resolvedBy = null;
-
-    // Test each model sequentially
-    for (const modelStr of models) {
-      const startTime = Date.now();
-      try {
-        // Send a minimal but real chat request through the same internal
-        // endpoint an external OpenAI-compatible client would use.
-        const testBody = buildComboTestRequestBody(modelStr);
-
-        const internalUrl = `${getBaseUrl(request)}/v1/chat/completions`;
-        const controller = new AbortController();
-        const timeout = setTimeout(() => controller.abort(), 20000);
-
-        let res;
-        try {
-          res = await fetch(internalUrl, {
-            method: "POST",
-            headers: {
-              "Content-Type": "application/json",
-              // Internal dashboard tests still use the normal /v1 pipeline but
-              // bypass REQUIRE_API_KEY so admins can test with local session auth.
-              "X-Internal-Test": "combo-health-check",
-              // Force a fresh execution path so combo tests cannot be satisfied by
-              // OmniRoute's semantic cache or other request reuse layers.
-              "X-OmniRoute-No-Cache": "true",
-              "X-Request-Id": `combo-test-${randomUUID()}`,
-            },
-            body: JSON.stringify(testBody),
-            signal: controller.signal,
-          });
-        } finally {
-          clearTimeout(timeout);
-        }
-
-        const latencyMs = Date.now() - startTime;
-
-        if (res.ok) {
-          let responseBody = null;
-          try {
-            responseBody = await res.json();
-          } catch {
-            responseBody = null;
-          }
-
-          const responseText = extractComboTestResponseText(responseBody);
-          if (!responseText) {
-            results.push({
-              model: modelStr,
-              status: "error",
-              statusCode: res.status,
-              error: "Provider returned HTTP 200 but no text content.",
-              latencyMs,
-            });
-            continue;
-          }
-
-          results.push({ model: modelStr, status: "ok", latencyMs, responseText });
-          if (!resolvedBy) resolvedBy = modelStr;
-        } else {
-          let errorMsg = "";
-          try {
-            const errBody = await res.json();
-            errorMsg = errBody?.error?.message || errBody?.error || res.statusText;
-          } catch {
-            errorMsg = res.statusText;
-          }
-
-          results.push({
-            model: modelStr,
-            status: "error",
-            statusCode: res.status,
-            error: errorMsg,
-            latencyMs,
-          });
-        }
-      } catch (error) {
-        const latencyMs = Date.now() - startTime;
-        results.push({
-          model: modelStr,
-          status: "error",
-          error: error.name === "AbortError" ? "Timeout (20s)" : error.message,
-          latencyMs,
-        });
-      }
-    }
+    const internalUrl = `${getBaseUrl(request)}/v1/chat/completions`;
+    const results = await Promise.all(
+      models.map((modelStr) => testComboModel(modelStr, internalUrl))
+    );
+    const resolvedBy = results.find((result) => result.status === "ok")?.model || null;

    return NextResponse.json({
      comboName,
@@ -139,19 +139,7 @@ const PROVIDER_MODELS_CONFIG: Record<string, ProviderModelsConfigEntry> = {
        name: m.displayName || (m.name || "").replace(/^models\//, ""),
      })),
  },
-  "gemini-cli": {
-    url: "https://generativelanguage.googleapis.com/v1beta/models",
-    method: "GET",
-    headers: { "Content-Type": "application/json" },
-    authHeader: "Authorization",
-    authPrefix: "Bearer ",
-    parseResponse: (data) =>
-      (data.models || []).map((m) => ({
-        ...m,
-        id: (m.name || m.id || "").replace(/^models\//, ""),
-        name: m.displayName || (m.name || "").replace(/^models\//, ""),
-      })),
-  },
+  // gemini-cli handled via retrieveUserQuota (see GET handler)
  qwen: {
    url: "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models",
    method: "GET",
@@ -505,6 +493,68 @@ export async function GET(
      return buildResponse({ provider, connectionId, models });
    }

+    if (provider === "gemini-cli") {
+      // Gemini CLI doesn't have a /models endpoint. Instead, query the quota
+      // endpoint to discover available models from the quota buckets.
+      if (!accessToken) {
+        return NextResponse.json(
+          { error: "No access token for Gemini CLI. Please reconnect OAuth." },
+          { status: 400 }
+        );
+      }
+
+      const psd = asRecord(connection.providerSpecificData);
+      const projectId = connection.projectId || psd.projectId || null;
+
+      if (!projectId) {
+        return NextResponse.json(
+          { error: "Gemini CLI project ID not available. Please reconnect OAuth." },
+          { status: 400 }
+        );
+      }
+
+      try {
+        const quotaRes = await fetch(
+          "https://cloudcode-pa.googleapis.com/v1internal:retrieveUserQuota",
+          {
+            method: "POST",
+            headers: {
+              Authorization: `Bearer ${accessToken}`,
+              "Content-Type": "application/json",
+            },
+            body: JSON.stringify({ project: projectId }),
+            signal: AbortSignal.timeout(10000),
+          }
+        );
+
+        if (!quotaRes.ok) {
+          const errText = await quotaRes.text();
+          console.log(`[models] Gemini CLI quota fetch failed (${quotaRes.status}):`, errText);
+          return NextResponse.json(
+            { error: `Failed to fetch Gemini CLI models: ${quotaRes.status}` },
+            { status: quotaRes.status }
+          );
+        }
+
+        const quotaData = await quotaRes.json();
+        const buckets: Array<{ modelId?: string; tokenType?: string }> = quotaData.buckets || [];
+
+        const models = buckets
+          .filter((b) => b.modelId)
+          .map((b) => ({
+            id: b.modelId,
+            name: b.modelId,
+            owned_by: "google",
+          }));
+
+        return buildResponse({ provider, connectionId, models });
+      } catch (err: unknown) {
+        const msg = err instanceof Error ? err.message : String(err);
+        console.log("[models] Gemini CLI model fetch error:", msg);
+        return NextResponse.json({ error: "Failed to fetch Gemini CLI models" }, { status: 500 });
+      }
+    }
+
    if (isAnthropicCompatibleProvider(provider)) {
      let baseUrl = getProviderBaseUrl(connection.providerSpecificData);
      if (!baseUrl) {
@@ -0,0 +1,96 @@
+import { NextRequest, NextResponse } from "next/server";
+import { getSettings, updateSettings } from "@/lib/localDb";
+import { isAuthenticated } from "@/shared/utils/apiAuth";
+import { z } from "zod";
+import { isValidationFailure, validateBody } from "@/shared/validation/helpers";
+
+// Validation schema for PUT bodies: every field is optional, so callers may
+// send any subset of the cache settings.
+const cacheConfigUpdateSchema = z.object({
+  semanticCacheEnabled: z.boolean().optional(),
+  semanticCacheMaxSize: z.number().positive().optional(),
+  semanticCacheTTL: z.number().positive().optional(),
+  promptCacheEnabled: z.boolean().optional(),
+  promptCacheStrategy: z.enum(["auto", "system-only", "manual"]).optional(),
+  alwaysPreserveClientCache: z.enum(["auto", "always", "never"]).optional(),
+});
+
+// Settings keys exposed by the GET handler, in response order.
+const CACHE_CONFIG_KEYS = [
+  "semanticCacheEnabled",
+  "semanticCacheMaxSize",
+  "semanticCacheTTL",
+  "promptCacheEnabled",
+  "promptCacheStrategy",
+  "alwaysPreserveClientCache",
+] as const;
+
+// Values returned by GET for any key that is null/undefined in stored settings.
+// NOTE(review): semanticCacheTTL is presumably milliseconds (1800000 = 30 min) — confirm.
+const DEFAULTS = {
+  semanticCacheEnabled: true,
+  semanticCacheMaxSize: 100,
+  semanticCacheTTL: 1800000,
+  promptCacheEnabled: true,
+  promptCacheStrategy: "auto",
+  alwaysPreserveClientCache: "auto",
+};
+
+/**
+ * GET handler: returns the current cache configuration.
+ *
+ * Requires an authenticated request (HTTP 401 otherwise). Each key listed in
+ * `CACHE_CONFIG_KEYS` is read from stored settings, falling back to `DEFAULTS`
+ * when the stored value is null or undefined. Responds with HTTP 500 and an
+ * `error` string if reading settings throws.
+ */
+export async function GET(request: NextRequest) {
+  const authorized = await isAuthenticated(request);
+  if (!authorized) {
+    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  }
+
+  try {
+    const stored = await getSettings();
+    const config: Record<string, unknown> = Object.fromEntries(
+      CACHE_CONFIG_KEYS.map((key) => [key, stored[key] ?? DEFAULTS[key]])
+    );
+    return NextResponse.json(config);
+  } catch (err) {
+    return NextResponse.json({ error: String(err) }, { status: 500 });
+  }
+}
+
+/**
+ * PUT handler: updates the cache configuration.
+ *
+ * Requires an authenticated request (HTTP 401 otherwise). The JSON body is
+ * validated against `cacheConfigUpdateSchema`; only fields present in the body
+ * are passed to `updateSettings`. Responds with `{ ok: true }` on success,
+ * HTTP 400 for an unparseable body, the validator's own response for a schema
+ * failure, or HTTP 500 with an `error` string if anything else throws.
+ */
+export async function PUT(request: NextRequest) {
+  const authorized = await isAuthenticated(request);
+  if (!authorized) {
+    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  }
+
+  try {
+    let rawBody: unknown;
+    try {
+      rawBody = await request.json();
+    } catch {
+      return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
+    }
+
+    const validation = validateBody(cacheConfigUpdateSchema, rawBody);
+    if (isValidationFailure(validation)) {
+      return validation.response;
+    }
+
+    const body = validation.data;
+    const updates: Record<string, unknown> = {};
+
+    // Copy across only the settings the caller actually supplied.
+    for (const key of CACHE_CONFIG_KEYS) {
+      const value = body[key];
+      if (value !== undefined) {
+        updates[key] = value;
+      }
+    }
+
+    await updateSettings(updates);
+    return NextResponse.json({ ok: true });
+  } catch (err) {
+    return NextResponse.json({ error: String(err) }, { status: 500 });
+  }
+}
@@ -1,6 +1,7 @@
 import { NextRequest, NextResponse } from "next/server";
 import { z } from "zod";
 import { isAuthenticated } from "@/shared/utils/apiAuth";
+import { isValidationFailure, validateBody } from "@/shared/validation/helpers";
 import {
  getCloudflaredTunnelStatus,
  startCloudflaredTunnel,
@@ -40,27 +41,27 @@ export async function POST(request: NextRequest) {
    return unauthorized();
  }

-  let payload: unknown;
+  let rawBody: unknown;
  try {
-    payload = await request.json();
+    rawBody = await request.json();
  } catch {
    return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
  }

-  const parsed = actionSchema.safeParse(payload);
-  if (!parsed.success) {
-    return NextResponse.json({ error: parsed.error.flatten() }, { status: 400 });
+  const validation = validateBody(actionSchema, rawBody);
+  if (isValidationFailure(validation)) {
+    return validation.response;
  }

+  const parsed = validation.data;
+
  try {
    const status =
-      parsed.data.action === "enable"
-        ? await startCloudflaredTunnel()
-        : await stopCloudflaredTunnel();
+      parsed.action === "enable" ? await startCloudflaredTunnel() : await stopCloudflaredTunnel();

    return NextResponse.json({
      success: true,
-      action: parsed.data.action,
+      action: parsed.action,
      status,
    });
  } catch (error) {
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "جارٍ تحميل لوحة تحكم MCP...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Зареждане на таблото за управление на MCP...",
@@ -1062,7 +1062,8 @@
    "a2aQuickStartStep2": "Odešlete požadavky JSON-RPC na `POST /a2a` pomocí `message/send` nebo `message/stream`.",
    "a2aQuickStartStep3": "Sledujte a ovládejte úkoly pomocí příkazů `tasks/get` a `tasks/cancel`.",
    "completionsLegacy": "Completions (Zastaralé)",
-    "completionsLegacyDesc": "Zastaralé OpenAI text completion – akceptuje oba formáty, prompt string i messages array."
+    "completionsLegacyDesc": "Zastaralé OpenAI text completion – akceptuje oba formáty, prompt string i messages array.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "endpoints": {
    "tabProxy": "Koncová Proxy",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Indlæser MCP-dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "MCP-Dashboard wird geladen...",
@@ -1712,6 +1712,17 @@
    "cacheMisses": "Cache Misses",
    "hitRate": "Hit Rate",
    "cacheEntries": "Cache Entries",
+    "cacheSettings": "Cache Settings",
+    "semanticCache": "Semantic Cache",
+    "maxEntries": "Max Entries",
+    "ttlMinutes": "TTL (minutes)",
+    "promptCache": "Prompt Cache",
+    "strategy": "Strategy",
+    "preserveClientCache": "Preserve Client Cache",
+    "enabled": "Enabled",
+    "loading": "Loading...",
+    "saving": "Saving...",
+    "save": "Save",
    "circuitBreaker": "Circuit Breaker",
    "retryPolicy": "Retry Policy",
    "maxRetries": "Max Retries",
@@ -2920,6 +2931,30 @@
    "clearSuccess": "Cache cleared. {count} expired entries removed.",
    "clearError": "Failed to clear cache.",
    "unavailable": "Cache unavailable",
-    "unavailableDesc": "Could not fetch cache statistics. Make sure the server is running."
+    "unavailableDesc": "Could not fetch cache statistics. Make sure the server is running.",
+    "promptCache": "Prompt Cache (Provider-Side)",
+    "cachedRequests": "Cached Requests",
+    "cacheHitRate": "Cache Hit Rate",
+    "cachedTokens": "Cached Tokens",
+    "cacheCreationTokens": "Cache Creation Tokens",
+    "byProvider": "Breakdown by Provider",
+    "provider": "Provider",
+    "requests": "Requests",
+    "inputTokens": "Input Tokens",
+    "cachedTokensCol": "Cached",
+    "cacheCreation": "Creation",
+    "trend24h": "Cache Trend (24h)",
+    "cached": "Cached",
+    "overview": "Overview",
+    "entries": "Entries",
+    "searchEntries": "Search entries...",
+    "search": "Search",
+    "loading": "Loading...",
+    "noEntries": "No cache entries found",
+    "signature": "Signature",
+    "model": "Model",
+    "created": "Created",
+    "expires": "Expires",
+    "actions": "Actions"
  }
 }
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Crea un Quick Tunnel temporal de Cloudflare. La URL cambia después de cada reinicio."
  },
  "mcpDashboard": {
    "loading": "Cargando el panel de MCP...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Ladataan MCP-hallintapaneelia...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Chargement du tableau de bord MCP...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -919,7 +919,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1062,7 +1062,8 @@
    "a2aQuickStartStep2": "`message/send` या `message/stream` का उपयोग करके JSON-RPC अनुरोधों को `POST /a2a` पर भेजें।",
    "a2aQuickStartStep3": "`tasks/get` और `tasks/cancel` का उपयोग करके कार्यों को ट्रैक और नियंत्रित करें।",
    "completionsLegacy": "पूर्णताएँ (विरासत)",
-    "completionsLegacyDesc": "लीगेसी ओपनएआई टेक्स्ट पूर्णताएँ - शीघ्र स्ट्रिंग और संदेश सरणी प्रारूप दोनों को स्वीकार करती हैं"
+    "completionsLegacyDesc": "लीगेसी ओपनएआई टेक्स्ट पूर्णताएँ - शीघ्र स्ट्रिंग और संदेश सरणी प्रारूप दोनों को स्वीकार करती हैं",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "endpoints": {
    "tabProxy": "समापन बिंदु प्रॉक्सी",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Laster inn MCP-dashbordet ...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Nilo-load ang MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1023,7 +1023,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Cria um Quick Tunnel temporário do Cloudflare. A URL muda a cada reinício."
  },
  "mcpDashboard": {
    "loading": "Carregando painel MCP...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "endpoints": {
    "tabProxy": "Endpoint Proxy",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1060,7 +1060,8 @@
    "a2aQuickStartStep2": "JSON-RPC isteklerini `message/send` veya `message/stream` kullanarak `POST /a2a` adresine gönderin.",
    "a2aQuickStartStep3": "Görevleri `tasks/get` ve `tasks/cancel` ile izleyin ve yönetin.",
    "completionsLegacy": "Tamamlamalar (Eski)",
-    "completionsLegacyDesc": "Eski OpenAI metin tamamlamaları — hem bilgi istemi dizesini hem de mesaj dizisi biçimini kabul eder"
+    "completionsLegacyDesc": "Eski OpenAI metin tamamlamaları — hem bilgi istemi dizesini hem de mesaj dizisi biçimini kabul eder",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "endpoints": {
    "tabProxy": "Uç Nokta Proxy",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -1011,7 +1011,8 @@
    "webSearch": "Web Search",
    "webSearchDesc": "Unified web search across multiple providers with automatic failover and caching",
    "searchProvider": "Search Provider",
-    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected."
+    "searchProviderDesc": "This provider is used for web search via POST /v1/search. No model configuration needed — search providers are ready to use once an API key is connected.",
+    "cloudflaredUrlNotice": "Creates a temporary Cloudflare Quick Tunnel. The URL changes after every restart."
  },
  "mcpDashboard": {
    "loading": "Loading MCP dashboard...",
@@ -13,6 +13,18 @@ const CLOUDFLARED_RELEASE_BASE =
  "https://github.com/cloudflare/cloudflared/releases/latest/download";
 const START_TIMEOUT_MS = 30000;
 const STOP_TIMEOUT_MS = 5000;
+// Common prefix of the generic "process exited" messages; isSpecificCloudflaredError() uses it
+// to tell those apart from messages parsed out of cloudflared's own output.
+const GENERIC_EXIT_ERROR_PREFIX = "cloudflared exited";
+// CA bundle file paths probed in order; the first one that exists is used as SSL_CERT_FILE.
+const DEFAULT_CERT_FILE_CANDIDATES = [
+  "/etc/ssl/certs/ca-certificates.crt",
+  "/etc/pki/tls/certs/ca-bundle.crt",
+  "/etc/ssl/cert.pem",
+  "/private/etc/ssl/cert.pem",
+] as const;
+// CA certificate directories probed in order; the first one that exists is used as SSL_CERT_DIR.
+const DEFAULT_CERT_DIR_CANDIDATES = [
+  "/etc/ssl/certs",
+  "/etc/pki/tls/certs",
+  "/system/etc/security/cacerts",
+] as const;

 type CloudflaredInstallSource = "managed" | "path" | "env";
 type TunnelPhase = "unsupported" | "not_installed" | "stopped" | "starting" | "running" | "error";
@@ -238,9 +250,55 @@ export function extractTryCloudflareUrl(text: string) {
  return match ? match[0] : null;
 }

+/**
+ * Strips surrounding whitespace and a leading `<ISO timestamp> <LEVEL>` prefix from one
+ * cloudflared log line, leaving only the message text.
+ *
+ * All three-letter console level tags are recognised (not only INF/WRN/ERR) so that fatal
+ * lines are normalised the same way as ordinary errors.
+ */
+function normalizeCloudflaredLogLine(line: string) {
+  return line
+    .trim()
+    .replace(/^\d{4}-\d{2}-\d{2}T\S+\s+(?:TRC|DBG|INF|WRN|ERR|FTL|PNC)\s+/i, "")
+    .trim();
+}
+
+/**
+ * Returns the last line of `text` that looks like an error, or `null` when none does.
+ *
+ * @param text Raw process output; may contain several lines and CRLF line endings.
+ * @returns The normalised (prefix-free) error line, or `null`.
+ */
+export function extractCloudflaredErrorMessage(text: string) {
+  const lines = String(text || "")
+    .split(/\r?\n/)
+    .map(normalizeCloudflaredLogLine)
+    .filter(Boolean);
+
+  // Scan backwards: the most recent matching line is the most relevant one.
+  for (let i = lines.length - 1; i >= 0; i--) {
+    // `tls:` deliberately has no trailing \b. The colon is normally followed by a space,
+    // and a word boundary can never match between two non-word characters.
+    if (/\b(?:error\b|failed\b|tls:|x509\b|certificate\b)/i.test(lines[i])) {
+      return lines[i];
+    }
+  }
+
+  return null;
+}
+
+/**
+ * Tells whether `error` is a real, non-empty message rather than one of the generic
+ * "cloudflared exited ..." messages.
+ */
+function isSpecificCloudflaredError(error: string | null | undefined) {
+  if (!error) return false;
+  return error.startsWith(GENERIC_EXIT_ERROR_PREFIX) === false;
+}
+
+/**
+ * Builds the generic message used when cloudflared exits without a more specific error.
+ *
+ * The text starts with GENERIC_EXIT_ERROR_PREFIX so that isSpecificCloudflaredError() keeps
+ * recognising it even if the prefix constant is changed later.
+ *
+ * @param code Exit code, or `null` when the process was terminated by a signal.
+ * @param signal Terminating signal, or `null`.
+ */
+function getGenericExitError(code: number | null, signal: NodeJS.Signals | null) {
+  const signalSuffix = signal ? `/${signal}` : "";
+  return `${GENERIC_EXIT_ERROR_PREFIX} unexpectedly (${code ?? "signal"}${signalSuffix})`;
+}
+
+/**
+ * Probes the candidate lists and returns the first existing CA bundle file and CA directory
+ * as `SSL_CERT_FILE` / `SSL_CERT_DIR`. A key is omitted when none of its candidates exists.
+ *
+ * @param existsSync Existence check; injectable for tests.
+ * @param certFileCandidates Bundle file paths, in priority order.
+ * @param certDirCandidates Certificate directory paths, in priority order.
+ */
+export function getDefaultCloudflaredCertEnv(
+  existsSync: (candidate: string) => boolean = fsSync.existsSync,
+  certFileCandidates: readonly string[] = DEFAULT_CERT_FILE_CANDIDATES,
+  certDirCandidates: readonly string[] = DEFAULT_CERT_DIR_CANDIDATES
+) {
+  const firstExisting = (paths: readonly string[]) => paths.find((path) => existsSync(path));
+  const detected: NodeJS.ProcessEnv = {};
+
+  const bundleFile = firstExisting(certFileCandidates);
+  if (bundleFile) detected.SSL_CERT_FILE = bundleFile;
+
+  const bundleDir = firstExisting(certDirCandidates);
+  if (bundleDir) detected.SSL_CERT_DIR = bundleDir;
+
+  return detected;
+}
+
 export function buildCloudflaredChildEnv(
  sourceEnv: NodeJS.ProcessEnv = process.env,
-  runtimeDirs: CloudflaredRuntimeDirs = getCloudflaredRuntimeDirs()
+  runtimeDirs: CloudflaredRuntimeDirs = getCloudflaredRuntimeDirs(),
+  defaultCertEnv: NodeJS.ProcessEnv = getDefaultCloudflaredCertEnv()
 ): NodeJS.ProcessEnv {
  const childEnv: NodeJS.ProcessEnv = {};

@@ -262,6 +320,12 @@ export function buildCloudflaredChildEnv(
  if (!childEnv.TMPDIR) childEnv.TMPDIR = runtimeDirs.tempDir;
  if (!childEnv.TMP) childEnv.TMP = runtimeDirs.tempDir;
  if (!childEnv.TEMP) childEnv.TEMP = runtimeDirs.tempDir;
+  if (!childEnv.SSL_CERT_FILE && defaultCertEnv.SSL_CERT_FILE) {
+    childEnv.SSL_CERT_FILE = defaultCertEnv.SSL_CERT_FILE;
+  }
+  if (!childEnv.SSL_CERT_DIR && defaultCertEnv.SSL_CERT_DIR) {
+    childEnv.SSL_CERT_DIR = defaultCertEnv.SSL_CERT_DIR;
+  }

  return childEnv;
 }
@@ -447,7 +511,9 @@ async function finalizeProcessExit(code: number | null, signal: NodeJS.Signals |
  const lastError =
    code === 0 || signal === "SIGTERM" || signal === "SIGINT"
      ? null
-      : `cloudflared exited unexpectedly (${code ?? "signal"}${signal ? `/${signal}` : ""})`;
+      : isSpecificCloudflaredError(currentState.lastError)
+        ? currentState.lastError
+        : getGenericExitError(code, signal);

  tunnelProcess = null;
  tunnelPid = null;
@@ -562,14 +628,10 @@ export async function startCloudflaredTunnel(): Promise<CloudflaredTunnelStatus>
      startedAt: new Date().toISOString(),
    });

-    const child = spawn(
-      binary.binaryPath as string,
-      getCloudflaredStartArgs(targetUrl),
-      {
-        stdio: ["ignore", "pipe", "pipe"],
-        env: buildCloudflaredChildEnv(),
-      }
-    );
+    const child = spawn(binary.binaryPath as string, getCloudflaredStartArgs(targetUrl), {
+      stdio: ["ignore", "pipe", "pipe"],
+      env: buildCloudflaredChildEnv(),
+    });

    tunnelProcess = child;
    tunnelPid = child.pid ?? null;
@@ -597,6 +659,14 @@ export async function startCloudflaredTunnel(): Promise<CloudflaredTunnelStatus>
        if (!text) return;

        await appendTunnelLog(source, text);
+        const errorMessage = source === "stderr" ? extractCloudflaredErrorMessage(text) : null;
+        if (errorMessage) {
+          await updateStateFile({
+            pid: child.pid,
+            status: "error",
+            lastError: errorMessage,
+          });
+        }
        const url = extractTryCloudflareUrl(text);
        if (!url) return;

@@ -643,11 +713,18 @@ export async function startCloudflaredTunnel(): Promise<CloudflaredTunnelStatus>
  try {
    return await startPromise;
  } catch (error) {
+    const currentState = await readStateFile();
+    const message = isSpecificCloudflaredError(currentState.lastError)
+      ? currentState.lastError
+      : error instanceof Error
+        ? error.message
+        : "Failed to start cloudflared tunnel";
+
    await updateStateFile({
      status: "error",
-      lastError: error instanceof Error ? error.message : "Failed to start cloudflared tunnel",
+      lastError: message,
    });
-    throw error;
+    throw new Error(message);
  } finally {
    startPromise = null;
  }
@@ -4,6 +4,10 @@ function asRecord(value: unknown): JsonRecord {
  return value && typeof value === "object" && !Array.isArray(value) ? (value as JsonRecord) : {};
 }

+/** Joins the non-empty entries of `parts` with newlines and trims the combined result. */
+function joinNonEmpty(parts: string[]) {
+  const kept: string[] = [];
+  for (const part of parts) {
+    if (part) kept.push(part);
+  }
+  return kept.join("\n").trim();
+}
+
 function extractTextFromContent(content: unknown): string {
  if (typeof content === "string") return content.trim();

@@ -28,12 +32,85 @@ function extractTextFromContent(content: unknown): string {
    .trim();
 }

+/**
+ * Collects the reasoning text attached to a message-like record.
+ *
+ * Sources, in output order: `reasoning_content`, `reasoning`, `reasoning_text`, then every
+ * `reasoning_details` entry whose `type` is missing/non-string, "reasoning",
+ * "reasoning.text" or "thinking". For a detail entry, `text` is used when it is a string,
+ * otherwise a string `content`.
+ *
+ * Identical segments are emitted only once, so text mirrored across several fields is
+ * not repeated in the result.
+ *
+ * @returns The segments joined with newlines, or "" when there is no reasoning text.
+ */
+function extractReasoningText(record: JsonRecord): string {
+  const trimmedString = (value: unknown) => (typeof value === "string" ? value.trim() : "");
+  const acceptedDetailTypes = ["", "reasoning", "reasoning.text", "thinking"];
+
+  const segments = [
+    trimmedString(record.reasoning_content),
+    trimmedString(record.reasoning),
+    trimmedString(record.reasoning_text),
+  ];
+
+  const reasoningDetails = Array.isArray(record.reasoning_details) ? record.reasoning_details : [];
+  for (const detail of reasoningDetails) {
+    const detailRecord = asRecord(detail);
+    const detailType = typeof detailRecord.type === "string" ? detailRecord.type : "";
+    if (!acceptedDetailTypes.includes(detailType)) continue;
+
+    // A string `text` wins even when it is empty; `content` is only the fallback.
+    segments.push(
+      typeof detailRecord.text === "string"
+        ? detailRecord.text.trim()
+        : trimmedString(detailRecord.content)
+    );
+  }
+
+  // Set iteration preserves first-seen order, so the output order is unchanged.
+  return joinNonEmpty(Array.from(new Set(segments)));
+}
+
+/**
+ * Reads the reasoning-token count from a response body's `usage` object.
+ *
+ * Looks at `usage.reasoning_tokens` and `usage.completion_tokens_details.reasoning_tokens`
+ * and returns the larger of the two; a missing or non-finite value counts as 0.
+ */
+function getUsageReasoningTokens(body: JsonRecord): number {
+  const finiteOrZero = (value: unknown) =>
+    typeof value === "number" && Number.isFinite(value) ? value : 0;
+
+  // asRecord() always yields an object ({} for anything else), so no null guard is needed.
+  const usage = asRecord(body.usage);
+  const completionDetails = asRecord(usage.completion_tokens_details);
+
+  return Math.max(
+    finiteOrZero(usage.reasoning_tokens),
+    finiteOrZero(completionDetails.reasoning_tokens)
+  );
+}
+
+/**
+ * Detects a completion that reported reasoning tokens but returned no visible output.
+ *
+ * True when usage reports more than zero reasoning tokens and at least one choice is an
+ * assistant message with a non-empty `finish_reason`, no content text and no reasoning text.
+ */
+function hasReasoningOnlyCompletion(body: JsonRecord): boolean {
+  if (!Array.isArray(body.choices) || body.choices.length === 0) return false;
+  if (getUsageReasoningTokens(body) <= 0) return false;
+
+  return body.choices.some((choice) => {
+    const choiceRecord = asRecord(choice);
+    // asRecord() never returns a falsy value, so only the role needs checking.
+    const message = asRecord(choiceRecord.message);
+    const finished =
+      typeof choiceRecord.finish_reason === "string" && choiceRecord.finish_reason !== "";
+
+    return (
+      message.role === "assistant" &&
+      finished &&
+      !extractTextFromContent(message.content) &&
+      !extractReasoningText(message)
+    );
+  });
+}
+
 export function buildComboTestRequestBody(modelStr: string) {
  return {
    model: modelStr,
    messages: [{ role: "user", content: "Reply with OK only." }],
-    // Keep this close to a real client request without inflating cost.
-    max_tokens: 16,
+    // Give reasoning-heavy models enough headroom to emit a tiny visible answer
+    // without turning the smoke test into a full-cost real request.
+    max_tokens: 64,
+    temperature: 0,
    stream: false,
  };
 }
@@ -52,6 +129,9 @@ export function extractComboTestResponseText(responseBody: unknown): string {
      const messageText = extractTextFromContent(message.content);
      if (messageText) return messageText;

+      const reasoningText = extractReasoningText(message);
+      if (reasoningText) return reasoningText;
+
      if (typeof choiceRecord.text === "string" && choiceRecord.text.trim()) {
        return choiceRecord.text.trim();
      }
@@ -63,8 +143,21 @@ export function extractComboTestResponseText(responseBody: unknown): string {
      const itemRecord = asRecord(item);
      const contentText = extractTextFromContent(itemRecord.content);
      if (contentText) return contentText;
+
+      const reasoningText = extractReasoningText(itemRecord);
+      if (reasoningText) return reasoningText;
    }
  }

-  return extractTextFromContent(body.content);
+  const topLevelText = extractTextFromContent(body.content);
+  if (topLevelText) return topLevelText;
+
+  const topLevelReasoning = extractReasoningText(body);
+  if (topLevelReasoning) return topLevelReasoning;
+
+  if (hasReasoningOnlyCompletion(body)) {
+    return "[reasoning-only completion]";
+  }
+
+  return "";
 }
@@ -0,0 +1,15 @@
+-- Migration 012: Fix tokens_input to include cache tokens
+--
+-- Problem: Historical data stored tokens_input as just the base input_tokens
+-- from the API, not including cache_read and cache_creation tokens.
+--
+-- Per Claude API docs:
+-- Total input tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens
+--
+-- This migration corrects historical records by adding cache tokens to tokens_input.
+-- Only affects records where cache tokens exist.
+--
+-- NOTE: the update is additive, so it must be applied exactly once.
+
+-- Update tokens_input to include cache tokens.
+-- COALESCE is required: in SQLite any arithmetic with NULL yields NULL, so a row with only
+-- one of the cache columns populated would otherwise have tokens_input overwritten with NULL.
+UPDATE usage_history
+SET tokens_input = COALESCE(tokens_input, 0)
+  + COALESCE(tokens_cache_read, 0)
+  + COALESCE(tokens_cache_creation, 0)
+WHERE tokens_cache_read > 0 OR tokens_cache_creation > 0;
@@ -577,9 +577,14 @@ export async function getCacheMetrics() {
      cacheCreationTokens: number | null;
    }>;

-    // Calculate tokens saved (cached tokens are reused, not charged at full price)
    const tokensSaved = totalsRow?.totalCachedTokens || 0;

+    const AVG_INPUT_PRICE_PER_MILLION = 3;
+    const CACHE_DISCOUNT = 0.9;
+    const estimatedCostSaved =
+      Math.round((tokensSaved / 1_000_000) * AVG_INPUT_PRICE_PER_MILLION * CACHE_DISCOUNT * 100) /
+      100;
+
    // Build byProvider object
    const byProvider: Record<
      string,
@@ -653,6 +658,58 @@ export async function updateCacheMetrics(_metrics: Record<string, unknown>) {
  return getCacheMetrics();
 }

+/** One hourly bucket of cache usage, as produced by getCacheTrend(). */
+export interface CacheTrendPoint {
+  /** Start of the hour bucket, formatted as `YYYY-MM-DDTHH:00:00Z`. */
+  timestamp: string;
+  /** Number of usage_history rows in the bucket. */
+  requests: number;
+  /** Rows in the bucket with cache-read or cache-creation tokens greater than zero. */
+  cachedRequests: number;
+  /** Sum of `tokens_input` over the bucket. */
+  inputTokens: number;
+  /** Sum of `tokens_cache_read` over the bucket. */
+  cachedTokens: number;
+  /** Sum of `tokens_cache_creation` over the bucket. */
+  cacheCreationTokens: number;
+}
+
+/**
+ * Returns per-hour cache usage aggregated from `usage_history`, oldest bucket first.
+ *
+ * @param hours Size of the look-back window in hours. A non-finite, zero or negative value
+ *   falls back to 24, because it would otherwise build an invalid or empty SQLite modifier
+ *   (e.g. "--5 hours" or "-NaN hours") and silently yield no rows.
+ * @returns One point per hour that has rows; an empty array when the query fails
+ *   (the failure is logged).
+ */
+export async function getCacheTrend(hours = 24): Promise<CacheTrendPoint[]> {
+  const db = getDbInstance();
+  const windowHours = Number.isFinite(hours) && hours > 0 ? hours : 24;
+
+  try {
+    // NOTE(review): datetime('now', ...) yields `YYYY-MM-DD HH:MM:SS`. If `timestamp` is
+    // stored as ISO-8601 with a `T` separator, this text comparison is only date-accurate
+    // at the window boundary — confirm the stored format.
+    const rows = db
+      .prepare(
+        `
+        SELECT
+          strftime('%Y-%m-%dT%H:00:00Z', timestamp) as hour,
+          COUNT(*) as requests,
+          SUM(CASE WHEN tokens_cache_read > 0 OR tokens_cache_creation > 0 THEN 1 ELSE 0 END) as cachedRequests,
+          SUM(tokens_input) as inputTokens,
+          SUM(tokens_cache_read) as cachedTokens,
+          SUM(tokens_cache_creation) as cacheCreationTokens
+        FROM usage_history
+        WHERE timestamp >= datetime('now', ?)
+        GROUP BY hour
+        ORDER BY hour ASC
+      `
+      )
+      .all(`-${windowHours} hours`) as Array<{
+      hour: string;
+      requests: number;
+      cachedRequests: number;
+      inputTokens: number | null;
+      cachedTokens: number | null;
+      cacheCreationTokens: number | null;
+    }>;
+
+    // SUM() is NULL when every summed value is NULL; report those as 0.
+    return rows.map((r) => ({
+      timestamp: r.hour,
+      requests: r.requests,
+      cachedRequests: r.cachedRequests,
+      inputTokens: r.inputTokens ?? 0,
+      cachedTokens: r.cachedTokens ?? 0,
+      cacheCreationTokens: r.cacheCreationTokens ?? 0,
+    }));
+  } catch (error) {
+    console.error("Failed to fetch cache trend:", error);
+    return [];
+  }
+}
+
 export async function resetCacheMetrics() {
  // No-op: cannot delete historical usage data
  // Cache metrics are computed from usage_history, so they reflect actual request history
@@ -0,0 +1 @@
+export { analyzePrefix, shouldInjectCacheControl } from "./prefixAnalyzer";
@@ -0,0 +1,77 @@
+import crypto from "crypto";
+
+/** Minimal chat message shape read by the prefix analyzer. */
+interface Message {
+  /** Sender role; the analyzer distinguishes "system", "tool" and "assistant". */
+  role: string;
+  /** Plain text, or an array of content parts (serialized with JSON.stringify for hashing). */
+  content: string | unknown[];
+}
+
+/** Result of analyzePrefix(): where the leading prefix of a conversation ends. */
+interface PrefixAnalysis {
+  /** Index of the last message that belongs to the prefix; -1 when there is none. */
+  prefixEndIdx: number;
+  /** Hex SHA-256 digest computed over the prefix message contents. */
+  prefixHash: string;
+  /** Rough token estimate for the prefix text (about four characters per token). */
+  prefixTokens: number;
+  /** Classification taken from the last message included in the prefix. */
+  prefixType: "system_only" | "system_and_tools" | "system_tools_history";
+  /** Heuristic score between 0 and 0.9; shouldInjectCacheControl() requires at least 0.7. */
+  confidence: number;
+}
+
+/** Returns string content unchanged and serializes any other content with JSON.stringify. */
+function normalizeContent(content: string | unknown[]): string {
+  return typeof content === "string" ? content : JSON.stringify(content);
+}
+
+/** Estimates the token count of `text` as one token per four characters, rounded up. */
+function estimateTokens(text: string): number {
+  const charsPerToken = 4;
+  const estimate = text.length / charsPerToken;
+  return Math.ceil(estimate);
+}
+
+/**
+ * Finds the leading run of system / tool / assistant messages of a conversation and
+ * describes it.
+ *
+ * Scanning stops at the first message with any other role (a missing role counts as
+ * "user"). The reported type and confidence come from the last message of the run.
+ *
+ * When there is no prefix at all — empty or non-array input, or a first message that already
+ * stops the scan — the same neutral result is returned (`prefixEndIdx: -1`, empty hash,
+ * zero tokens, zero confidence) instead of the digest of an empty string.
+ *
+ * @param messages Conversation messages in order.
+ * @returns End index, content hash, token estimate, type and confidence of the prefix.
+ */
+export function analyzePrefix(messages: Message[]): PrefixAnalysis {
+  const noPrefix: PrefixAnalysis = {
+    prefixEndIdx: -1,
+    prefixHash: "",
+    prefixTokens: 0,
+    prefixType: "system_only",
+    confidence: 0,
+  };
+
+  if (!Array.isArray(messages) || messages.length === 0) {
+    return noPrefix;
+  }
+
+  let prefixEndIdx = -1;
+  let prefixType: PrefixAnalysis["prefixType"] = "system_only";
+  let confidence = 0;
+
+  for (let i = 0; i < messages.length; i++) {
+    const msg = messages[i];
+    // A null/undefined entry is treated like a user message: it ends the prefix.
+    const role = msg?.role || "user";
+
+    if (role === "system") {
+      prefixEndIdx = i;
+      prefixType = "system_only";
+      confidence = 0.9;
+    } else if (role === "tool" || (role === "assistant" && Array.isArray(msg.content))) {
+      prefixEndIdx = i;
+      prefixType = "system_and_tools";
+      confidence = 0.8;
+    } else if (role === "assistant") {
+      prefixEndIdx = i;
+      prefixType = "system_tools_history";
+      confidence = 0.7;
+    } else {
+      break;
+    }
+  }
+
+  if (prefixEndIdx < 0) {
+    return noPrefix;
+  }
+
+  const prefixMessages = messages.slice(0, prefixEndIdx + 1);
+  const prefixText = prefixMessages.map((m) => normalizeContent(m.content)).join("\n");
+  const prefixHash = crypto.createHash("sha256").update(prefixText).digest("hex");
+  const prefixTokens = estimateTokens(prefixText);
+
+  return {
+    prefixEndIdx,
+    prefixHash,
+    prefixTokens,
+    prefixType,
+    confidence,
+  };
+}
+
+/**
+ * Decides whether a prefix is worth marking for caching: it must be at least `minTokens`
+ * long and have a confidence of at least 0.7.
+ */
+export function shouldInjectCacheControl(analysis: PrefixAnalysis, minTokens = 1024): boolean {
+  const minConfidence = 0.7;
+  const longEnough = analysis.prefixTokens >= minTokens;
+  if (!longEnough) return false;
+  return analysis.confidence >= minConfidence;
+}
@@ -29,6 +29,45 @@ function toNumber(value: unknown, fallback = 0): number {
  return fallback;
 }

+/**
+ * Creates the `cache_metrics` table when it is missing and seeds the `hits`, `misses` and
+ * `tokens_saved` rows with 0. Any database error is ignored (best effort).
+ */
+function ensureCacheMetricsTable() {
+  const createTableSql = `CREATE TABLE IF NOT EXISTS cache_metrics (
+        key TEXT PRIMARY KEY,
+        value INTEGER NOT NULL DEFAULT 0,
+        updated_at TEXT NOT NULL DEFAULT (datetime('now'))
+      )`;
+  const seedRowsSql = `INSERT OR IGNORE INTO cache_metrics (key, value) VALUES ('hits', 0), ('misses', 0), ('tokens_saved', 0)`;
+
+  try {
+    const db = getDbInstance();
+    db.prepare(createTableSql).run();
+    // INSERT OR IGNORE leaves counters that already exist untouched.
+    db.prepare(seedRowsSql).run();
+  } catch {
+    // DB not available
+  }
+}
+
+/**
+ * Adds `amount` to one persisted cache counter and refreshes its `updated_at`.
+ *
+ * Best effort: when the database is unavailable the increment is dropped — there is no
+ * in-memory fallback counter.
+ *
+ * @param metric Counter row to update.
+ * @param amount Value to add; zero and non-finite amounts are ignored without a write.
+ */
+function incrementMetric(metric: "hits" | "misses" | "tokens_saved", amount = 1) {
+  // Adding 0 changes nothing, and NaN/Infinity would only fail the NOT NULL/INTEGER column.
+  if (!Number.isFinite(amount) || amount === 0) return;
+
+  try {
+    const db = getDbInstance();
+    db.prepare(
+      `UPDATE cache_metrics SET value = value + ?, updated_at = datetime('now') WHERE key = ?`
+    ).run(amount, metric);
+  } catch {
+    // DB not available — the increment is intentionally dropped
+  }
+}
+
+/**
+ * Reads one counter from `cache_metrics`. Returns 0 when the row is missing or the
+ * database cannot be queried.
+ */
+function getMetricValue(metric: string): number {
+  try {
+    const stored = getDbInstance()
+      .prepare(`SELECT value FROM cache_metrics WHERE key = ?`)
+      .get(metric);
+    if (!stored) return 0;
+    return toNumber(asRecord(stored).value, 0);
+  } catch {
+    return 0;
+  }
+}
+
 function getHeaderValue(
  headers: { get?: (name: string) => string | null } | Record<string, unknown> | null | undefined,
  name: string
@@ -51,7 +90,6 @@ function getHeaderValue(
 // ─── Singleton ─────────────────

 let memoryCache: LRUCache | null = null;
-let stats = { hits: 0, misses: 0, tokensSaved: 0 };

 function getMemoryCache() {
  if (!memoryCache) {
@@ -60,6 +98,7 @@ function getMemoryCache() {
      maxBytes: parseInt(process.env.SEMANTIC_CACHE_MAX_BYTES || String(4 * 1024 * 1024), 10),
      defaultTTL: parseInt(process.env.SEMANTIC_CACHE_TTL_MS || "1800000", 10),
    });
+    ensureCacheMetricsTable();
  }
  return memoryCache;
 }
@@ -108,8 +147,8 @@ export function getCachedResponse(signature) {
  // 1. Check memory cache
  const memResult = getMemoryCache().get(signature);
  if (memResult) {
-    stats.hits++;
-    stats.tokensSaved += memResult.tokensSaved || 0;
+    incrementMetric("hits");
+    incrementMetric("tokens_saved", memResult.tokensSaved || 0);
    return memResult.response;
  }

@@ -126,7 +165,7 @@ export function getCachedResponse(signature) {
      const record = asRecord(row);
      const responsePayload = typeof record.response === "string" ? record.response : null;
      if (!responsePayload) {
-        stats.misses++;
+        incrementMetric("misses");
        return null;
      }
      const parsed = JSON.parse(responsePayload);
@@ -141,15 +180,15 @@ export function getCachedResponse(signature) {
        signature
      );

-      stats.hits++;
-      stats.tokensSaved += tokensSaved;
+      incrementMetric("hits");
+      incrementMetric("tokens_saved", tokensSaved);
      return parsed;
    }
  } catch {
    // DB not available — fail open
  }

-  stats.misses++;
+  incrementMetric("misses");
  return null;
 }

@@ -280,6 +319,17 @@ export function stopAutoCleanup(): void {
  }
 }

+/**
+ * Delete `semantic_cache` rows whose `created_at` is older than the retention
+ * window.
+ *
+ * NOTE(review): despite the name, this deletes from `semantic_cache`, not from
+ * `cache_metrics` — confirm that this is the intended target.
+ * NOTE(review): the cutoff is an ISO-8601 string from `toISOString()`; this
+ * assumes `created_at` is stored in a string format that compares correctly
+ * against it — confirm against the writer of that column.
+ *
+ * @param retentionDays - Age threshold in days (default 90).
+ * @returns The statement's `changes` count, or 0 when it is falsy or on any error.
+ */
+export function cleanOldMetrics(retentionDays = 90): number {
+  const MS_PER_DAY = 86400000;
+  try {
+    const db = getDbInstance();
+    const cutoff = new Date(Date.now() - retentionDays * MS_PER_DAY).toISOString();
+    const outcome = db.prepare("DELETE FROM semantic_cache WHERE created_at < ?").run(cutoff);
+    return outcome.changes || 0;
+  } catch {
+    return 0;
+  }
+}
+
 /**
 * Clear all cache entries.
 */
@@ -288,17 +338,12 @@ export function clearCache() {
  try {
    const db = getDbInstance();
    db.prepare("DELETE FROM semantic_cache").run();
+    db.prepare("UPDATE cache_metrics SET value = 0").run();
  } catch {
    // DB not available
  }
-  stats = { hits: 0, misses: 0, tokensSaved: 0 };
 }

-// ─── Stats ─────────────────
-
-/**
- * Get cache statistics.
- */
 export function getCacheStats() {
  const memStats = getMemoryCache().getStats();
  let dbSize = 0;
@@ -312,14 +357,18 @@ export function getCacheStats() {
    // DB not available
  }

-  const total = stats.hits + stats.misses;
+  const hits = getMetricValue("hits");
+  const misses = getMetricValue("misses");
+  const tokensSaved = getMetricValue("tokens_saved");
+  const total = hits + misses;
+
  return {
    memoryEntries: memStats.size,
    dbEntries: dbSize,
-    hits: stats.hits,
-    misses: stats.misses,
-    hitRate: total > 0 ? ((stats.hits / total) * 100).toFixed(1) : "0.0",
-    tokensSaved: stats.tokensSaved,
+    hits,
+    misses,
+    hitRate: total > 0 ? ((hits / total) * 100).toFixed(1) : "0.0",
+    tokensSaved,
  };
 }

@@ -52,19 +52,9 @@ export function getLoggedInputTokens(tokens: unknown): number {
    return toFiniteNumber(tokenRecord.input_tokens);
  }

+  // prompt_tokens from translator already includes input + cache_read + cache_creation
+  // Do NOT subtract cached tokens - we want the total billable prompt tokens
  const promptTokens = toFiniteNumber(tokenRecord.prompt_tokens);
-  if (promptTokens <= 0) return 0;
-
-  const promptDetails = getPromptTokenDetails(tokenRecord);
-  const cachedFromDetails = toFiniteNumber(promptDetails.cached_tokens);
-  if (cachedFromDetails > 0) {
-    return Math.max(promptTokens - cachedFromDetails, 0);
-  }
-
-  if ("cached_tokens" in tokenRecord && !("cache_read_input_tokens" in tokenRecord)) {
-    return Math.max(promptTokens - toFiniteNumber(tokenRecord.cached_tokens), 0);
-  }
-
  return promptTokens;
 }

@@ -73,7 +63,17 @@ export function getLoggedOutputTokens(tokens: unknown): number {
  if (tokenRecord.output !== undefined && tokenRecord.output !== null) {
    return toFiniteNumber(tokenRecord.output);
  }
-  return toFiniteNumber(
-    tokenRecord.completion_tokens ?? tokenRecord.output_tokens
-  );
+  return toFiniteNumber(tokenRecord.completion_tokens ?? tokenRecord.output_tokens);
+}
+
+/**
+ * Format token usage as a short `in=… | out=…` string.
+ *
+ * @param tokens - Usage payload forwarded unchanged to the three token helpers.
+ * @returns `in=<input> | out=<output>`, with ` | CR=<cacheRead>` appended only
+ *   when the cache-read count is greater than 0.
+ */
+export function formatUsageLog(tokens: unknown): string {
+  const inputCount = getLoggedInputTokens(tokens);
+  const outputCount = getLoggedOutputTokens(tokens);
+  const cacheReadCount = getPromptCacheReadTokens(tokens);
+
+  const summary = `in=${inputCount} | out=${outputCount}`;
+  return cacheReadCount > 0 ? `${summary} | CR=${cacheReadCount}` : summary;
 }
@@ -656,6 +656,7 @@ export const ID_TO_ALIAS = Object.values(AI_PROVIDERS).reduce((acc, p) => {
 // Providers that support usage/quota API
 export const USAGE_SUPPORTED_PROVIDERS = [
  "antigravity",
+  "gemini-cli",
  "kiro",
  "github",
  "codex",
@@ -4,14 +4,6 @@ type OpenCodeConfigInput = {
  model?: string;
 };

-type OpenCodeProviderConfig = {
-  name: string;
-  api: "openai";
-  baseURL: string;
-  apiKey: string;
-  models: string[];
-};
-
 const OPENCODE_DEFAULT_MODELS = [
  "claude-opus-4-5-thinking",
  "claude-sonnet-4-5-thinking",
@@ -28,7 +20,7 @@ export const buildOpenCodeProviderConfig = ({
  baseUrl,
  apiKey,
  model,
-}: OpenCodeConfigInput): OpenCodeProviderConfig => {
+}: OpenCodeConfigInput): Record<string, any> => {
  const normalizedBaseUrl = String(baseUrl || "")
    .trim()
    .replace(/\/+$/, "");
@@ -36,12 +28,21 @@ export const buildOpenCodeProviderConfig = ({

  const uniqueModels = [...new Set([normalizedModel, ...OPENCODE_DEFAULT_MODELS].filter(Boolean))];

+  const modelsRecord: Record<string, { name: string }> = {};
+  for (const m of uniqueModels) {
+    if (m) {
+      modelsRecord[m] = { name: m };
+    }
+  }
+
  return {
+    npm: "@ai-sdk/openai-compatible",
    name: "OmniRoute",
-    api: "openai",
-    baseURL: normalizedBaseUrl,
-    apiKey: apiKey || "sk_omniroute",
-    models: uniqueModels,
+    options: {
+      baseURL: normalizedBaseUrl,
+      apiKey: apiKey || "sk_omniroute",
+    },
+    models: modelsRecord,
  };
 };

@@ -270,9 +270,17 @@ describe("Page Integration — usage page wiring", () => {
 describe("Page Integration — settings page wiring", () => {
  const src = readProjectFile("src/app/(dashboard)/dashboard/settings/page.tsx");

-  it("should include resilience and cache cards in tabs", () => {
+  it("should include resilience tab in advanced settings", () => {
    assert.ok(src, "src/app/(dashboard)/dashboard/settings/page.tsx should exist");
    assert.match(src, /ResilienceTab/);
+  });
+});
+
+describe("Page Integration — cache page wiring", () => { // checks the cache page source text, not rendered output
+  const src = readProjectFile("src/app/(dashboard)/dashboard/cache/page.tsx");
+
+  it("should include cache stats card for prompt cache metrics", () => {
+    assert.ok(src, "src/app/(dashboard)/dashboard/cache/page.tsx should exist");
    assert.match(src, /CacheStatsCard/); // passes when the identifier appears anywhere in the file text
  });
 });
@@ -3,7 +3,9 @@ import assert from "node:assert/strict";

 import {
  buildCloudflaredChildEnv,
+  extractCloudflaredErrorMessage,
  extractTryCloudflareUrl,
+  getDefaultCloudflaredCertEnv,
  getCloudflaredStartArgs,
  getCloudflaredAssetSpec,
 } from "../../src/lib/cloudflaredTunnel.ts";
@@ -20,6 +22,17 @@ test("extractTryCloudflareUrl returns null when no tunnel URL is present", () =>
  assert.equal(extractTryCloudflareUrl("cloudflared starting without assigned URL"), null);
 });

+test("extractCloudflaredErrorMessage keeps the actionable stderr line", () => { // input below: one INF line followed by one ERR line
+  const error = extractCloudflaredErrorMessage(
+    '2026-03-30T19:56:12Z INF Requesting new quick Tunnel on trycloudflare.com...\n2026-03-30T19:56:12Z ERR failed to request quick Tunnel: Post "https://api.trycloudflare.com/tunnel": tls: failed to verify certificate: x509: certificate signed by unknown authority'
+  );
+
+  assert.equal(
+    error, // expected value is the ERR line without its timestamp and level prefix
+    'failed to request quick Tunnel: Post "https://api.trycloudflare.com/tunnel": tls: failed to verify certificate: x509: certificate signed by unknown authority'
+  );
+});
+
 test("getCloudflaredAssetSpec resolves linux amd64 binary", () => {
  const spec = getCloudflaredAssetSpec("linux", "x64");

@@ -49,22 +62,26 @@ test("getCloudflaredAssetSpec returns null for unsupported platforms", () => {
 });

 test("buildCloudflaredChildEnv keeps runtime essentials, isolates runtime dirs, and drops secrets", () => {
-  const env = buildCloudflaredChildEnv({
-    PATH: "/usr/bin",
-    HTTPS_PROXY: "http://proxy.internal:8080",
-    JWT_SECRET: "top-secret",
-    API_KEY_SECRET: "another-secret",
-  }, {
-    runtimeRoot: "/managed/runtime",
-    homeDir: "/managed/runtime/home",
-    configDir: "/managed/runtime/config",
-    cacheDir: "/managed/runtime/cache",
-    dataDir: "/managed/runtime/data",
-    tempDir: "/managed/runtime/tmp",
-    userProfileDir: "/managed/runtime/userprofile",
-    appDataDir: "/managed/runtime/userprofile/AppData/Roaming",
-    localAppDataDir: "/managed/runtime/userprofile/AppData/Local",
-  });
+  const env = buildCloudflaredChildEnv(
+    {
+      PATH: "/usr/bin",
+      HTTPS_PROXY: "http://proxy.internal:8080",
+      JWT_SECRET: "top-secret",
+      API_KEY_SECRET: "another-secret",
+    },
+    {
+      runtimeRoot: "/managed/runtime",
+      homeDir: "/managed/runtime/home",
+      configDir: "/managed/runtime/config",
+      cacheDir: "/managed/runtime/cache",
+      dataDir: "/managed/runtime/data",
+      tempDir: "/managed/runtime/tmp",
+      userProfileDir: "/managed/runtime/userprofile",
+      appDataDir: "/managed/runtime/userprofile/AppData/Roaming",
+      localAppDataDir: "/managed/runtime/userprofile/AppData/Local",
+    },
+    {}
+  );

  assert.deepEqual(env, {
    PATH: "/usr/bin",
@@ -82,6 +99,41 @@ test("buildCloudflaredChildEnv keeps runtime essentials, isolates runtime dirs,
  });
 });

+test("getDefaultCloudflaredCertEnv detects common CA bundle paths", () => {
+  const env = getDefaultCloudflaredCertEnv((candidate) => // stub predicate: true only for the two paths listed below
+    ["/etc/ssl/certs/ca-certificates.crt", "/etc/ssl/certs"].includes(candidate)
+  );
+
+  assert.deepEqual(env, { // exactly these two variables are expected, nothing else
+    SSL_CERT_FILE: "/etc/ssl/certs/ca-certificates.crt",
+    SSL_CERT_DIR: "/etc/ssl/certs",
+  });
+});
+
+test("buildCloudflaredChildEnv injects discovered CA paths when the parent env omits them", () => {
+  const env = buildCloudflaredChildEnv(
+    { PATH: "/usr/bin" }, // first argument: parent env with no SSL_CERT_* entries
+    { // second argument: managed runtime directory layout
+      runtimeRoot: "/managed/runtime",
+      homeDir: "/managed/runtime/home",
+      configDir: "/managed/runtime/config",
+      cacheDir: "/managed/runtime/cache",
+      dataDir: "/managed/runtime/data",
+      tempDir: "/managed/runtime/tmp",
+      userProfileDir: "/managed/runtime/userprofile",
+      appDataDir: "/managed/runtime/userprofile/AppData/Roaming",
+      localAppDataDir: "/managed/runtime/userprofile/AppData/Local",
+    },
+    { // third argument: the CA paths this test expects to see copied into the result
+      SSL_CERT_FILE: "/etc/ssl/certs/ca-certificates.crt",
+      SSL_CERT_DIR: "/etc/ssl/certs",
+    }
+  );
+
+  assert.equal(env.SSL_CERT_FILE, "/etc/ssl/certs/ca-certificates.crt");
+  assert.equal(env.SSL_CERT_DIR, "/etc/ssl/certs");
+});
+
 test("getCloudflaredStartArgs relies on cloudflared protocol auto-negotiation", () => {
  assert.deepEqual(getCloudflaredStartArgs("http://127.0.0.1:20128"), [
    "tunnel",
@@ -9,7 +9,8 @@ test("combo test helper builds a realistic smoke payload", () => {

  assert.equal(body.model, "openrouter/openai/gpt-5.4");
  assert.equal(body.messages[0].content, "Reply with OK only.");
-  assert.equal(body.max_tokens, 16);
+  assert.equal(body.max_tokens, 64);
+  assert.equal(body.temperature, 0);
  assert.equal(body.stream, false);
 });

@@ -46,6 +47,62 @@ test("combo test helper extracts text from block-based responses", () => {
  assert.equal(text, "OK\nConfirmed.");
 });

+test("combo test helper extracts reasoning content when visible text is absent", () => {
+  const text = extractComboTestResponseText({
+    choices: [
+      {
+        message: {
+          role: "assistant",
+          content: null, // no visible assistant text in this fixture
+          reasoning_content: "Working through the request.\nOK",
+        },
+      },
+    ],
+  });
+
+  assert.equal(text, "Working through the request.\nOK"); // reasoning_content is expected back unchanged
+});
+
+test("combo test helper extracts reasoning_text aliases from GitHub-style responses", () => {
+  const text = extractComboTestResponseText({
+    choices: [
+      {
+        message: {
+          role: "assistant",
+          content: "", // empty string rather than null in this fixture
+          reasoning_text: "Reasoning trace", // alternative field name to reasoning_content
+        },
+      },
+    ],
+  });
+
+  assert.equal(text, "Reasoning trace");
+});
+
+test("combo test helper treats reasoning-only completions as a healthy signal", () => {
+  const text = extractComboTestResponseText({
+    choices: [
+      {
+        finish_reason: "length",
+        message: { // carries neither visible text nor any reasoning field
+          role: "assistant",
+          content: "",
+        },
+      },
+    ],
+    usage: {
+      prompt_tokens: 6,
+      completion_tokens: 12,
+      total_tokens: 18,
+      completion_tokens_details: {
+        reasoning_tokens: 12, // the only sign of reasoning output in this fixture
+      },
+    },
+  });
+
+  assert.equal(text, "[reasoning-only completion]"); // placeholder string expected instead of an empty result
+});
+
 test("combo test helper returns empty string when no text content exists", () => {
  const text = extractComboTestResponseText({
    choices: [
@@ -86,6 +86,8 @@ test("combo test route marks a model healthy only when it returns assistant text
  assert.match(fetchCalls[0].init.headers["X-Request-Id"], /^combo-test-/);
  assert.equal(forwardedBody.model, "openrouter/openai/gpt-5.4");
  assert.equal(forwardedBody.messages[0].content, "Reply with OK only.");
+  assert.equal(forwardedBody.max_tokens, 64);
+  assert.equal(forwardedBody.temperature, 0);
  assert.equal(body.resolvedBy, "openrouter/openai/gpt-5.4");
  assert.equal(body.results[0].status, "ok");
  assert.equal(body.results[0].responseText, "OK");
@@ -122,6 +124,45 @@ test("combo test route treats empty successful responses as failures", async ()
  assert.match(body.results[0].error, /no text content/i);
 });

+test("combo test route accepts reasoning-only completions as healthy smoke-test responses", async () => {
+  await createTestCombo();
+
+  globalThis.fetch = async () => // stubbed fetch: always answers 200 with the JSON body below
+    new Response(
+      JSON.stringify({
+        choices: [
+          {
+            finish_reason: "length",
+            message: {
+              role: "assistant",
+              content: "", // no visible text
+            },
+          },
+        ],
+        usage: {
+          prompt_tokens: 6,
+          completion_tokens: 12,
+          total_tokens: 18,
+          completion_tokens_details: {
+            reasoning_tokens: 12,
+          },
+        },
+      }),
+      {
+        status: 200,
+        headers: { "content-type": "application/json" },
+      }
+    );
+
+  const response = await route.POST(makeRequest());
+  const body = await response.json();
+
+  assert.equal(response.status, 200);
+  assert.equal(body.resolvedBy, "openrouter/openai/gpt-5.4");
+  assert.equal(body.results[0].status, "ok"); // reported as healthy despite the empty content
+  assert.equal(body.results[0].responseText, "[reasoning-only completion]");
+});
+
 test("combo test route surfaces provider errors instead of downgrading them to reachability", async () => {
  await createTestCombo();

@@ -148,3 +189,67 @@ test("combo test route surfaces provider errors instead of downgrading them to r
  assert.equal(body.results[0].error, "Upstream rejected this request shape");
  assert.equal("probeMethod" in body.results[0], false);
 });
+
+test("combo test route launches model probes concurrently while preserving combo order", async () => {
+  await createTestCombo(["provider/first", "provider/second", "provider/third"]);
+
+  const fetchCalls = [];
+  const resolvers = [];
+  globalThis.fetch = (url, init = {}) => // stubbed fetch: records each call and leaves its promise pending
+    new Promise((resolve) => {
+      fetchCalls.push({ url: String(url), init });
+      resolvers.push(resolve);
+    });
+
+  const responsePromise = route.POST(makeRequest());
+  await new Promise((resolve) => setTimeout(resolve, 0)); // let one timer tick pass before inspecting fetchCalls
+
+  assert.equal(fetchCalls.length, 3); // all three probes were issued while none had been resolved
+  assert.deepEqual(
+    fetchCalls.map(({ init }) => JSON.parse(init.body).model),
+    ["provider/first", "provider/second", "provider/third"]
+  );
+
+  resolvers[2]( // resolve in reverse order: third, then second, then first
+    new Response(
+      JSON.stringify({
+        choices: [{ message: { role: "assistant", content: "THIRD" } }],
+      }),
+      { status: 200, headers: { "content-type": "application/json" } }
+    )
+  );
+  resolvers[1](
+    new Response(
+      JSON.stringify({
+        choices: [{ message: { role: "assistant", content: "SECOND" } }],
+      }),
+      { status: 200, headers: { "content-type": "application/json" } }
+    )
+  );
+  resolvers[0](
+    new Response(
+      JSON.stringify({
+        choices: [{ message: { role: "assistant", content: "FIRST" } }],
+      }),
+      { status: 200, headers: { "content-type": "application/json" } }
+    )
+  );
+
+  const response = await responsePromise;
+  const body = await response.json();
+
+  assert.equal(response.status, 200);
+  assert.equal(body.resolvedBy, "provider/first");
+  assert.deepEqual( // results are expected in combo order, not in completion order
+    body.results.map((result) => ({
+      model: result.model,
+      status: result.status,
+      responseText: result.responseText,
+    })),
+    [
+      { model: "provider/first", status: "ok", responseText: "FIRST" },
+      { model: "provider/second", status: "ok", responseText: "SECOND" },
+      { model: "provider/third", status: "ok", responseText: "THIRD" },
+    ]
+  );
+});
@@ -49,9 +49,9 @@ test("T40: OpenCode config generator includes endpoint and selected API key", ()
    apiKey: "sk_test_opencode",
    model: "claude-sonnet-4-5-thinking",
  });
-  assert.equal(providerConfig.baseURL, "http://localhost:20128/v1");
-  assert.equal(providerConfig.apiKey, "sk_test_opencode");
-  assert.ok(providerConfig.models.includes("claude-sonnet-4-5-thinking"));
+  assert.equal(providerConfig.options.baseURL, "http://localhost:20128/v1");
+  assert.equal(providerConfig.options.apiKey, "sk_test_opencode");
+  assert.ok(providerConfig.models["claude-sonnet-4-5-thinking"]);

  const mergedConfig = mergeOpenCodeConfig(
    { provider: { custom: { name: "Custom Provider" } } },
@@ -62,8 +62,8 @@ test("T40: OpenCode config generator includes endpoint and selected API key", ()
    }
  );
  assert.ok(mergedConfig.provider.custom);
-  assert.equal(mergedConfig.provider.omniroute.baseURL, "http://localhost:20128/v1");
-  assert.equal(mergedConfig.provider.omniroute.apiKey, "sk_test_opencode");
+  assert.equal(mergedConfig.provider.omniroute.options.baseURL, "http://localhost:20128/v1");
+  assert.equal(mergedConfig.provider.omniroute.options.apiKey, "sk_test_opencode");
 });

 test("T40: Windsurf card documents current official limitations honestly", () => {
Author	SHA1	Message	Date
diegosouzapw	ab4914ee6a	chore(release): v3.3.7 — OpenCode config fix, i18n keys fix Build Electron Desktop App / Validate version (push) Failing after 31s Details Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped Details Build Electron Desktop App / Build Electron (linux) (push) Has been skipped Details Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped Details Build Electron Desktop App / Build Electron (windows) (push) Has been skipped Details Build Electron Desktop App / Create Release (push) Has been skipped Details Build Electron Desktop App / Publish to npm (push) Has been skipped Details	2026-03-30 19:30:18 -03:00
diegosouzapw	e7c73c76dd	chore(release): bump version to v3.3.7	2026-03-30 19:28:20 -03:00
diegosouzapw	3591a3fe5c	fix: resolve opencode json structure to use record mapping instead of array (#816)	2026-03-30 19:23:25 -03:00
diegosouzapw	fbdce049b2	fix: add missing cloudflaredUrlNotice i18n keys (#823)	2026-03-30 19:23:14 -03:00
diegosouzapw	9a8520a2de	fix: add missing cloudflaredUrlNotice i18n keys to prevent MISSING_MESSAGE console errors (#823)	2026-03-30 19:16:50 -03:00
diegosouzapw	0b2c488a61	chore(release): bump version to v3.3.6 Build Electron Desktop App / Validate version (push) Failing after 30s Details Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped Details Build Electron Desktop App / Build Electron (linux) (push) Has been skipped Details Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped Details Build Electron Desktop App / Build Electron (windows) (push) Has been skipped Details Build Electron Desktop App / Create Release (push) Has been skipped Details Build Electron Desktop App / Publish to npm (push) Has been skipped Details	2026-03-30 18:24:15 -03:00
diegosouzapw	76e135077b	Resolve merge conflicts with main natively built Prompt Cache UI	2026-03-30 18:20:19 -03:00
Diego Rodrigues de Sa e Souza	6078cd2eab	Merge pull request #829 from rdself/coder/fix-cloudflared-startup Fix cloudflared quick tunnel startup in Docker	2026-03-30 18:18:03 -03:00
Diego Rodrigues de Sa e Souza	3482dade71	Merge pull request #828 from rdself/coder/fix-combo-test-false-negative Fix combo test false negatives and parallelize model probes	2026-03-30 18:18:00 -03:00
diegosouzapw	04d0c350db	build: sync monorepo package versions across electron and open-sse	2026-03-30 18:02:33 -03:00
R.D.	b6a5c91045	Install CA certificates in runtime image	2026-03-30 17:01:50 -04:00
diegosouzapw	7a37c79ebc	ci: fix pipeline errors and enforce route lint validation	2026-03-30 17:54:44 -03:00
R.D.	ba227c5ec3	Run combo health probes concurrently	2026-03-30 16:49:01 -04:00
diegosouzapw	df23162e9d	chore(release): v3.3.5 - all changes in ONE commit Build Electron Desktop App / Validate version (push) Failing after 31s Details Build Electron Desktop App / Build Electron (macos-arm64) (push) Has been skipped Details Build Electron Desktop App / Build Electron (linux) (push) Has been skipped Details Build Electron Desktop App / Build Electron (macos-intel) (push) Has been skipped Details Build Electron Desktop App / Build Electron (windows) (push) Has been skipped Details Build Electron Desktop App / Create Release (push) Has been skipped Details Build Electron Desktop App / Publish to npm (push) Has been skipped Details	2026-03-30 17:35:51 -03:00
dependabot[bot]	2c12f18b44	deps: bump the production group with 8 updates Bumps the production group with 8 updates: \| Package \| From \| To \| \| --- \| --- \| --- \| \| [@lobehub/icons](https://github.com/lobehub/lobe-icons) \| `5.0.1` \| `5.2.0` \| \| [@modelcontextprotocol/sdk](https://github.com/modelcontextprotocol/typescript-sdk) \| `1.27.1` \| `1.29.0` \| \| [@swc/helpers](https://github.com/swc-project/swc/tree/HEAD/packages/helpers) \| `0.5.19` \| `0.5.20` \| \| [jose](https://github.com/panva/jose) \| `6.2.1` \| `6.2.2` \| \| [next](https://github.com/vercel/next.js) \| `16.1.7` \| `16.2.1` \| \| [recharts](https://github.com/recharts/recharts) \| `3.8.0` \| `3.8.1` \| \| [undici](https://github.com/nodejs/undici) \| `7.24.4` \| `7.24.6` \| \| [wreq-js](https://github.com/sqdshguy/wreq-js) \| `2.2.0` \| `2.2.2` \| Updates `@lobehub/icons` from 5.0.1 to 5.2.0 - [Release notes](https://github.com/lobehub/lobe-icons/releases) - [Changelog](https://github.com/lobehub/lobe-icons/blob/master/CHANGELOG.md) - [Commits](https://github.com/lobehub/lobe-icons/compare/v5.0.1...v5.2.0) Updates `@modelcontextprotocol/sdk` from 1.27.1 to 1.29.0 - [Release notes](https://github.com/modelcontextprotocol/typescript-sdk/releases) - [Commits](https://github.com/modelcontextprotocol/typescript-sdk/compare/v1.27.1...v1.29.0) Updates `@swc/helpers` from 0.5.19 to 0.5.20 - [Release notes](https://github.com/swc-project/swc/releases) - [Changelog](https://github.com/swc-project/swc/blob/main/CHANGELOG-CORE.md) - [Commits](https://github.com/swc-project/swc/commits/HEAD/packages/helpers) Updates `jose` from 6.2.1 to 6.2.2 - [Release notes](https://github.com/panva/jose/releases) - [Changelog](https://github.com/panva/jose/blob/main/CHANGELOG.md) - [Commits](https://github.com/panva/jose/compare/v6.2.1...v6.2.2) Updates `next` from 16.1.7 to 16.2.1 - [Release notes](https://github.com/vercel/next.js/releases) - [Changelog](https://github.com/vercel/next.js/blob/canary/release.js) 
- [Commits](https://github.com/vercel/next.js/compare/v16.1.7...v16.2.1) Updates `recharts` from 3.8.0 to 3.8.1 - [Release notes](https://github.com/recharts/recharts/releases) - [Changelog](https://github.com/recharts/recharts/blob/main/CHANGELOG.md) - [Commits](https://github.com/recharts/recharts/compare/v3.8.0...v3.8.1) Updates `undici` from 7.24.4 to 7.24.6 - [Release notes](https://github.com/nodejs/undici/releases) - [Commits](https://github.com/nodejs/undici/compare/v7.24.4...v7.24.6) Updates `wreq-js` from 2.2.0 to 2.2.2 - [Release notes](https://github.com/sqdshguy/wreq-js/releases) - [Commits](https://github.com/sqdshguy/wreq-js/compare/v2.2.0...v2.2.2) --- updated-dependencies: - dependency-name: "@lobehub/icons" dependency-version: 5.2.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: production - dependency-name: "@modelcontextprotocol/sdk" dependency-version: 1.29.0 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: production - dependency-name: "@swc/helpers" dependency-version: 0.5.20 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: production - dependency-name: jose dependency-version: 6.2.2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: production - dependency-name: next dependency-version: 16.2.1 dependency-type: direct:production update-type: version-update:semver-minor dependency-group: production - dependency-name: recharts dependency-version: 3.8.1 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: production - dependency-name: undici dependency-version: 7.24.6 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: production - dependency-name: wreq-js dependency-version: 2.2.2 dependency-type: direct:production update-type: version-update:semver-patch dependency-group: production 
... Signed-off-by: dependabot[bot] <support@github.com>	2026-03-30 17:32:55 -03:00
dependabot[bot]	eaeb28b4e1	deps: bump the development group with 7 updates Bumps the development group with 7 updates: \| Package \| From \| To \| \| --- \| --- \| --- \| \| [@tailwindcss/postcss](https://github.com/tailwindlabs/tailwindcss/tree/HEAD/packages/@tailwindcss-postcss) \| `4.2.1` \| `4.2.2` \| \| [@types/keytar](https://github.com/DefinitelyTyped/DefinitelyTyped/tree/HEAD/types/keytar) \| `4.4.0` \| `4.4.2` \| \| [eslint-config-next](https://github.com/vercel/next.js/tree/HEAD/packages/eslint-config-next) \| `16.1.6` \| `16.2.1` \| \| [tailwindcss](https://github.com/tailwindlabs/tailwindcss/tree/HEAD/packages/tailwindcss) \| `4.2.1` \| `4.2.2` \| \| [typescript](https://github.com/microsoft/TypeScript) \| `5.9.3` \| `6.0.2` \| \| [typescript-eslint](https://github.com/typescript-eslint/typescript-eslint/tree/HEAD/packages/typescript-eslint) \| `8.57.1` \| `8.58.0` \| \| [vitest](https://github.com/vitest-dev/vitest/tree/HEAD/packages/vitest) \| `4.1.0` \| `4.1.2` \| Updates `@tailwindcss/postcss` from 4.2.1 to 4.2.2 - [Release notes](https://github.com/tailwindlabs/tailwindcss/releases) - [Changelog](https://github.com/tailwindlabs/tailwindcss/blob/main/CHANGELOG.md) - [Commits](https://github.com/tailwindlabs/tailwindcss/commits/v4.2.2/packages/@tailwindcss-postcss) Updates `@types/keytar` from 4.4.0 to 4.4.2 - [Release notes](https://github.com/DefinitelyTyped/DefinitelyTyped/releases) - [Commits](https://github.com/DefinitelyTyped/DefinitelyTyped/commits/HEAD/types/keytar) Updates `eslint-config-next` from 16.1.6 to 16.2.1 - [Release notes](https://github.com/vercel/next.js/releases) - [Changelog](https://github.com/vercel/next.js/blob/canary/release.js) - [Commits](https://github.com/vercel/next.js/commits/v16.2.1/packages/eslint-config-next) Updates `tailwindcss` from 4.2.1 to 4.2.2 - [Release notes](https://github.com/tailwindlabs/tailwindcss/releases) - [Changelog](https://github.com/tailwindlabs/tailwindcss/blob/main/CHANGELOG.md) - 
[Commits](https://github.com/tailwindlabs/tailwindcss/commits/v4.2.2/packages/tailwindcss) Updates `typescript` from 5.9.3 to 6.0.2 - [Release notes](https://github.com/microsoft/TypeScript/releases) - [Commits](https://github.com/microsoft/TypeScript/compare/v5.9.3...v6.0.2) Updates `typescript-eslint` from 8.57.1 to 8.58.0 - [Release notes](https://github.com/typescript-eslint/typescript-eslint/releases) - [Changelog](https://github.com/typescript-eslint/typescript-eslint/blob/main/packages/typescript-eslint/CHANGELOG.md) - [Commits](https://github.com/typescript-eslint/typescript-eslint/commits/v8.58.0/packages/typescript-eslint) Updates `vitest` from 4.1.0 to 4.1.2 - [Release notes](https://github.com/vitest-dev/vitest/releases) - [Commits](https://github.com/vitest-dev/vitest/commits/v4.1.2/packages/vitest) --- updated-dependencies: - dependency-name: "@tailwindcss/postcss" dependency-version: 4.2.2 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: development - dependency-name: "@types/keytar" dependency-version: 4.4.2 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: development - dependency-name: eslint-config-next dependency-version: 16.2.1 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: development - dependency-name: tailwindcss dependency-version: 4.2.2 dependency-type: direct:development update-type: version-update:semver-patch dependency-group: development - dependency-name: typescript dependency-version: 6.0.2 dependency-type: direct:development update-type: version-update:semver-major dependency-group: development - dependency-name: typescript-eslint dependency-version: 8.58.0 dependency-type: direct:development update-type: version-update:semver-minor dependency-group: development - dependency-name: vitest dependency-version: 4.1.2 dependency-type: direct:development update-type: version-update:semver-patch 
dependency-group: development ... Signed-off-by: dependabot[bot] <support@github.com>	2026-03-30 17:32:51 -03:00
Chris Staley	d5647eab33	fix: remove dead userDismissed ref after auto-open removal The userDismissed ref was only read by the removed auto-open useEffect. Remove the ref declaration and the three onClose assignments that set it.	2026-03-30 17:32:49 -03:00
Chris Staley	89eb8885b1	fix: remove unnecessary comment from previous commit	2026-03-30 17:32:49 -03:00
Chris Staley	a5dc5687f8	fix: remove auto-opening OAuth/API key modal on provider detail page Auto-opening the "Add Connection" dialog when navigating to a provider with zero connections was a poor UX pattern. It surprised users who were simply browsing provider details (e.g. after deleting a connection or checking settings). The page already displays a clear empty state with an "Add Connection" button — users should click it when ready.	2026-03-30 17:32:49 -03:00
oyi77	6780485051	feat(cache): persistent metrics, cache entry browser, settings UI, MCP tools, prefix analyzer Implements remaining features from #813: Phase 1 - Persistent Metrics: - Add cache_metrics table for persistent hit/miss tracking - Semantic cache stats now survive server restarts Phase 2 - Cache Entry Browser: - /api/cache/entries endpoint with search, pagination, delete - CacheEntriesTab component for browsing cached entries Phase 3 - Settings UI: - CacheSettingsTab for semantic/prompt cache configuration - /api/settings/cache-config endpoint Phase 4 - Prefix Analyzer: - src/lib/promptCache/prefixAnalyzer.ts for intelligent caching - Analyzes message arrays to find stable prefixes Phase 5 - Provider Support: - Added deepseek to CACHING_PROVIDERS Phase 6 - MCP Tools: - omniroute_cache_stats tool - omniroute_cache_flush tool Phase 7 - Retention: - cleanOldMetrics() for auto-purge of old entries Closes #813	2026-03-30 17:32:45 -03:00
oyi77	d043e7a242	feat(cache): fix cache page to display prompt cache metrics and trend data Closes #813	2026-03-30 17:32:45 -03:00
Chris Staley	c5d9b5f51d	fix: apply PR review feedback for Gemini CLI quota - Add early return guard for missing accessToken in getGeminiUsage - Add 10s fetch timeout (AbortSignal.timeout) on retrieveUserQuota calls - Clamp used value with Math.max(0, ...) for non-negative display - Use full accessToken as cache key instead of truncated prefix - Replace catch(err: any) with instanceof Error check in models route	2026-03-30 17:32:42 -03:00
Chris Staley	35e2892b98	feat: add real Gemini CLI quota tracking via retrieveUserQuota API Replace stub getGeminiUsage() with per-model quota fetching from Google Cloud Code Assist's retrieveUserQuota endpoint (same API the official Gemini CLI /stats command uses). Fixes OAuth env var name, aligns model list with official Gemini CLI VALID_GEMINI_MODELS, and makes "Import from /models" discover new models via the quota endpoint.	2026-03-30 17:32:42 -03:00
R.D.	b492c5ac1a	Fix cloudflared startup TLS handling	2026-03-30 16:31:07 -04:00
R.D.	03a860dd6f	Fix combo smoke tests for reasoning responses	2026-03-30 16:23:53 -04:00
diegosouzapw	11dfdbb7a3	feat(analytics): add diversity score card UI and diversity API route Implement DiversityScoreCard component to fetch and display provider diversity score with loading state and conditional styling, integrate it into AnalyticsPage overview, and add a new API route at src/app/api/analytics/diversity/route.ts to return the diversity report using getDiversityReport	2026-03-30 16:37:49 -03:00
tombii	007b5d7f50	fix(test): split CacheStatsCard check into cache page test Integration test was failing because CacheStatsCard was moved from settings page to cache page in previous commit. Split the test into two separate describe blocks for accurate page-specific verification. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-30 19:49:57 +02:00
tombii	c6eadc504b	fix(usage): include cache tokens in input token counts - Fix getLoggedInputTokens to return full prompt_tokens (input + cache_read + cache_creation) - Fix usageExtractor for non-streaming Claude responses to calculate total correctly - Add formatUsageLog helper to show CR=<cache_read> in logs - Add migration 012 to fix historical token counts in usage_history - Move prompt cache metrics from Settings to /dashboard/cache page Per Claude API docs: Total input tokens = input_tokens + cache_creation_input_tokens + cache_read_input_tokens Fixes issue where totalInputTokens (71k) was less than totalCacheCreationTokens (1.35M). Tested: - All 1134 unit tests pass - Cache metrics API returns correct totals - Migration is idempotent and tracked in _omniroute_migrations - Logs show cache read tokens: 'in=6055 \| out=211 \| CR=22399' Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>	2026-03-30 19:24:26 +02:00
				`@@ -0,0 +1 @@`
				`export { analyzePrefix, shouldInjectCacheControl } from "./prefixAnalyzer";`